python - Tensorflow-gpu获取卷积算法失败

我正在尝试制作一个卷积神经网络来分析 Microsoft 的猫狗数据集。我在 Windows 10 上使用 tensorflow-gpu 1.12.0、jupyter notebook 和 anaconda。我的 GPU 是 GTX 1080。我安装了 CUDA 和 cuDNN,我很确定我设置正确。我检查了版本。这是我的代码(我把它放在 jupyter 的不同单元格中)。

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import pickle


import sys
# Show which Python interpreter and which TensorFlow version this kernel uses.
print(sys.executable, tf.__version__, sep='\n')


# Restrict this process to a 0.4 fraction of GPU memory instead of letting
# TensorFlow reserve (nearly) all of it up front.
# NOTE(review): the session is created but never explicitly handed to Keras
# (e.g. via tf.keras.backend.set_session) -- confirm whether that is needed.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
session_config = tf.ConfigProto(gpu_options=gpu_options)
session = tf.Session(config=session_config)
print('GPU Settings set')


# Load the pre-built training data. The files are opened in `with` blocks so
# the handles are closed even if unpickling raises.
# NOTE: pickle can execute arbitrary code while loading; only unpickle files
# that you created yourself or otherwise trust.
with open('X.pickle', 'rb') as features_file:
    X = pickle.load(features_file)  # the "pictures" of the training set
with open('y.pickle', 'rb') as labels_file:
    y = pickle.load(labels_file)  # the answers (labels)


# Scale the input data (not the model) by 1/255.
# Assumes X holds 8-bit pixel values in 0..255, giving a 0..1 range -- TODO confirm.
X = X/255.0

print('Data Loaded')

model = Sequential()  # layers are appended in order by the model.add() calls below

model.add(Conv2D(64, (3,3), input_shape = X.shape[1:]))  # 64 filters, 3x3 kernel; input shape is X's shape minus its first axis
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))  # 2x2 max pooling

model.add(Conv2D(64, (3,3)))  # second conv stage: same filter count and kernel size
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())  # flatten the feature maps before the dense layers
model.add(Dense(64))  # NOTE(review): no Activation follows this layer -- confirm that is intended

model.add(Dense(1))  # single output unit
model.add(Activation('sigmoid'))  # sigmoid output, used with the binary_crossentropy loss below

model.compile(loss="binary_crossentropy", optimizer='adam', metrics = ['accuracy'])

model.fit(X, y, batch_size=25, epochs=3, validation_split=0.1)  # holds out 10% for validation; this is the call that raises the error shown below

我得到这个错误:

Train on 22451 samples, validate on 2495 samples
Epoch 1/3
---------------------------------------------------------------------------
UnknownError                              Traceback (most recent call last)
<ipython-input-6-9cef6147c3c5> in <module>
     17 model.compile(loss="binary_crossentropy", optimizer='adam', metrics = ['accuracy'])
     18 
---> 19 model.fit(X, y, batch_size=25, epochs=3, validation_split=0.1)

~\Anaconda3\envs\learning\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, max_queue_size, workers, use_multiprocessing, **kwargs)
   1637           initial_epoch=initial_epoch,
   1638           steps_per_epoch=steps_per_epoch,
-> 1639           validation_steps=validation_steps)
   1640 
   1641   def evaluate(self,

~\Anaconda3\envs\learning\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in fit_loop(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps)
    213           ins_batch[i] = ins_batch[i].toarray()
    214 
--> 215         outs = f(ins_batch)
    216         if not isinstance(outs, list):
    217           outs = [outs]

~\Anaconda3\envs\learning\lib\site-packages\tensorflow\python\keras\backend.py in __call__(self, inputs)
   2984 
   2985     fetched = self._callable_fn(*array_vals,
-> 2986                                 run_metadata=self.run_metadata)
   2987     self._call_fetch_callbacks(fetched[-len(self._fetches):])
   2988     return fetched[:len(self.outputs)]

~\Anaconda3\envs\learning\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args, **kwargs)
   1437           ret = tf_session.TF_SessionRunCallable(
   1438               self._session._session, self._handle, args, status,
-> 1439               run_metadata_ptr)
   1440         if run_metadata:
   1441           proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~\Anaconda3\envs\learning\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
    526             None, None,
    527             compat.as_text(c_api.TF_Message(self.status.status)),
--> 528             c_api.TF_GetCode(self.status.status))
    529     # Delete the underlying status object from memory otherwise it stays alive
    530     # as there is a reference to status from this from the traceback due to

UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
     [[{{node conv2d_3/Conv2D}} = Conv2D[T=DT_FLOAT, _class=["loc:@training_2/Adam/gradients/conv2d_3/Conv2D_grad/Conv2DBackpropFilter"], data_format="NCHW", dilations=[1, 1, 1, 1], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_2/Adam/gradients/conv2d_3/Conv2D_grad/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer, conv2d_3/Conv2D/ReadVariableOp)]]
     [[{{node loss_2/activation_7_loss/broadcast_weights/assert_broadcastable/AssertGuard/Assert/Switch/_329}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_321_l...ert/Switch", tensor_type=DT_BOOL, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

最佳答案

希望这个链接可以解决你的问题。出现这个错误,是因为你安装的 cuDNN 版本与编译 TensorFlow 时所使用的 cuDNN 版本不兼容。

复制一份版本匹配的新 cuDNN 库,应该就可以正常工作了。

https://stackoverflow.com/questions/54357927/

相关文章:

tensorflow - 如何分析在 tf-serving 上运行的 tensorflow 模型?

angularjs - 获取 ItemID Office.context.mailbox.item.

bash - ssh-add <(echo "$SSH_PRIVATE_KEY") 在 gitlab

python - 如何从我的主 Flask 应用程序动态创建和启动辅助 Flask 应用程序?

angular - 如何修复 Angular 错误 : TS1039 in compiling?

python - 如何在 PyCharm 中使用 Anaconda 基础环境

spring - 在 Springfox 中用 "type":"string"和 "format":

powershell - 将 `Test-NetConnection` 与代理服务器一起使用

azure - 最大文件上传大小 Azure Web 应用程序防火墙 WAF

java - "2017-09-11 14:28:42"的 Json 反序列化失败,显示 'whil