Code:
import tensorflow as tf
import numpy as np
import gym
import random
from collections import deque

tf.enable_eager_execution()

# Hyperparameters
num_episodes = 500              # total training episodes
num_exploration_episodes = 100  # episodes over which epsilon is annealed
max_len_episode = 1000          # max steps per episode
batch_size = 32
learning_rate = 1e-3
gamma = 1.                      # discount factor
initial_epsilon = 1.            # initial exploration rate
final_epsilon = 0.01            # final exploration rate


class QNetwork(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(units=24, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=24, activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(units=2)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)
        return x

    def predict(self, inputs):
        q_values = self(inputs)
        return tf.argmax(q_values, axis=-1)


env = gym.make('CartPole-v1')
model = QNetwork()
optimizer = tf.train.AdamOptimizer(learning_rate)
replay_buffer = deque(maxlen=10000)
epsilon = initial_epsilon

for episode_id in range(num_episodes):
    state = env.reset()
    # Linearly anneal epsilon over the exploration episodes.
    epsilon = max(
        initial_epsilon * (num_exploration_episodes - episode_id) / num_exploration_episodes,
        final_epsilon
    )
    for t in range(max_len_episode):
        env.render()
        if random.random() < epsilon:       # explore: random action
            action = env.action_space.sample()
        else:                               # exploit: greedy action from the Q-network
            action = model.predict(
                tf.constant(np.expand_dims(state, axis=0), dtype=tf.float32)
            ).numpy()
            action = action[0]
        next_state, reward, done, info = env.step(action)
        reward = -10. if done else reward   # penalize the terminal transition
        replay_buffer.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print('episode %d, epsilon %f, score %d' % (episode_id, epsilon, t))
            break
        if len(replay_buffer) >= batch_size:
            # Note: every component of the batch, including the actions, is cast to float32 here.
            batch_state, batch_action, batch_reward, batch_next_state, batch_done = \
                [np.array(a, dtype=np.float32) for a in zip(*random.sample(replay_buffer, batch_size))]
            q_value = model(tf.constant(batch_next_state, dtype=tf.float32))
            y = batch_reward + (gamma * tf.reduce_max(q_value, axis=1)) * (1 - batch_done)
            with tf.GradientTape() as tape:
                loss = tf.losses.mean_squared_error(
                    labels=y,
                    predictions=tf.reduce_sum(
                        model(tf.constant(batch_state)) * tf.one_hot(batch_action, depth=2),
                        axis=1
                    )
                )
            grads = tape.gradient(loss, model.variables)
            optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
Error:
/home/kalarea/.conda/envs/py35/bin/python /home/kalarea/PycharmProjects/dql_demo/dpq_cartplie.py
/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
episode 0, epsilon 1.000000, score 12
2018-10-15 12:54:59.165871: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
Traceback (most recent call last):
File "/home/kalarea/PycharmProjects/dql_demo/dpq_cartplie.py", line 77, in
predictions = tf.reduce_sum(model(tf.constant(batch_state)) * tf.one_hot(batch_action, depth=2),axis=1)
File "/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 2439, in one_hot
name)
File "/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 4563, in one_hot
_six.raise_from(_core._status_to_exception(e.code, message), None)
File "
tensorflow.python.framework.errors_impl.InternalError: Could not find valid device for node.
Node: {{node OneHot}} = OneHot[T=DT_FLOAT, TI=DT_FLOAT, axis=-1](dummy_input, dummy_input, dummy_input, dummy_input)
All kernels registered for op OneHot :
device='CPU'; TI in [DT_INT64]; T in [DT_VARIANT]
device='CPU'; TI in [DT_INT32]; T in [DT_VARIANT]
device='CPU'; TI in [DT_UINT8]; T in [DT_VARIANT]
device='CPU'; TI in [DT_INT64]; T in [DT_RESOURCE]
device='CPU'; TI in [DT_INT32]; T in [DT_RESOURCE]
device='CPU'; TI in [DT_UINT8]; T in [DT_RESOURCE]
device='CPU'; TI in [DT_INT64]; T in [DT_STRING]
device='CPU'; TI in [DT_INT32]; T in [DT_STRING]
device='CPU'; TI in [DT_UINT8]; T in [DT_STRING]
device='CPU'; TI in [DT_INT64]; T in [DT_BOOL]
device='CPU'; TI in [DT_INT32]; T in [DT_BOOL]
device='CPU'; TI in [DT_UINT8]; T in [DT_BOOL]
device='CPU'; TI in [DT_INT64]; T in [DT_COMPLEX128]
device='CPU'; TI in [DT_INT32]; T in [DT_COMPLEX128]
device='CPU'; TI in [DT_UINT8]; T in [DT_COMPLEX128]
device='CPU'; TI in [DT_INT64]; T in [DT_COMPLEX64]
device='CPU'; TI in [DT_INT32]; T in [DT_COMPLEX64]
device='CPU'; TI in [DT_UINT8]; T in [DT_COMPLEX64]
device='CPU'; TI in [DT_INT64]; T in [DT_DOUBLE]
device='CPU'; TI in [DT_INT32]; T in [DT_DOUBLE]
device='CPU'; TI in [DT_UINT8]; T in [DT_DOUBLE]
device='CPU'; TI in [DT_INT64]; T in [DT_FLOAT]
device='CPU'; TI in [DT_INT32]; T in [DT_FLOAT]
device='CPU'; TI in [DT_UINT8]; T in [DT_FLOAT]
device='CPU'; TI in [DT_INT64]; T in [DT_BFLOAT16]
device='CPU'; TI in [DT_INT32]; T in [DT_BFLOAT16]
device='CPU'; TI in [DT_UINT8]; T in [DT_BFLOAT16]
device='CPU'; TI in [DT_INT64]; T in [DT_HALF]
device='CPU'; TI in [DT_INT32]; T in [DT_HALF]
device='CPU'; TI in [DT_UINT8]; T in [DT_HALF]
device='CPU'; TI in [DT_INT64]; T in [DT_INT8]
device='CPU'; TI in [DT_INT32]; T in [DT_INT8]
device='CPU'; TI in [DT_UINT8]; T in [DT_INT8]
device='CPU'; TI in [DT_INT64]; T in [DT_UINT8]
device='CPU'; TI in [DT_INT32]; T in [DT_UINT8]
device='CPU'; TI in [DT_UINT8]; T in [DT_UINT8]
device='CPU'; TI in [DT_INT64]; T in [DT_INT16]
device='CPU'; TI in [DT_INT32]; T in [DT_INT16]
device='CPU'; TI in [DT_UINT8]; T in [DT_INT16]
device='CPU'; TI in [DT_INT64]; T in [DT_UINT16]
device='CPU'; TI in [DT_INT32]; T in [DT_UINT16]
device='CPU'; TI in [DT_UINT8]; T in [DT_UINT16]
device='CPU'; TI in [DT_INT64]; T in [DT_INT32]
device='CPU'; TI in [DT_INT32]; T in [DT_INT32]
device='CPU'; TI in [DT_UINT8]; T in [DT_INT32]
device='CPU'; TI in [DT_INT64]; T in [DT_INT64]
device='CPU'; TI in [DT_INT32]; T in [DT_INT64]
device='CPU'; TI in [DT_UINT8]; T in [DT_INT64]
[Op:OneHot] name: one_hot/
Exception ignored in:
Traceback (most recent call last):
File "/home/kalarea/gym/gym/envs/classic_control/rendering.py", line 143, in __del__
File "/home/kalarea/gym/gym/envs/classic_control/rendering.py", line 62, in close
File "/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/pyglet/window/xlib/__init__.py", line 480, in close
File "/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/pyglet/gl/xlib.py", line 345, in destroy
File "/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/pyglet/gl/base.py", line 334, in destroy
File "/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/pyglet/gl/xlib.py", line 335, in detach
File "/home/kalarea/.conda/envs/py35/lib/python3.5/site-packages/pyglet/gl/lib.py", line 97, in errcheck
File "
File "
File "
TypeError: 'NoneType' object is not iterable
Process finished with exit code 1
1 Answer
I was also working through a small reinforcement-learning game example, and it seems that tf.one_hot runs into this kind of problem with the GPU build of TensorFlow on Windows 10. My workaround was to swap in an equivalent way of building the one-hot encoding; I used from keras.utils.np_utils import to_categorical. Keras really is quite a bit leaner. You can also implement one_hot yourself.
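A minimal sketch of that substitution, assuming a Keras 2-style to_categorical(y, num_classes=...) and reusing the model, batch_state and batch_action names from the question's training loop (so this is a drop-in for the predictions= line, not a complete program):

import numpy as np
import tensorflow as tf
from keras.utils.np_utils import to_categorical

# batch_action comes out of the replay-buffer sampling as float32, so cast it
# back to integer class indices before building the one-hot matrix in NumPy.
action_one_hot = to_categorical(batch_action.astype(np.int32), num_classes=2)

predictions = tf.reduce_sum(
    model(tf.constant(batch_state)) * tf.constant(action_one_hot, dtype=tf.float32),
    axis=1
)

# The "implement one_hot yourself" route is equally short if you want to avoid
# the extra Keras dependency (one_hot_np is a hypothetical helper name):
def one_hot_np(indices, depth):
    out = np.zeros((len(indices), depth), dtype=np.float32)
    out[np.arange(len(indices)), np.asarray(indices, dtype=np.int64)] = 1.0
    return out

Either way the one-hot matrix is produced on the NumPy side, so the tf.one_hot kernel (which, per the kernel list in the error above, only accepts integer index types) is never invoked.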