linux禁用密码登录使用密钥登录
vim /etc/ssh/sshd_config
PasswordAuthentication no
ChallengeResponseAuthentication no
PermitRootLogin prohibit-password
PubkeyAuthentication yes
sudo systemctl restart sshd
注意:ssh-copy-id 需要通过密码登录来上传公钥,应在设置 PasswordAuthentication no 并重启 sshd 之前执行,否则可能无法再登录服务器
ssh-copy-id -i id_rsa.pub root@192.168.1.1
vim /etc/ssh/sshd_config
PasswordAuthentication no
ChallengeResponseAuthentication no
PermitRootLogin prohibit-password
PubkeyAuthentication yes
sudo systemctl restart sshd
ssh-copy-id -i id_rsa.pub root@192.168.1.1
创建虚拟环境指定版本:virtualenv venv --python=pythonx.x.x
CPU使用率高
./build/examples/alpha_zero_torch_example --game=new_game --actors=28 --evaluators=4 --inference_threads=4 --inference_batch_size=1 --train_batch_size=1024 --inference_cache=2621440 --max_simulations=100 --path=./point --checkpoint_freq=10 --max_steps=100 --verbose=false --devices=cuda:0,cpu --replay_buffer_size=655360 --explicit_learning=true
GPU利用率高
./build/examples/alpha_zero_torch_example --game=new_game --actors=20 --inference_batch_size=6 --inference_threads=3 --evaluators=4 --inference_cache=2621440 --max_simulations=100 --path=./point --checkpoint_freq=10 --max_steps=100 --devices=cuda:0,cpu --replay_buffer_size=655360 --explicit_learning=true
查看GPU驱动
watch -n 1 nvidia-smi
watch -n 1 gpustat
安装gpu驱动
# Purge (remove together with configuration files) every package whose name
# matches the NVIDIA pattern.
sudo apt purge '^nvidia-.*'
# Purge packages whose names match the CUDA pattern.
sudo apt purge '^cuda-.*'
# Purge packages whose names match the libcuda pattern.
sudo apt purge '^libcuda.*'
# Remove dependencies that no remaining package needs.
sudo apt autoremove
# Empty the local cache of downloaded package files.
sudo apt clean
# List the detected hardware and the driver packages available for it.
ubuntu-drivers devices
# Print the version of the CUDA compiler found on PATH.
nvcc -V
# Put the CUDA 12.1 binaries and libraries first on the search paths.
# ${VAR:+:${VAR}} appends ":<old value>" only when VAR is already non-empty,
# which avoids leaving a stray trailing colon.
export PATH=/usr/local/cuda-12.1/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
开启CMake调试编译选项(AddressSanitizer + 调试符号,Debug 构建关闭优化)
-DCMAKE_CXX_FLAGS="-fsanitize=address -g" \
-DCMAKE_CXX_FLAGS_DEBUG="-O0" \
在alpha_zero训练时无法显示错误,如何定位错误
1.检查coredump设置,如果输出为0,则表示coredump被禁用了。
ulimit -c
2.启用coredump
ulimit -c unlimited
3.配置coredump文件的保存位置
echo "/tmp/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern
4.修改 open_spiel/scripts/build_and_run_tests.sh 第199行,将构建类型改为 -DBUILD_TYPE=Debug
5.修改 open_spiel/CMakeLists.txt 第46行,改为 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
6.运行gdb ./build/examples/alpha_zero_torch_example /tmp/core.alpha_zero_torc.444708
7.在 gdb 中输入 bt,显示崩溃时的调用栈
8.输入 frame 0,切换到出错的栈帧
9.输入 list,查看出错位置附近的源码
import tensorflow as tf

# Freeze a TF1-style checkpoint into a single .pb file whose variables have
# been replaced by constants.

# Checkpoint prefix and the .meta file that stores the graph structure.
checkpoint_path = '/Users/mac/Desktop/model/checkpoint-25'
meta_path = '/Users/mac/Desktop/model/checkpoint-25.meta'

# Work inside a fresh graph so nothing leaks in from the default graph.
graph = tf.Graph()
with graph.as_default():
    # The session / saver APIs used here live under tf.compat.v1.
    with tf.compat.v1.Session() as sess:
        # Rebuild the graph structure from the .meta file.
        saver = tf.compat.v1.train.import_meta_graph(meta_path, clear_devices=True)
        # Load the trained weights into that graph.
        saver.restore(sess, checkpoint_path)

        # Freezing keeps only the nodes needed to compute the listed outputs,
        # so these must be the inference outputs, not input/target
        # placeholders (listing placeholders yields a graph with no model).
        # NOTE(review): assumes the outputs are named policy_softmax and
        # value_out - confirm against your own graph.
        output_node_names = 'policy_softmax,value_out'

        # Replace every variable with a constant holding its current value.
        output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
            sess,
            sess.graph_def,
            output_node_names.split(',')
        )

        # Serialize the frozen GraphDef to disk.
        with tf.io.gfile.GFile('/Users/mac/Desktop/model/frozen_model.pb', 'wb') as f:
            f.write(output_graph_def.SerializeToString())
pb预测
import tensorflow.compat.v1 as tf
def load_model(model_path):
    """Load a frozen GraphDef file and return a session bound to it.

    Args:
        model_path: Path of the serialized GraphDef (.pb) file.

    Returns:
        A tf.compat.v1.Session whose ``graph`` contains the imported nodes
        under their original names (no name prefix is added).
    """
    # Use a dedicated graph: the function no longer depends on a
    # module-level ``graph`` and cannot collide with nodes that already
    # exist elsewhere.
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.compat.v1.Session(graph=graph)
        with tf.io.gfile.GFile(model_path, 'rb') as f:
            graph_def = tf.compat.v1.GraphDef()
            # Parse the serialized GraphDef bytes.
            graph_def.ParseFromString(f.read())
        # name='' keeps the node names exactly as stored in the file.
        tf.import_graph_def(graph_def, name='')
    return sess
import pyspiel
import numpy as np

# Session bound to the frozen graph.
model_session = load_model('/home/keras/open_spiel/model/aaa.pb')

# Build a batch of one from the initial tic-tac-toe state.
game = pyspiel.load_game("tic_tac_toe")
state = game.new_initial_state()
obs = np.expand_dims(state.observation_tensor(), 0)
mask = np.expand_dims(state.legal_actions_mask(), 0)


def get_var(name):
    """Return the first output tensor (``<name>:0``) of the named graph node."""
    return model_session.graph.get_tensor_by_name(name + ":0")


# Tensors fed to the graph.
input = get_var("input")
legals_mask = get_var("legals_mask")
training = get_var("training")
# Tensors fetched from the graph.
value_out = get_var("value_out")
policy_softmax = get_var("policy_softmax")

# Single forward pass with the training flag fed as False.
value, policy = model_session.run(
    [value_out, policy_softmax],
    feed_dict={input: np.array(obs, dtype=np.float32),
               legals_mask: np.array(mask, dtype=bool),
               training: False})
例如:$Y = w_1 x_1 + w_2 x_2 + b$
import pandas as pd

# Read the advertising dataset and preview its first rows.
data = pd.read_csv('/Users/mac/Desktop/Keras/000.数据+课件+代码【下部分】/小型数据集/Advertising.csv')
data.head()

# Features: every column except the first and the last one.
x = data[data.columns[1:-1]]
x
# Target: the last column.
y = data.iloc[:, -1]
y

import keras
from keras import layers

# A single Dense unit over three inputs.
model = keras.Sequential([
    layers.Dense(1, input_dim=3),
])
model.summary()

# Adam optimizer with mean-squared-error loss, trained for 2000 epochs.
model.compile(loss='mse', optimizer='adam')
model.fit(x, y, epochs=2000)
keras.Sequential() # sequential model: layers are stacked one after another
model.add(layers.Dense(1,input_dim=1)) # 1-dimensional input, 1-dimensional output
import keras
import numpy as np
import matplotlib.pyplot as plt
from keras import layers

# Synthetic data: 30 points on y = 3x + 7 with Gaussian noise added.
x = np.linspace(0, 100, 30)
y = 3 * x + 7 + np.random.randn(30) * 8
x
y
plt.scatter(x, y)

# One Dense unit with a single input feature.
model = keras.Sequential()
model.add(layers.Dense(1, input_dim=1))
model.summary()

# Compile the model: 'adam' is the optimizer and 'mse' (mean squared error)
# is the loss to be minimized.
model.compile(optimizer='adam', loss='mse')
# Train the model.
model.fit(x, y, epochs=3000)

# Compare the fitted line against the raw points.
model.predict(x)
plt.scatter(x, y, c='r')
plt.plot(x, model.predict(x))

# Predict for an unseen x value. The query is wrapped in an ndarray because
# a bare Python list is not accepted as input data by newer Keras releases.
model.predict(np.array([150]))