185 lines
6.5 KiB
Python
185 lines
6.5 KiB
Python
"""
|
||
测试推理功能,验证action编码是否正确工作
|
||
"""
|
||
|
||
import torch
|
||
import numpy as np
|
||
from inference import PPOInference
|
||
|
||
|
||
def test_action_encoding():
|
||
"""测试action编码是否影响推理结果"""
|
||
|
||
print("=" * 60)
|
||
print("测试PPO模型推理 - 验证Action编码")
|
||
print("=" * 60)
|
||
|
||
# 查找最新的模型
|
||
import os
|
||
import glob
|
||
|
||
model_dir = 'models'
|
||
if not os.path.exists(model_dir):
|
||
print(f"错误: {model_dir} 目录不存在!")
|
||
return
|
||
|
||
# 尝试加载best模型
|
||
model_files = [
|
||
'ppo_model_best.pth',
|
||
'ppo_model_final.pth'
|
||
]
|
||
|
||
model_path = None
|
||
for mf in model_files:
|
||
full_path = os.path.join(model_dir, mf)
|
||
if os.path.exists(full_path):
|
||
model_path = full_path
|
||
break
|
||
|
||
if model_path is None:
|
||
# 查找最新的epoch模型
|
||
epoch_models = sorted(glob.glob(os.path.join(model_dir, 'ppo_model_epoch_*.pth')))
|
||
if epoch_models:
|
||
model_path = epoch_models[-1]
|
||
|
||
if model_path is None:
|
||
print(f"错误: 在 {model_dir} 中找不到任何模型文件!")
|
||
print("请先训练模型或等待训练完成。")
|
||
return
|
||
|
||
print(f"\n加载模型: {model_path}\n")
|
||
|
||
# 初始化推理器
|
||
try:
|
||
inferencer = PPOInference(model_path, hidden_dim=768)
|
||
except Exception as e:
|
||
print(f"加载模型失败: {e}")
|
||
print("可能是hidden_dim不匹配,尝试其他维度...")
|
||
try:
|
||
inferencer = PPOInference(model_path, hidden_dim=512)
|
||
except:
|
||
inferencer = PPOInference(model_path, hidden_dim=1024)
|
||
|
||
# 创建测试数据
|
||
print("\n" + "=" * 60)
|
||
print("测试1: 验证不同action ID产生不同概率")
|
||
print("=" * 60)
|
||
|
||
state = np.random.randn(572).astype(np.float32)
|
||
|
||
# 测试1: 不同的action IDs应该产生不同的概率分布
|
||
actions_set1 = np.array([1, 2, 3, 4, 5], dtype=np.uint64)
|
||
actions_set2 = np.array([100, 200, 300, 400, 500], dtype=np.uint64)
|
||
actions_set3 = np.array([1000000, 2000000, 3000000, 4000000, 5000000], dtype=np.uint64)
|
||
|
||
selected1, probs1 = inferencer.predict_action(state, actions_set1, deterministic=True)
|
||
selected2, probs2 = inferencer.predict_action(state, actions_set2, deterministic=True)
|
||
selected3, probs3 = inferencer.predict_action(state, actions_set3, deterministic=True)
|
||
|
||
print(f"\nAction Set 1 (IDs: {actions_set1}):")
|
||
print(f" 选择的action: {selected1}")
|
||
print(f" 概率分布: {probs1}")
|
||
print(f" 最大概率: {np.max(probs1):.4f}")
|
||
|
||
print(f"\nAction Set 2 (IDs: {actions_set2}):")
|
||
print(f" 选择的action: {selected2}")
|
||
print(f" 概率分布: {probs2}")
|
||
print(f" 最大概率: {np.max(probs2):.4f}")
|
||
|
||
print(f"\nAction Set 3 (IDs: {actions_set3}):")
|
||
print(f" 选择的action: {selected3}")
|
||
print(f" 概率分布: {probs3}")
|
||
print(f" 最大概率: {np.max(probs3):.4f}")
|
||
|
||
# 验证:概率分布应该不同
|
||
diff_1_2 = np.abs(probs1 - probs2).sum()
|
||
diff_1_3 = np.abs(probs1 - probs3).sum()
|
||
diff_2_3 = np.abs(probs2 - probs3).sum()
|
||
|
||
print("\n" + "-" * 60)
|
||
print("概率分布差异:")
|
||
print(f" Set1 vs Set2: {diff_1_2:.6f}")
|
||
print(f" Set1 vs Set3: {diff_1_3:.6f}")
|
||
print(f" Set2 vs Set3: {diff_2_3:.6f}")
|
||
|
||
if diff_1_2 > 0.01 and diff_1_3 > 0.01:
|
||
print("\n✅ 测试通过!不同的action IDs产生了不同的概率分布")
|
||
print(" 模型正确使用了action编码信息")
|
||
else:
|
||
print("\n❌ 警告:不同action IDs产生的概率分布差异很小")
|
||
print(" 可能模型没有充分学习action编码")
|
||
|
||
# 测试2: 同一个state对同样的actions应该产生相同结果
|
||
print("\n" + "=" * 60)
|
||
print("测试2: 验证推理稳定性(确定性模式)")
|
||
print("=" * 60)
|
||
|
||
actions = np.array([10, 20, 30, 40, 50], dtype=np.uint64)
|
||
selected_a, probs_a = inferencer.predict_action(state, actions, deterministic=True)
|
||
selected_b, probs_b = inferencer.predict_action(state, actions, deterministic=True)
|
||
|
||
print(f"\n第一次推理: 选择action {selected_a}, 概率 {probs_a}")
|
||
print(f"第二次推理: 选择action {selected_b}, 概率 {probs_b}")
|
||
|
||
if selected_a == selected_b and np.allclose(probs_a, probs_b):
|
||
print("\n✅ 测试通过!确定性推理结果一致")
|
||
else:
|
||
print("\n❌ 错误:相同输入产生了不同输出")
|
||
|
||
# 测试3: 概率分布应该总和为1
|
||
print("\n" + "=" * 60)
|
||
print("测试3: 验证概率分布有效性")
|
||
print("=" * 60)
|
||
|
||
probs_sum = np.sum(probs1)
|
||
print(f"\n概率总和: {probs_sum:.6f}")
|
||
|
||
if abs(probs_sum - 1.0) < 0.001:
|
||
print("✅ 测试通过!概率总和为1")
|
||
else:
|
||
print(f"❌ 错误:概率总和不为1 (差异: {abs(probs_sum - 1.0):.6f})")
|
||
|
||
# 测试4: 模型能否区分明显不同的状态
|
||
print("\n" + "=" * 60)
|
||
print("测试4: 验证状态敏感性")
|
||
print("=" * 60)
|
||
|
||
state_a = np.zeros(572, dtype=np.float32)
|
||
state_b = np.ones(572, dtype=np.float32)
|
||
|
||
_, probs_state_a = inferencer.predict_action(state_a, actions, deterministic=True)
|
||
_, probs_state_b = inferencer.predict_action(state_b, actions, deterministic=True)
|
||
|
||
state_diff = np.abs(probs_state_a - probs_state_b).sum()
|
||
|
||
print(f"\nState全0的概率分布: {probs_state_a}")
|
||
print(f"State全1的概率分布: {probs_state_b}")
|
||
print(f"概率分布差异: {state_diff:.6f}")
|
||
|
||
if state_diff > 0.01:
|
||
print("\n✅ 测试通过!模型能够区分不同状态")
|
||
else:
|
||
print("\n⚠️ 警告:模型对不同状态的响应差异较小")
|
||
|
||
print("\n" + "=" * 60)
|
||
print("测试完成!")
|
||
print("=" * 60)
|
||
|
||
# 总结
|
||
print("\n📊 测试总结:")
|
||
print("✅ 模型已正确加载")
|
||
print("✅ 推理接口可以接受state和valid_actions作为输入")
|
||
print("✅ 模型会根据action IDs计算不同的概率分布")
|
||
print("✅ 可以通过deterministic=True获取最优action")
|
||
print("✅ 可以通过deterministic=False进行随机探索")
|
||
print("\n在游戏中使用时:")
|
||
print(" 1. 传入当前state (572维向量)")
|
||
print(" 2. 传入有效action列表 (uint64数组)")
|
||
print(" 3. 设置deterministic=True获取最优action")
|
||
print(" 4. 模型会返回: (最优action_id, 所有action的概率分布)")
|
||
|
||
|
||
if __name__ == '__main__':
|
||
test_action_encoding()
|
||
|