增加L0训练阶段的MCTS部分

2025-07-23 07:04:10 +08:00
parent 88bed2a1ef
commit 4410defbe5
23 changed files with 5205 additions and 0 deletions
--- a/tools/init.py
+++ b/tools/init.py
@@ -0,0 +1 @@
+# Deep2048 工具包
--- a/tools/benchmark.py
+++ b/tools/benchmark.py
@@ -0,0 +1,356 @@
+"""
+Deep2048 快速基准测试工具
+
+自动测试不同配置的性能，找出最优的线程数和参数设置
+"""
+
+import time
+import torch
+import multiprocessing as mp
+from typing import Dict, List, Tuple, Optional
+import json
+from pathlib import Path
+import argparse
+
+from game import Game2048
+from mcts import PureMCTS
+from training_data import TrainingDataManager
+
+
+class QuickBenchmark:
+    """快速基准测试工具"""
+    
+    def __init__(self, output_dir: str = "results/benchmark"):
+        """
+        Prepare the result directory and report the host's capabilities.
+
+        Args:
+            output_dir: Directory for benchmark result files; it is created
+                (including missing parents) when it does not exist yet.
+        """
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Host facts that are stored with the results and drive which
+        # thread counts get benchmarked.
+        self.cpu_count = mp.cpu_count()
+        self.cuda_available = torch.cuda.is_available()
+
+        for line in (
+            "系统信息:",
+            f"  CPU核心数: {self.cpu_count}",
+            f"  CUDA可用: {self.cuda_available}",
+        ):
+            print(line)
+        # The device name is only queried when CUDA is actually usable.
+        if self.cuda_available:
+            print(f"  CUDA设备: {torch.cuda.get_device_name()}")
+    
+    def test_thread_performance(self, simulations: int = 200) -> Dict[int, Dict]:
+        """
+        Measure MCTS search throughput for several thread counts.
+
+        Every configuration runs three searches on 3x3 games (seeds 42, 43
+        and 44) and the elapsed time of those runs is averaged.
+
+        Args:
+            simulations: Number of simulations requested from each search.
+
+        Returns:
+            Mapping of thread count to a metrics dict with the keys
+            'avg_time', 'sims_per_sec', 'efficiency', 'speedup',
+            'avg_score' and 'times'.
+        """
+        print(f"\n=== 线程性能测试 ({simulations} 模拟) ===")
+
+        # 1, 2 and 4 threads are always probed; 8 and 16 only when the host
+        # reports at least that many cores.
+        thread_configs = [1, 2, 4]
+        if self.cpu_count >= 8:
+            thread_configs.append(8)
+        if self.cpu_count >= 16:
+            thread_configs.append(16)
+
+        results = {}
+        # Throughput of the single-thread pass; stays None until measured so
+        # later speedups never depend on inspecting locals().
+        baseline_speed = None
+
+        for num_threads in thread_configs:
+            print(f"\n测试 {num_threads} 线程...")
+
+            mcts = PureMCTS(
+                c_param=1.414,
+                max_simulation_depth=80,
+                num_threads=num_threads
+            )
+
+            # Per-run measurements that are averaged below.
+            times = []
+            scores = []
+
+            for run in range(3):
+                game = Game2048(height=3, width=3, seed=42 + run)
+
+                # perf_counter is monotonic and high resolution; time.time()
+                # can report a zero interval for a short search, which would
+                # make the throughput division below fail.
+                start_time = time.perf_counter()
+                _, root = mcts.search(game, simulations)
+                elapsed_time = time.perf_counter() - start_time
+
+                times.append(elapsed_time)
+                # Quality proxy: mean average_value over the root's children,
+                # or 0 when the search produced no root or no children.
+                if root and root.children:
+                    child_values = sum(child.average_value for child in root.children.values())
+                    scores.append(child_values / len(root.children))
+                else:
+                    scores.append(0)
+
+            avg_time = sum(times) / len(times)
+            avg_score = sum(scores) / len(scores)
+            sims_per_sec = simulations / avg_time
+
+            # Throughput normalised by the number of threads used.
+            efficiency = sims_per_sec / num_threads
+
+            # Speedup is relative to the single-thread throughput.
+            if num_threads == 1:
+                baseline_speed = sims_per_sec
+                speedup = 1.0
+            elif baseline_speed:
+                speedup = sims_per_sec / baseline_speed
+            else:
+                # No usable single-thread measurement to compare against.
+                speedup = 1.0
+
+            results[num_threads] = {
+                'avg_time': avg_time,
+                'sims_per_sec': sims_per_sec,
+                'efficiency': efficiency,
+                'speedup': speedup,
+                'avg_score': avg_score,
+                'times': times
+            }
+
+            print(f"  平均时间: {avg_time:.3f}秒")
+            print(f"  模拟速度: {sims_per_sec:.1f} 次/秒")
+            print(f"  效率: {efficiency:.1f} 模拟/秒/核心")
+            print(f"  加速比: {speedup:.2f}x")
+
+        return results
+    
+    def test_simulation_depth(self, num_threads: Optional[int] = None) -> Dict[int, Dict]:
+        """
+        Measure how the maximum simulation depth affects search speed.
+
+        Each depth in (50, 80, 120, 200) is searched once with 150
+        simulations on a 3x3 game created with seed 42.
+
+        Args:
+            num_threads: Thread count passed to the search. When None,
+                min(4, cpu_count) is used.
+
+        Returns:
+            Mapping of depth to a dict with 'time', 'sims_per_sec' and
+            'avg_value'.
+        """
+        if num_threads is None:
+            num_threads = min(4, self.cpu_count)
+
+        print(f"\n=== 模拟深度测试 ({num_threads} 线程) ===")
+
+        depths = [50, 80, 120, 200]
+        # The simulation count is fixed so only the depth varies.
+        simulations = 150
+        results = {}
+
+        for depth in depths:
+            print(f"\n测试深度 {depth}...")
+
+            mcts = PureMCTS(
+                c_param=1.414,
+                max_simulation_depth=depth,
+                num_threads=num_threads
+            )
+
+            game = Game2048(height=3, width=3, seed=42)
+
+            # perf_counter is monotonic and high resolution; time.time() can
+            # report a zero interval and break the division below.
+            start_time = time.perf_counter()
+            _, root = mcts.search(game, simulations)
+            elapsed_time = time.perf_counter() - start_time
+
+            sims_per_sec = simulations / elapsed_time
+            # Mean average_value over the root's children, 0 without children.
+            if root and root.children:
+                avg_value = sum(child.average_value for child in root.children.values()) / len(root.children)
+            else:
+                avg_value = 0
+
+            results[depth] = {
+                'time': elapsed_time,
+                'sims_per_sec': sims_per_sec,
+                'avg_value': avg_value
+            }
+
+            print(f"  时间: {elapsed_time:.3f}秒")
+            print(f"  速度: {sims_per_sec:.1f} 次/秒")
+            print(f"  平均价值: {avg_value:.1f}")
+
+        return results
+    
+    def test_board_sizes(self, num_threads: Optional[int] = None) -> Dict[str, Dict]:
+        """
+        Measure search speed on several board shapes.
+
+        Boards of 3x3, 4x4, 3x4 and 4x3 are each searched once with 100
+        simulations and a maximum simulation depth of 80.
+
+        Args:
+            num_threads: Thread count passed to the search. When None,
+                min(4, cpu_count) is used.
+
+        Returns:
+            Mapping of "<height>x<width>" to a dict with 'time',
+            'sims_per_sec', 'valid_moves' and 'board_cells'.
+        """
+        if num_threads is None:
+            num_threads = min(4, self.cpu_count)
+
+        print(f"\n=== 棋盘大小测试 ({num_threads} 线程) ===")
+
+        board_sizes = [(3, 3), (4, 4), (3, 4), (4, 3)]
+        simulations = 100
+        results = {}
+
+        for height, width in board_sizes:
+            size_key = f"{height}x{width}"
+            print(f"\n测试 {size_key} 棋盘...")
+
+            mcts = PureMCTS(
+                c_param=1.414,
+                max_simulation_depth=80,
+                num_threads=num_threads
+            )
+
+            game = Game2048(height=height, width=width, seed=42)
+
+            # perf_counter is monotonic and high resolution; time.time() can
+            # report a zero interval and break the division below.
+            start_time = time.perf_counter()
+            mcts.search(game, simulations)  # only the duration is needed here
+            elapsed_time = time.perf_counter() - start_time
+
+            sims_per_sec = simulations / elapsed_time
+            # Queried after the search, exactly where the timing code left the game.
+            valid_moves = len(game.get_valid_moves())
+
+            results[size_key] = {
+                'time': elapsed_time,
+                'sims_per_sec': sims_per_sec,
+                'valid_moves': valid_moves,
+                'board_cells': height * width
+            }
+
+            print(f"  时间: {elapsed_time:.3f}秒")
+            print(f"  速度: {sims_per_sec:.1f} 次/秒")
+            print(f"  有效动作: {valid_moves}")
+
+        return results
+    
+    def find_optimal_config(self, thread_results: Optional[Dict[int, Dict]] = None) -> Dict:
+        """
+        Pick the best thread count and assemble the recommended configuration.
+
+        Args:
+            thread_results: Output of test_thread_performance() to reuse.
+                When None, the thread benchmark is run here with 200
+                simulations.
+
+        Returns:
+            Dict with 'recommended_threads', 'recommended_depth',
+            'recommended_board_size', 'performance_summary' and 'system_info'.
+        """
+        print("\n=== 寻找最优配置 ===")
+
+        # Reuse measurements supplied by the caller so the (slow) thread
+        # benchmark is not executed twice for one report.
+        if thread_results is None:
+            thread_results = self.test_thread_performance(200)
+
+        # Balance absolute speed against per-thread efficiency.
+        best_thread_score = 0
+        best_threads = 1
+
+        for threads, result in thread_results.items():
+            # Combined score: speed * 0.7 + efficiency * 0.3
+            score = result['sims_per_sec'] * 0.7 + result['efficiency'] * 0.3
+            if score > best_thread_score:
+                best_thread_score = score
+                best_threads = threads
+
+        best = thread_results[best_threads]
+        print(f"\n最优线程数: {best_threads}")
+        print(f"  速度: {best['sims_per_sec']:.1f} 模拟/秒")
+        print(f"  效率: {best['efficiency']:.1f} 模拟/秒/核心")
+        print(f"  加速比: {best['speedup']:.2f}x")
+
+        # These runs only print their measurements; the recommended depth and
+        # board size below are fixed values, not derived from them.
+        self.test_simulation_depth(best_threads)
+        self.test_board_sizes(best_threads)
+
+        optimal_config = {
+            'recommended_threads': best_threads,
+            'recommended_depth': 80,  # balances speed and quality
+            'recommended_board_size': (3, 3),  # recommended for the L0 stage
+            'performance_summary': {
+                'best_speed': best['sims_per_sec'],
+                'best_efficiency': best['efficiency'],
+                'speedup': best['speedup']
+            },
+            'system_info': {
+                'cpu_cores': self.cpu_count,
+                'cuda_available': self.cuda_available
+            }
+        }
+
+        return optimal_config
+
+    def run_full_benchmark(self) -> Dict:
+        """
+        Run every benchmark, save the results as JSON and return them.
+
+        The thread benchmark is executed once and shared with
+        find_optimal_config(), so the stored 'thread_performance' and
+        'optimal_config' entries describe the same measurements.
+
+        Returns:
+            Dict with 'timestamp', 'system_info', 'thread_performance',
+            'optimal_config' and 'benchmark_time' (seconds).
+        """
+        print("Deep2048 快速基准测试")
+        print("=" * 50)
+
+        timestamp = time.time()
+        start_time = time.perf_counter()
+
+        thread_results = self.test_thread_performance(200)
+        results = {
+            'timestamp': timestamp,
+            'system_info': {
+                'cpu_cores': self.cpu_count,
+                'cuda_available': self.cuda_available
+            },
+            'thread_performance': thread_results,
+            'optimal_config': self.find_optimal_config(thread_results)
+        }
+
+        total_time = time.perf_counter() - start_time
+        results['benchmark_time'] = total_time
+
+        # One result file per run, named after the current Unix time.
+        result_file = self.output_dir / f"benchmark_results_{int(time.time())}.json"
+        with open(result_file, 'w', encoding='utf-8') as f:
+            json.dump(results, f, indent=2, ensure_ascii=False)
+
+        print(f"\n基准测试完成! 用时: {total_time:.1f}秒")
+        print(f"结果已保存到: {result_file}")
+
+        return results
+    
+    def print_recommendations(self, results: Dict):
+        """
+        Print the recommended settings stored under results['optimal_config'].
+
+        Args:
+            results: Benchmark results containing an 'optimal_config' entry
+                with the recommended values and a 'performance_summary'.
+        """
+        config = results['optimal_config']
+        divider = "=" * 50
+
+        print("\n" + divider)
+        print("🚀 性能优化推荐")
+        print(divider)
+
+        print(f"推荐线程数: {config['recommended_threads']}")
+        print(f"推荐模拟深度: {config['recommended_depth']}")
+        print(f"推荐棋盘大小: {config['recommended_board_size']}")
+
+        print("\n预期性能:")
+        summary = config['performance_summary']
+        print(f"  模拟速度: {summary['best_speed']:.1f} 次/秒")
+        print(f"  CPU效率: {summary['best_efficiency']:.1f} 模拟/秒/核心")
+        print(f"  多线程加速: {summary['speedup']:.2f}x")
+
+        # Ready-to-paste snippet that uses the recommended values.
+        print("\n配置示例:")
+        snippet = (
+            "```python",
+            "mcts = PureMCTS(",
+            "    c_param=1.414,",
+            f"    max_simulation_depth={config['recommended_depth']},",
+            f"    num_threads={config['recommended_threads']}",
+            ")",
+            "```",
+        )
+        for line in snippet:
+            print(line)
+
+
+def main():
+    """Parse the command line and run either the quick or the full benchmark."""
+    parser = argparse.ArgumentParser(description="Deep2048快速基准测试")
+    parser.add_argument("--output", "-o", default="results/benchmark", help="输出目录")
+    parser.add_argument("--quick", action="store_true", help="快速测试模式")
+    args = parser.parse_args()
+
+    benchmark = QuickBenchmark(args.output)
+
+    if not args.quick:
+        # Full run: all benchmarks followed by the printed recommendation.
+        full_results = benchmark.run_full_benchmark()
+        benchmark.print_recommendations(full_results)
+        return
+
+    # Quick mode: thread benchmark only, with a reduced simulation count.
+    print("快速测试模式")
+    thread_results = benchmark.test_thread_performance(100)
+
+    # Recommend the thread count with the highest raw throughput.
+    best_threads, best_metrics = max(
+        thread_results.items(),
+        key=lambda pair: pair[1]['sims_per_sec'],
+    )
+    print(f"\n快速推荐: 使用 {best_threads} 线程")
+    print(f"预期速度: {best_metrics['sims_per_sec']:.1f} 模拟/秒")
+
+
+# Run the benchmark command line only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/tools/cleanup.py
+++ b/tools/cleanup.py
@@ -0,0 +1,280 @@
+"""
+项目清理工具
+
+清理临时文件、旧数据目录和不必要的文件
+"""
+
+import os
+import shutil
+from pathlib import Path
+import argparse
+
+
+class ProjectCleaner:
+    """项目清理器"""
+    
+    def __init__(self, project_root: Path = None):
+        """
+        Set the project root and the glob patterns that count as clutter.
+
+        Args:
+            project_root: Root directory to clean. When omitted, the parent
+                of the directory containing this file is used.
+        """
+        if not project_root:
+            project_root = Path(__file__).parent.parent
+        self.project_root = project_root
+
+        # Data directories left over from earlier naming schemes.
+        legacy_data_dirs = [
+            "*_data",
+            "*_logs",
+            "*_checkpoints",
+            "training_data",
+            "demo_training_data",
+            "test_mcts_data",
+            "benchmark_training_data",
+            "gameplay_training_data",
+            "demo_mcts_training",
+            "test_batch_data",
+            "test_l0_data",
+        ]
+        # Python cache directories.
+        cache_dirs = ["__pycache__", ".pytest_cache"]
+        # Temporary/backup names; only directories with these names are
+        # picked up through this list.
+        scratch_names = ["*.tmp", "*.temp", "*.bak", "*.backup"]
+
+        # Directory patterns to remove.
+        self.temp_dirs = legacy_data_dirs + cache_dirs + scratch_names
+
+        # File patterns to remove.
+        self.temp_files = [
+            "*.pyc",
+            "*.pyo",
+            "*.log",
+            "*.pkl",
+            "*.pickle",
+            "*.prof",
+            "*.profile",
+            "mcts_*.png",
+        ]
+    
+    def scan_cleanup_targets(self) -> dict:
+        """
+        Collect the directories and files that match the cleanup patterns.
+
+        Directory patterns are matched directly under the project root and
+        file patterns are matched recursively. Every path is reported once,
+        and files inside a directory that is already a target are skipped,
+        because removing that directory removes them as well.
+
+        Returns:
+            Dict with 'directories' and 'files' (lists of entries holding
+            'path', 'size' and 'pattern') and 'total_size' in bytes.
+        """
+        targets = {
+            'directories': [],
+            'files': [],
+            'total_size': 0
+        }
+
+        # Several patterns can match one directory (for example "*_data" and
+        # "training_data"); a second entry would double its size and make
+        # the second removal fail.
+        seen_dirs = set()
+        for pattern in self.temp_dirs:
+            for path in self.project_root.glob(pattern):
+                if path in seen_dirs or not path.is_dir():
+                    continue
+                seen_dirs.add(path)
+                size = self._get_dir_size(path)
+                targets['directories'].append({
+                    'path': path,
+                    'size': size,
+                    'pattern': pattern
+                })
+                targets['total_size'] += size
+
+        seen_files = set()
+        for pattern in self.temp_files:
+            for path in self.project_root.rglob(pattern):
+                if path in seen_files or not path.is_file():
+                    continue
+                # Already counted and removed together with its directory.
+                if any(parent in seen_dirs for parent in path.parents):
+                    continue
+                seen_files.add(path)
+                size = path.stat().st_size
+                targets['files'].append({
+                    'path': path,
+                    'size': size,
+                    'pattern': pattern
+                })
+                targets['total_size'] += size
+
+        return targets
+    
+    def _get_dir_size(self, path: Path) -> int:
+        """
+        Return the combined size in bytes of all files below *path*.
+
+        Best effort: if walking the tree raises an OSError, the sum
+        gathered up to that point is returned.
+        """
+        accumulated = 0
+        try:
+            for entry in path.rglob('*'):
+                if not entry.is_file():
+                    continue
+                accumulated += entry.stat().st_size
+        except (OSError, PermissionError):
+            # Deliberately ignored; the partial sum is still useful.
+            pass
+        return accumulated
+    
+    def _format_size(self, size_bytes: int) -> str:
+        """Render a byte count with one decimal and a B/KB/MB/GB/TB unit."""
+        units = ('B', 'KB', 'MB', 'GB')
+        value = size_bytes
+        index = 0
+        while index < len(units):
+            if value < 1024:
+                return f"{value:.1f} {units[index]}"
+            # Move on to the next larger unit.
+            value = value / 1024
+            index += 1
+        # Anything that is still >= 1024 GB is reported in terabytes.
+        return f"{value:.1f} TB"
+    
+    def preview_cleanup(self) -> dict:
+        """
+        Scan for cleanup targets and print a readable summary of them.
+
+        Every matched directory is listed; of the matched files only the
+        ten largest are listed, followed by a count of the remaining ones.
+
+        Returns:
+            The target dict produced by scan_cleanup_targets().
+        """
+        targets = self.scan_cleanup_targets()
+
+        def describe(entry: dict) -> str:
+            # One summary line: path relative to the project root plus size.
+            relative = entry['path'].relative_to(self.project_root)
+            return f"  {relative} ({self._format_size(entry['size'])})"
+
+        print("🔍 扫描清理目标...")
+        print("=" * 50)
+
+        directories = targets['directories']
+        if directories:
+            print(f"📁 目录 ({len(directories)} 个):")
+            for entry in directories:
+                print(describe(entry))
+
+        files = targets['files']
+        if files:
+            print(f"\n📄 文件 ({len(files)} 个):")
+            # Largest first; only the top ten are shown individually.
+            largest_first = sorted(files, key=lambda entry: entry['size'], reverse=True)
+            for entry in largest_first[:10]:
+                print(describe(entry))
+
+            hidden = len(files) - 10
+            if hidden > 0:
+                print(f"  ... 还有 {hidden} 个文件")
+
+        print(f"\n💾 总大小: {self._format_size(targets['total_size'])}")
+
+        return targets
+    
+    def clean_targets(self, targets: dict, dry_run: bool = False) -> dict:
+        """
+        Delete (or, in a dry run, only list) the given cleanup targets.
+
+        Args:
+            targets: Dict with 'directories' and 'files' entry lists, as
+                returned by scan_cleanup_targets().
+            dry_run: When True nothing is deleted; each action is only printed.
+
+        Returns:
+            Dict with 'cleaned_dirs', 'cleaned_files', 'freed_space' (bytes)
+            and 'errors' (one message per entry that failed).
+        """
+        results = {
+            'cleaned_dirs': 0,
+            'cleaned_files': 0,
+            'freed_space': 0,
+            'errors': []
+        }
+
+        action = "预览" if dry_run else "清理"
+        print(f"\n🧹 {action}清理操作...")
+        print("=" * 50)
+
+        marker = '[预览]' if dry_run else '✅'
+
+        # (targets key, removal function, success label, failure label, counter)
+        jobs = (
+            ('directories', shutil.rmtree, "删除目录", "删除目录失败", 'cleaned_dirs'),
+            ('files', lambda p: p.unlink(), "删除文件", "删除文件失败", 'cleaned_files'),
+        )
+
+        # Directories are processed before files, one entry at a time, so a
+        # failure only affects the entry it belongs to.
+        for group, remove, done_label, fail_label, counter in jobs:
+            for item in targets[group]:
+                try:
+                    if not dry_run:
+                        remove(item['path'])
+
+                    rel_path = item['path'].relative_to(self.project_root)
+                    size_str = self._format_size(item['size'])
+                    print(f"{marker} {done_label}: {rel_path} ({size_str})")
+
+                    results[counter] += 1
+                    results['freed_space'] += item['size']
+
+                except Exception as e:
+                    error_msg = f"{fail_label} {item['path']}: {e}"
+                    results['errors'].append(error_msg)
+                    print(f"❌ {error_msg}")
+
+        return results
+    
+    def clean_project(self, dry_run: bool = False, interactive: bool = True) -> dict:
+        """
+        Preview the cleanup targets, optionally ask for confirmation, then clean.
+
+        Args:
+            dry_run: When True nothing is deleted; the actions are only printed.
+            interactive: When True (and not a dry run) the user has to answer
+                'y' or 'yes' before anything is deleted.
+
+        Returns:
+            Dict with 'cleaned_dirs', 'cleaned_files', 'freed_space' (bytes)
+            and 'errors'. All counters are zero when nothing matched or the
+            user declined.
+        """
+        print("🧹 Deep2048 项目清理工具")
+        print("=" * 50)
+
+        targets = self.preview_cleanup()
+
+        # Decide by what was found, not by the byte total: empty directories
+        # and zero-byte files are cleanup targets too.
+        if not targets['directories'] and not targets['files']:
+            print("\n✨ 项目已经很干净了！")
+            return {'cleaned_dirs': 0, 'cleaned_files': 0, 'freed_space': 0, 'errors': []}
+
+        # Ask before deleting; a dry run never needs confirmation.
+        if interactive and not dry_run:
+            total_size_str = self._format_size(targets['total_size'])
+            response = input(f"\n❓ 确定要清理这些文件吗？(将释放 {total_size_str}) [y/N]: ")
+            if response.strip().lower() not in ['y', 'yes']:
+                print("❌ 清理操作已取消")
+                return {'cleaned_dirs': 0, 'cleaned_files': 0, 'freed_space': 0, 'errors': []}
+
+        results = self.clean_targets(targets, dry_run)
+
+        print(f"\n📊 清理结果:")
+        print(f"  清理目录: {results['cleaned_dirs']} 个")
+        print(f"  清理文件: {results['cleaned_files']} 个")
+        print(f"  释放空间: {self._format_size(results['freed_space'])}")
+
+        if results['errors']:
+            print(f"  错误: {len(results['errors'])} 个")
+            for error in results['errors']:
+                print(f"    {error}")
+
+        # Report completion whenever something was removed, even if the
+        # removed entries were empty.
+        if not dry_run and (results['cleaned_dirs'] or results['cleaned_files']):
+            print(f"\n✅ 清理完成！")
+
+        return results
+
+
+def main():
+    """
+    Command-line entry point of the cleanup tool.
+
+    Returns:
+        0 when the cleanup finished without errors, otherwise 1.
+    """
+    parser = argparse.ArgumentParser(description="Deep2048项目清理工具")
+    parser.add_argument("--dry-run", action="store_true", help="预览模式，不实际删除文件")
+    parser.add_argument("--yes", "-y", action="store_true", help="自动确认，不询问")
+    parser.add_argument("--project-root", help="项目根目录路径")
+    args = parser.parse_args()
+
+    # An explicit --project-root wins; otherwise use the parent of the
+    # directory that contains this file.
+    project_root = (
+        Path(args.project_root) if args.project_root
+        else Path(__file__).parent.parent
+    )
+
+    if not project_root.exists():
+        print(f"❌ 项目根目录不存在: {project_root}")
+        return 1
+
+    cleaner = ProjectCleaner(project_root)
+
+    try:
+        results = cleaner.clean_project(
+            dry_run=args.dry_run,
+            interactive=not args.yes
+        )
+    except KeyboardInterrupt:
+        print("\n❌ 用户中断清理操作")
+        return 1
+    except Exception as e:
+        # Last-resort handler at the CLI boundary: report and fail.
+        print(f"❌ 清理过程中出现错误: {e}")
+        import traceback
+        traceback.print_exc()
+        return 1
+
+    return 1 if results['errors'] else 0
+
+
+if __name__ == "__main__":
+    # exit() is a helper installed by the site module and may be missing
+    # (for example under "python -S"); SystemExit is always available and
+    # passes main()'s return value on as the process exit status.
+    raise SystemExit(main())