-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexport_code_only.py
More file actions
266 lines (221 loc) · 8.38 KB
/
export_code_only.py
File metadata and controls
266 lines (221 loc) · 8.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/usr/bin/env python3
"""
只导出代码文件,跳过所有二进制和构建产物
专注于 HairMSNN 和 RTXCR 的源代码
"""
import os
import shutil
from pathlib import Path
from collections import defaultdict
# Lower-case file suffixes (with the leading dot) that count as exportable code.
CODE_EXTENSIONS = {
    # C / C++ sources and headers
    ".c", ".cc", ".cpp", ".cxx",
    ".h", ".hh", ".hpp", ".hxx",
    ".inl",
    # CUDA
    ".cu", ".cuh",
    # Shaders
    ".hlsl", ".hlsli", ".glsl",
    ".vert", ".frag", ".comp",
    ".rgen", ".rchit", ".rmiss", ".rahit",
    ".rgs",  # ray generation shader
    ".chs",  # closest hit shader
    # Scripts
    ".py", ".sh",
    ".bat", ".ps1", ".cmd",
    # Configuration / build descriptions
    ".cmake", ".json", ".xml", ".yaml", ".yml",
    ".cfg", ".ini", ".toml",
    ".in",  # CMake template files (important!)
    ".pc",  # pkg-config files
    ".plist",  # macOS plist
    ".rc",  # Windows resource
    ".def",  # module definition
    ".natvis",  # VS debugger visualizer
    ".props", ".targets",  # MSBuild
    ".sln", ".vcxproj", ".filters",  # Visual Studio project files
    # Documentation (optional)
    ".md", ".txt", ".rst",
    # Miscellaneous
    ".gitignore", ".gitmodules", ".gitattributes",
}
# Files such as CMakeLists.txt are special-cased: they are matched by their
# full file name rather than by extension.
SPECIAL_FILES = {
    "CMakeLists.txt",
    "Makefile",
    "LICENSE",
    "LICENSE.txt",
    "License.txt",
}
# Directory names that are skipped (matched by exact, case-sensitive name).
SKIP_DIRS = {
    # Build directories
    "build",
    "Build",
    "out",
    "x64",
    "x86",
    "Debug",
    "Release",
    "RelWithDebInfo",
    "MinSizeRel",
    "CMakeFiles",
    # IDE
    ".vs",
    ".vscode",
    ".idea",
    # Caches
    "__pycache__",
    ".cache",
    "node_modules",
    # Git
    ".git",
    # Binary output
    "bin",
    "lib",
    "Bin",
    "Lib",
    # Symbols
    "symbols",
}
# Lower-case file suffixes to skip (binary / large files).
SKIP_EXTENSIONS = {
    # Compiler and linker output.
    # ".obj" covers both MSVC object files and Wavefront OBJ models, so it is
    # listed once here instead of being repeated under models below.
    ".o", ".obj", ".a", ".lib", ".so", ".dll", ".dylib", ".exe", ".pdb", ".ilk", ".exp",
    # Images (unless needed)
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tga", ".hdr", ".exr", ".ico",
    # Models / assets
    ".bin", ".gltf", ".glb", ".mtl", ".fbx", ".dae",
    ".hair", ".abc",
    # Archives
    ".zip", ".tar", ".gz", ".7z", ".rar",
    # PDF
    ".pdf",
    # Other binaries
    ".ptx", ".cubin", ".fatbin", ".dds", ".ktx",
    # Fonts
    ".ttf", ".otf", ".woff", ".woff2",
    # Video
    ".mp4", ".avi", ".mov", ".webm",
    # Temporary files
    ".tmp", ".temp", ".log", ".bak",
    # Shader cache
    ".cso", ".spv",
}
def get_size_str(size_bytes):
    """Convert a byte count into a human-readable size string.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest unit is reached; anything at or beyond 1024 GB is reported in TB.

    Args:
        size_bytes: Size in bytes (int or float). Negative values keep their
            sign and are scaled by magnitude.

    Returns:
        The size formatted with two decimals and a unit, e.g. "1.50 KB".
    """
    value = size_bytes
    for unit in ('B', 'KB', 'MB', 'GB'):
        # Compare magnitudes so that a negative size (e.g. a size delta) is
        # scaled exactly like its positive counterpart.
        if abs(value) < 1024:
            return f"{value:.2f} {unit}"
        value /= 1024
    return f"{value:.2f} TB"
def is_code_file(filepath):
    """Tell whether *filepath* should be treated as a code file.

    A path qualifies when any of the following holds:
      * its file name is listed in SPECIAL_FILES,
      * it has no suffix and its name starts with ".git",
      * its lower-cased suffix is listed in CODE_EXTENSIONS.

    Only the path text is inspected; the file system is not accessed.
    """
    candidate = Path(filepath)
    filename = candidate.name
    suffix = candidate.suffix.lower()
    return (
        filename in SPECIAL_FILES
        or (suffix == "" and filename.startswith(".git"))
        or suffix in CODE_EXTENSIONS
    )
def should_skip_dir(dirname):
    """Return True when the directory name *dirname* is listed in SKIP_DIRS."""
    is_excluded = dirname in SKIP_DIRS
    return is_excluded
def scan_code_files(root_path):
    """Walk *root_path* and classify every file as code or skipped.

    Directories for which should_skip_dir() is true are pruned and never
    entered. Each remaining file is classified in this order:
      1. suffix in SKIP_EXTENSIONS -> skipped, reason "二进制/资源"
      2. is_code_file() is true    -> code file
      3. anything else             -> skipped, reason "未识别"

    Files whose size cannot be read are left out of both lists.

    Args:
        root_path: Directory to scan (str or Path).

    Returns:
        A tuple ``(code_files, skipped_files)`` where ``code_files`` is a list
        of ``(Path, size_in_bytes)`` and ``skipped_files`` is a list of
        ``(Path, size_in_bytes, reason)``.
    """
    root = Path(root_path)
    code_files = []
    skipped_files = []

    for dirpath, dirnames, filenames in os.walk(root):
        dirpath = Path(dirpath)
        # Prune in place so os.walk does not descend into excluded directories.
        dirnames[:] = [d for d in dirnames if not should_skip_dir(d)]

        for filename in filenames:
            filepath = dirpath / filename

            # Every category needs the size, so stat once up front. Only
            # OSError is swallowed (broken symlink, permission problem, file
            # removed mid-scan); KeyboardInterrupt and programming errors
            # still propagate.
            try:
                size = filepath.stat().st_size
            except OSError:
                continue

            if filepath.suffix.lower() in SKIP_EXTENSIONS:
                # Explicitly excluded binary / asset file.
                skipped_files.append((filepath, size, "二进制/资源"))
            elif is_code_file(filepath):
                code_files.append((filepath, size))
            else:
                # Not recognised as either code or a known binary type.
                skipped_files.append((filepath, size, "未识别"))

    return code_files, skipped_files
def export_code(root_path, dest_path, include_docs=True):
    """Copy the code files found under *root_path* into *dest_path*.

    The relative directory layout is preserved and files are copied with
    shutil.copy2. If the destination already exists the user is asked on
    stdin whether to overwrite it; answering anything other than "y" cancels.
    On overwrite the existing destination is deleted first.

    Args:
        root_path: Project root to export from (str or Path).
        dest_path: Destination directory (str or Path).
        include_docs: When False, files with a .md/.txt/.rst/.pdf suffix are
            not copied, except files named in SPECIAL_FILES (for example
            CMakeLists.txt), which are always kept.

    Returns:
        None. Progress and the final summary are printed to stdout.
    """
    root = Path(root_path)
    dest = Path(dest_path)

    # Safety guard: overwriting deletes the destination, so a destination that
    # is the source root itself or one of its ancestors would wipe the very
    # files that are about to be exported.
    root_resolved = root.resolve()
    dest_resolved = dest.resolve()
    if dest_resolved == root_resolved or dest_resolved in root_resolved.parents:
        print(f"❌ 目标目录 {dest} 不能是源目录本身或其上级目录")
        print("取消操作")
        return

    if dest.exists():
        print(f"目标目录 {dest} 已存在")
        print("输入 'y' 覆盖,'n' 取消: ", end="")
        if input().strip().lower() != 'y':
            print("取消操作")
            return
        # shutil.rmtree only accepts real directories; a regular file or a
        # symlink at the destination has to be unlinked instead.
        if dest.is_dir() and not dest.is_symlink():
            shutil.rmtree(dest)
        else:
            dest.unlink()

    code_files, _ = scan_code_files(root)
    doc_suffixes = {".md", ".txt", ".rst", ".pdf"}

    copied = 0
    total_size = 0
    for filepath, size in code_files:
        # Optionally drop documentation. Whitelisted names such as
        # CMakeLists.txt carry a ".txt" suffix but are build inputs, so they
        # are never treated as documentation.
        if (
            not include_docs
            and filepath.suffix.lower() in doc_suffixes
            and filepath.name not in SPECIAL_FILES
        ):
            continue

        dest_file = dest / filepath.relative_to(root)
        dest_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(filepath, dest_file)
        copied += 1
        total_size += size

    print(f"\n✅ 导出完成!")
    print(f" 复制了 {copied} 个代码文件")
    print(f" 总大小: {get_size_str(total_size)}")
    print(f" 目标目录: {dest}")
def main():
    """Command-line entry point.

    Parses the arguments, scans the project root, prints statistics grouped
    by top-level directory and by file type, optionally lists the largest
    skipped files, and exports the code files when --output is given.
    """
    import argparse

    cli = argparse.ArgumentParser(description="只导出代码文件")
    cli.add_argument("--root", "-r", default=".", help="项目根目录")
    cli.add_argument("--output", "-o", help="输出目录(不指定则只统计)")
    cli.add_argument("--no-docs", action="store_true", help="不包含文档文件(.md/.txt)")
    cli.add_argument("--show-skipped", action="store_true", help="显示跳过的文件")
    options = cli.parse_args()

    project_root = Path(options.root).resolve()
    print(f"扫描目录: {project_root}\n")

    code_files, skipped_files = scan_code_files(project_root)

    # Overall totals
    code_bytes = sum(size for _, size in code_files)
    skipped_bytes = sum(entry[1] for entry in skipped_files)

    # Per-extension and per-top-level-directory totals, gathered in one pass
    def new_bucket():
        return {"count": 0, "size": 0}

    by_extension = defaultdict(new_bucket)
    by_top_dir = defaultdict(new_bucket)
    for source_file, size in code_files:
        # Files without a suffix are grouped under their full name.
        ext_key = source_file.suffix.lower() or source_file.name
        ext_bucket = by_extension[ext_key]
        ext_bucket["count"] += 1
        ext_bucket["size"] += size

        # Files located directly in the root get a dedicated label.
        parts = source_file.relative_to(project_root).parts
        dir_key = parts[0] if len(parts) > 1 else "(根目录)"
        dir_bucket = by_top_dir[dir_key]
        dir_bucket["count"] += 1
        dir_bucket["size"] += size

    banner = "=" * 70
    print(banner)
    print("📊 代码文件统计")
    print(banner)
    print(f" ✅ 代码文件: {len(code_files)} 个, {get_size_str(code_bytes)}")
    print(f" ⏭️ 跳过文件: {len(skipped_files)} 个, {get_size_str(skipped_bytes)}")
    print(banner)

    print("\n📁 按顶级目录统计:")
    dirs_by_size = sorted(
        by_top_dir.items(), key=lambda item: item[1]["size"], reverse=True
    )
    for dirname, stats in dirs_by_size:
        print(f" {dirname:30} : {stats['count']:5} 个, {get_size_str(stats['size']):>10}")

    print("\n📄 按文件类型统计 (前20):")
    exts_by_size = sorted(
        by_extension.items(), key=lambda item: item[1]["size"], reverse=True
    )
    for ext, stats in exts_by_size[:20]:
        print(f" {ext:15} : {stats['count']:5} 个, {get_size_str(stats['size']):>10}")

    # Largest skipped files, on request
    if options.show_skipped:
        print("\n⏭️ 跳过的文件 (前30大):")
        largest_skipped = sorted(
            skipped_files, key=lambda entry: entry[1], reverse=True
        )[:30]
        for skipped_path, size, reason in largest_skipped:
            rel_path = skipped_path.relative_to(project_root)
            print(f" [{reason:8}] {rel_path} ({get_size_str(size)})")

    # Without --output only the statistics are shown, followed by a usage hint.
    if not options.output:
        print(f"\n💡 提示: 使用 --output <目录> 导出代码文件")
        print(f" 例如: python export_code_only.py --output D:\\HairMSNN_code")
        return

    print(f"\n准备导出到: {options.output}")
    export_code(project_root, options.output, include_docs=not options.no_docs)
if __name__ == "__main__":
main()