"""Export input files verbatim into a single CSV for manual comparison.

Every input contributes rows of the form ``(source_file, line_no, raw_data)``:

* text inputs yield one row per line, with only the line terminator removed;
* binary inputs (``.page``, ``.ibd``, ``.bin``) yield one row per run of
  printable ASCII bytes.

No parsing or classification of the content is performed.
"""

import csv
import os
import sys

# Extensions whose content is scanned for printable ASCII runs instead of
# being read line by line. Any other extension is treated as text.
BINARY_EXTENSIONS = frozenset({".page", ".ibd", ".bin"})


def export_text_file(path, writer):
    """Write one CSV row per line of the text file at *path*.

    Each row is ``[path, line_no, raw]``, where ``line_no`` starts at 1 and
    ``raw`` is the line exactly as read (quotes, commas and spaces kept) with
    only the trailing ``\\r``/``\\n`` characters removed.

    The file is decoded as UTF-8 with ``errors="ignore"``, so bytes that are
    not valid UTF-8 are dropped silently rather than raising.

    If the file cannot be opened or read, a single row
    ``[path, 0, "[READ_ERROR] <message>"]`` is written instead of raising, so
    one bad input does not abort the whole export.

    Args:
        path: Path of the text file to read.
        writer: Object with a ``writerow(sequence)`` method, such as the
            object returned by ``csv.writer``.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            for line_no, line in enumerate(f, start=1):
                # No processing: keep the line as-is, only drop the newline.
                writer.writerow([path, line_no, line.rstrip("\r\n")])
    except OSError as e:
        # Record the failure as a row so it can be investigated later.
        writer.writerow([path, 0, f"[READ_ERROR] {e}"])


def export_page_file_ascii(path, writer, *, min_len=3, chunk_size=1 << 16):
    """Write one CSV row per printable-ASCII run found in a binary file.

    A run is a maximal sequence of consecutive bytes in the range
    0x20-0x7E. Runs shorter than *min_len* are discarded. Each kept run
    produces a row ``[path, segment_no, text]`` with ``segment_no`` starting
    at 1 and counting only the kept runs.

    The file is read in chunks of *chunk_size* bytes so that large files are
    not loaded into memory at once; a run that spans a chunk boundary is
    still reported as a single segment.

    If the file cannot be opened or read, a row
    ``[path, 0, "[READ_ERROR] <message>"]`` is written instead of raising.

    Note: the raw byte stream itself is not exported; if it is needed, keep
    it as a separate binary file rather than putting it into the CSV.

    Args:
        path: Path of the binary file to scan.
        writer: Object with a ``writerow(sequence)`` method.
        min_len: Minimum number of bytes a run needs to be exported.
        chunk_size: Number of bytes to read per chunk; must be positive.
    """
    segment_no = 0
    run = bytearray()
    try:
        with open(path, "rb") as f:
            # iter(callable, sentinel) stops when read() returns b"" (EOF).
            for chunk in iter(lambda: f.read(chunk_size), b""):
                for byte in chunk:
                    if 0x20 <= byte <= 0x7E:
                        run.append(byte)
                        continue
                    # A non-printable byte ends the current run.
                    if len(run) >= min_len:
                        segment_no += 1
                        writer.writerow([path, segment_no, run.decode("ascii")])
                    run.clear()
        # Flush a run that reaches the end of the file.
        if len(run) >= min_len:
            segment_no += 1
            writer.writerow([path, segment_no, run.decode("ascii")])
    except OSError as e:
        # Record the failure as a row so it can be investigated later.
        writer.writerow([path, 0, f"[READ_ERROR] {e}"])


def _same_file(path_a, path_b):
    """Return True when both paths resolve to the same filesystem location."""
    resolved_a = os.path.normcase(os.path.realpath(path_a))
    resolved_b = os.path.normcase(os.path.realpath(path_b))
    return resolved_a == resolved_b


def main():
    """Command-line entry point: ``export_raw.py <output CSV> <input files...>``.

    Writes a header row followed by the rows of every input, in the order the
    inputs were given. Inputs are dispatched on their lower-cased extension:
    members of ``BINARY_EXTENSIONS`` are scanned for ASCII runs, everything
    else is exported line by line as text.

    Exits with status 1 after printing usage when fewer than two arguments
    are supplied.
    """
    if len(sys.argv) < 3:
        print("用法: python3 export_raw.py <输出CSV> <输入文件...>")
        print("说明: 不做任何解析或分类,逐行/逐段原样导出到 raw_data 列。")
        sys.exit(1)

    out_csv = sys.argv[1]
    inputs = sys.argv[2:]

    with open(out_csv, "w", newline="", encoding="utf-8") as out:
        writer = csv.writer(out)
        writer.writerow(["source_file", "line_no", "raw_data"])
        for path in inputs:
            if _same_file(path, out_csv):
                # Reading the file that is being written would feed the
                # export its own output, so this input is skipped.
                print(f"跳过 {path}: 与输出文件相同", file=sys.stderr)
                continue
            ext = os.path.splitext(path)[1].lower()
            if ext in BINARY_EXTENSIONS:
                # Binary inputs: export printable ASCII runs only; no
                # timestamp or structure parsing is attempted.
                export_page_file_ascii(path, writer)
            else:
                # .txt/.csv/.log and any unknown extension are read as text.
                export_text_file(path, writer)

    print(f"已生成 {out_csv}")


if __name__ == "__main__":
    main()