"""Rebuild a CSV table from "strings" dumps and raw page files.

The script reads text files containing one recovered string per line
(``*.txt``) plus binary page files (``*.page``), pairs the strings up into
``machineID`` / ``key`` records, assigns timestamps found in the pages to
the records in order, and writes everything to a CSV file.

NOTE(review): the "infimum"/"supremum" filter suggests the inputs come from
InnoDB pages -- confirm against the recovery procedure.
"""

import csv
import sys
import re
import os


def load_strings(filename: str) -> list:
    """Return the non-blank lines of *filename*, stripped of whitespace.

    The file is decoded as UTF-8; undecodable bytes are dropped
    (``errors="ignore"``) rather than raising.
    """
    with open(filename, "r", encoding="utf-8", errors="ignore") as f:
        stripped = (line.strip() for line in f)
        return [s for s in stripped if s]


def parse_datetime(raw: str) -> str:
    """Format the first 14 characters of *raw* as ``YYYY-MM-DD HH:MM:SS``.

    *raw* is expected to look like ``YYYYMMDDhhmmss``; no range validation
    is performed, the characters are only re-grouped.
    """
    ts = raw[:14]
    return f"{ts[0:4]}-{ts[4:6]}-{ts[6:8]} {ts[8:10]}:{ts[10:12]}:{ts[12:14]}"


def extract_datetimes_from_page(filename: str) -> list:
    """Return every timestamp-looking digit run in a binary file.

    A candidate is ``20`` followed by 12 to 14 ASCII digits; each one is
    formatted with :func:`parse_datetime` (only its first 14 digits are
    used). Results are returned in file order.
    """
    with open(filename, "rb") as f:
        data = f.read()
    # In a bytes pattern ``\d`` matches ASCII digits only, so decoding the
    # match can never fail and no error handling is needed here.
    return [
        parse_datetime(m.decode("ascii"))
        for m in re.findall(rb"20\d{12,14}", data)
    ]


def is_noise(s: str) -> bool:
    """Return True for strings that should be ignored when grouping.

    Noise is: the words "infimum"/"supremum" (case-insensitive), anything
    starting with ``===``, and strings shorter than two characters.
    """
    return (
        s.lower() in ("infimum", "supremum")
        or s.startswith("===")
        or len(s) < 2
    )


def looks_like_order_no(s: str) -> bool:
    """Return True when *s* has the shape of an order number.

    Accepted shapes:
      * starts with ``AGMS<digits>`` optionally followed by ``-<digits>``
      * starts with ``GMS<digits>-<digits>``
      * consists entirely of 12 or more digits

    The two ``GMS`` forms are prefix matches (trailing characters are
    tolerated); only the all-digits form must match the whole string.
    """
    return bool(
        re.match(r"AGMS\d+(-\d+)?", s)
        or re.match(r"GMS\d+-\d+", s)
        or re.fullmatch(r"\d{12,}", s)
    )


def group_records(strings, datetimes):
    """Group recovered strings into table rows.

    Non-noise strings are consumed pairwise as ``machineID`` and ``key``.
    If the string immediately following a completed pair looks like an
    order number (see :func:`looks_like_order_no`) it is attached to that
    row instead of starting a new one. Each row takes the next two entries
    of *datetimes* as register and expire time; missing timestamps become
    empty strings.

    Args:
        strings: iterable of recovered strings, in table order.
        datetimes: iterable of formatted timestamps, in table order.

    Returns:
        A list of ``[machineID, key, register_time, expire_time, order_no]``
        lists. A trailing unpaired string is discarded.

    NOTE(review): a machineID that itself looks like an order number and
    directly follows a row without one would be taken as that row's
    order_no -- presumably machine IDs never have that shape; verify.
    """
    records = []
    pending = []
    dt_iter = iter(datetimes)  # hand out timestamps sequentially
    for s in strings:
        if is_noise(s):
            continue
        # The previous implementation flushed the buffer as soon as it held
        # two items, so its order-number scan never saw any data and order
        # numbers were mistaken for the next machineID. Attach the order
        # number to the row that was just completed instead.
        if (
            not pending
            and records
            and not records[-1][4]
            and looks_like_order_no(s)
        ):
            records[-1][4] = s
            continue
        pending.append(s)
        if len(pending) == 2:  # have both machineID and key
            machine_id, key = pending
            # Timestamps come from the binary pages, two per row.
            reg = next(dt_iter, "")
            exp = next(dt_iter, "")
            records.append([machine_id, key, reg, exp, ""])
            pending = []
    return records


def main(argv):
    """Run the recovery and write the CSV; return the process exit code.

    *argv* has the same layout as ``sys.argv``: program name, output CSV
    path, then any mix of ``.txt`` strings files and ``.page`` files.
    """
    if len(argv) < 4:
        print(
            "用法: python3 recover_table.py <输出CSV文件> "
            "<strings文件.txt ...> <page文件.page ...>"
        )
        return 1

    out_csv = argv[1]

    # Inputs are classified by suffix, not by position; arguments with any
    # other suffix are ignored.
    args = argv[2:]
    strings_files = [a for a in args if a.endswith(".txt")]
    page_files = [a for a in args if a.endswith(".page")]

    all_strings = []
    for fname in strings_files:
        all_strings.extend(load_strings(fname))

    all_datetimes = []
    for pf in page_files:
        all_datetimes.extend(extract_datetimes_from_page(pf))

    records = group_records(all_strings, all_datetimes)

    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["machineID", "key", "register_time", "expire_time", "order_no"])
        writer.writerows(records)

    print(f"已生成 {out_csv},共 {len(records)} 条记录")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))