import csv import sys import re def load_strings(filename): with open(filename, "r", encoding="utf-8", errors="ignore") as f: return [line.strip() for line in f if line.strip()] def parse_datetime(raw): ts = raw[:14] return f"{ts[0:4]}-{ts[4:6]}-{ts[6:8]} {ts[8:10]}:{ts[10:12]}:{ts[12:14]}" def extract_datetimes(values): """在当前记录的所有字段里提取时间戳""" times = [] for v in values: m = re.findall(r"(20\d{12,14})", v) for raw in m: times.append(parse_datetime(raw)) reg = times[0] if len(times) > 0 else "" exp = times[1] if len(times) > 1 else "" return reg, exp def is_noise(s): if s.lower() in ("infimum", "supremum"): return True if s.startswith("==="): return True if len(s) < 2: return True return False def looks_like_order_no(s): if re.match(r"AGMS\d+(-\d+)?", s): return True if re.match(r"GMS\d+-\d+", s): return True if re.fullmatch(r"\d{12,}", s): return True return False def group_records(strings): records = [] buffer = [] for s in strings: if is_noise(s): continue buffer.append(s) if len(buffer) >= 2: # 至少有 machineID 和 key machineID = buffer[0] key = buffer[1] order_no = "" # 在剩余字段里找 order_no for v in buffer[2:]: if looks_like_order_no(v): order_no = v break reg, exp = extract_datetimes(buffer) records.append([machineID, key, reg, exp, order_no]) buffer = [] return records if __name__ == "__main__": if len(sys.argv) < 3: print("用法: python3 recover_table_linewise.py <输出CSV文件> ") sys.exit(1) out_csv = sys.argv[1] all_strings = [] for fname in sys.argv[2:]: all_strings.extend(load_strings(fname)) records = group_records(all_strings) with open(out_csv, "w", newline="", encoding="utf-8") as f: writer = csv.writer(f) writer.writerow(["machineID", "key", "register_time", "expire_time", "order_no"]) writer.writerows(records) print(f"已生成 {out_csv},共 {len(records)} 条记录")