"""Rebuild machineID/key/order records from a one-value-per-row raw dump CSV."""

import csv
import re
import sys

# Patterns are compiled once at import time; the pattern text is unchanged.
_HEX32_RE = re.compile(r"[A-F0-9]{32}")
_AGMS_ORDER_RE = re.compile(r"AGMS\d+(-\d{12})?")
_GMS_ORDER_RE = re.compile(r"GMS\d+-\d{12}")
_DIGIT_ORDER_RE = re.compile(r"\d{12,}")
_DATETIME_RE = re.compile(r"(20\d{12,14})")
_LONG_DIGITS_RE = re.compile(r"\d{15,}")

# Values that are dump noise rather than record fields.
_NOISE_WORDS = ("infimum", "supremum")
_NOISE_PREFIXES = ("===", "'", ":", ",")

# How many rows after a machine ID are searched for its remaining fields.
_DEFAULT_LOOKAHEAD = 8


def parse_datetime(raw: str) -> str:
    """Format the first 14 characters of *raw* as ``YYYY-MM-DD HH:MM:SS``.

    The input is sliced positionally and not validated, so callers must pass
    a string that starts with a ``YYYYMMDDHHMMSS`` digit run.
    """
    ts = raw[:14]
    return f"{ts[0:4]}-{ts[4:6]}-{ts[6:8]} {ts[8:10]}:{ts[10:12]}:{ts[12:14]}"


def is_machine_id(s: str) -> bool:
    """Return True if *s* looks like a machine ID.

    A machine ID is either exactly 32 upper-case hex characters, or any
    alphanumeric string of length >= 8.
    """
    # NOTE(review): ``isalnum()`` also accepts pure-digit or pure-letter
    # strings, so long order numbers and timestamps pass this check too.
    return bool(_HEX32_RE.fullmatch(s)) or (len(s) >= 8 and s.isalnum())


def is_order_no(s: str) -> bool:
    """Return True if *s* looks like an order number.

    Accepted shapes: a value starting with ``AGMS<digits>`` (optionally
    followed by ``-<12 digits>``), a value starting with
    ``GMS<digits>-<12 digits>``, or a value made only of 12 or more digits.
    """
    if _AGMS_ORDER_RE.match(s):
        return True
    if _GMS_ORDER_RE.match(s):
        return True
    return bool(_DIGIT_ORDER_RE.fullmatch(s))


def extract_datetimes(values):
    """Collect timestamps from *values* and return ``(register, expire)``.

    Every ``20`` + 12..14 digit run found in a value is formatted with
    :func:`parse_datetime`. The first timestamp collected is the register
    time and the second the expire time; a missing one is ``""``.
    """
    times = []
    for v in values:
        times.extend(parse_datetime(raw) for raw in _DATETIME_RE.findall(v))
        # NOTE(review): for an all-digit value of 15+ digits that starts with
        # "20", this appends the same leading timestamp a second time, so the
        # expire time can equal the register time. Confirm this is intended.
        if _LONG_DIGITS_RE.fullmatch(v):
            times.append(parse_datetime(v[:14]))
    reg = times[0] if times else ""
    exp = times[1] if len(times) > 1 else ""
    return reg, exp


def _is_noise(raw: str) -> bool:
    """Return True for empty values and dump markers that carry no data."""
    return (
        not raw
        or raw.lower() in _NOISE_WORDS
        or raw.startswith(_NOISE_PREFIXES)
    )


def _load_rows(in_csv):
    """Read *in_csv* and return ``(source_file, line_no, value)`` for kept rows."""
    rows = []
    with open(in_csv, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing columns with None.
            raw = (row["raw_data"] or "").strip()
            if _is_noise(raw):
                continue
            rows.append((row["source_file"], row["line_no"], raw))
    return rows


def _assemble_record(rows, start, lookahead):
    """Build one record for the machine ID at ``rows[start]``.

    Returns ``(fields, last_used)`` where *fields* is
    ``[machineID, key, register_time, expire_time, order_no]`` and
    *last_used* is the index of the last row that supplied a field.
    """
    machine_id = rows[start][2]
    key = order_no = reg = exp = ""
    last_used = start
    stop = min(len(rows), start + 1 + max(lookahead, 0))
    # The window starts at the machine ID row itself, then looks ahead.
    for idx in range(start, stop):
        val = rows[idx][2]
        used = False
        if not key and val != machine_id and not is_order_no(val):
            key = val
            used = True
        if not order_no and is_order_no(val):
            order_no = val
            used = True
        if not reg and _DATETIME_RE.match(val):
            reg, exp = extract_datetimes([val])
            used = True
        if used:
            last_used = idx
        # A record is considered complete once key and order number are known.
        if key and order_no:
            break
    return [machine_id, key, reg, exp, order_no], last_used


def parse_raw_dump(in_csv, out_csv, *, lookahead=_DEFAULT_LOOKAHEAD):
    """Parse the raw dump *in_csv* and write assembled records to *out_csv*.

    The input must have ``source_file``, ``line_no`` and ``raw_data``
    columns. Empty values, ``infimum``/``supremum`` markers and values
    starting with ``===``, ``'``, ``:`` or ``,`` are ignored. Each remaining
    value that looks like a machine ID starts a record; its key, order
    number and timestamps are taken from that row and up to *lookahead*
    following rows.

    The previous implementation iterated ``list(reader)`` as if it held
    ``(line_no, value)`` tuples. Those items are dict rows, so unpacking
    failed, and the reader was exhausted after the first machine ID. Rows
    are now pre-loaded and scanned by index, so every machine ID is handled.

    Args:
        in_csv: Path of the raw dump CSV to read (UTF-8).
        out_csv: Path of the CSV to write (UTF-8, overwritten).
        lookahead: Maximum number of rows after a machine ID to inspect.
    """
    rows = _load_rows(in_csv)
    records = []
    i = 0
    while i < len(rows):
        source_file, line_no, raw = rows[i]
        if not is_machine_id(raw):
            i += 1
            continue
        fields, last_used = _assemble_record(rows, i, lookahead)
        records.append(fields + [f"{source_file}:{line_no}"])
        # Rows that fed this record must not start a record of their own.
        i = last_used + 1

    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(
            ["machineID", "key", "register_time", "expire_time", "order_no", "source"]
        )
        writer.writerows(records)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("用法: python3 parse_raw_dump.py raw_dump.csv parsed.csv")
        sys.exit(1)
    parse_raw_dump(sys.argv[1], sys.argv[2])
    print(f"已生成 {sys.argv[2]}")