import csv
import sys
import re

def parse_datetime(raw):
    """Format a compact ``YYYYMMDDhhmmss`` stamp as ``YYYY-MM-DD hh:mm:ss``.

    Only the first 14 characters of *raw* are used. No validation is done:
    a shorter input simply leaves the trailing fields empty.
    """
    stamp = raw[:14]
    # Field boundaries: year, month, day, hour, minute, second.
    bounds = (0, 4, 6, 8, 10, 12, 14)
    fields = [stamp[start:stop] for start, stop in zip(bounds, bounds[1:])]
    return "{}-{}-{} {}:{}:{}".format(*fields)

def is_machine_id(s):
    """Return True when *s* looks like a machine ID.

    Accepted shapes: exactly 32 uppercase-hex characters, or any string of
    at least 8 characters for which ``str.isalnum()`` is true. The second
    rule already covers the first, and it also accepts all-digit and
    all-letter strings.
    """
    # 32 uppercase hexadecimal characters.
    if re.fullmatch(r"[A-F0-9]{32}", s) is not None:
        return True
    # Otherwise: 8 or more characters, all alphanumeric.
    long_enough = len(s) >= 8
    return long_enough and s.isalnum()

def is_order_no(s):
    """Return True when *s* looks like an order number.

    Recognised forms:
      * starts with ``AGMS`` followed by digits;
      * starts with ``GMS``, digits, ``-`` and 12 digits;
      * consists solely of 12 or more digits.

    The first two forms are prefix matches (``re.match``), so trailing
    characters after the matched part are tolerated; only the all-digit
    form must cover the whole string.
    """
    prefix_patterns = (r"AGMS\d+(-\d{12})?", r"GMS\d+-\d{12}")
    if any(re.match(pattern, s) for pattern in prefix_patterns):
        return True
    return re.fullmatch(r"\d{12,}", s) is not None

def extract_datetimes(values):
    """Pull up to two timestamps out of *values*.

    Each value is scanned for 14-digit ``YYYYMMDDhhmmss`` runs that begin
    with ``20``. A value made up solely of 15 or more digits that contains
    no such run still contributes its first 14 digits.

    Args:
        values: Iterable of strings to scan, in order.

    Returns:
        A ``(register_time, expire_time)`` tuple: the first and the second
        timestamp found, each formatted by ``parse_datetime``. A timestamp
        that was not found is returned as ``""``.
    """
    times = []
    for v in values:
        # Match exactly 14 digits per stamp. A longer greedy match would eat
        # the leading digits of a stamp concatenated directly behind it and
        # the second stamp would be lost.
        found = re.findall(r"20\d{12}", v)
        if found:
            times.extend(parse_datetime(raw) for raw in found)
        elif re.fullmatch(r"\d{15,}", v):
            # Fallback only when nothing matched above; running it
            # unconditionally would record the same stamp twice and copy
            # the register time into the expire time.
            times.append(parse_datetime(v[:14]))
    reg = times[0] if times else ""
    exp = times[1] if len(times) > 1 else ""
    return reg, exp

def parse_raw_dump(in_csv, out_csv, max_lookahead=10):
    """Rebuild records from a raw-dump CSV and write them to a new CSV.

    The input is read with ``csv.DictReader`` and must provide the columns
    ``raw_data``, ``line_no`` and ``source_file``. Every usable row whose
    value satisfies ``is_machine_id`` starts a record; the usable rows that
    follow it are scanned for the record's key, order number and timestamps.
    Scanning resumes after the last row that supplied a field, so rows
    already used by one record are not re-read as the start of another.

    Args:
        in_csv: Path of the raw-dump CSV to read (UTF-8).
        out_csv: Path of the CSV to write (UTF-8, overwritten).
        max_lookahead: Maximum number of usable rows examined after a
            machine ID while collecting its fields; ``None`` means no limit.
            The default of 10 is a heuristic bound.
            NOTE(review): tune it to the real dump layout.

    Raises:
        KeyError: If a column the parser reads is missing from the input.
    """
    # Load every usable row up front. A DictReader is a one-shot iterator
    # that yields dicts, so it can neither be re-read by the outer loop nor
    # unpacked as (line_no, value) pairs; an indexable list allows a bounded
    # look-ahead without losing the rest of the file.
    usable = []
    with open(in_csv, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing fields with None.
            raw = (row["raw_data"] or "").strip()
            if (
                not raw
                or raw.lower() in ("infimum", "supremum")
                or raw.startswith(("===", "'", ":", ","))
            ):
                continue
            usable.append((row, raw))

    records = []
    pos = 0
    while pos < len(usable):
        row, raw = usable[pos]
        pos += 1
        # A machine ID marks the start of a record; anything else is skipped.
        if not is_machine_id(raw):
            continue

        machine_id = raw
        key = ""
        order_no = ""
        reg, exp = "", ""

        # Look at the next few rows for the remaining fields.
        if max_lookahead is None:
            window_end = len(usable)
        else:
            window_end = min(pos + max_lookahead, len(usable))
        resume = pos
        for idx in range(pos, window_end):
            val = usable[idx][1]
            contributed = False
            # One value may fill several fields, e.g. a long all-digit value
            # is both an order number and a timestamp source.
            if not key and val != machine_id and not is_order_no(val):
                key = val
                contributed = True
            if not order_no and is_order_no(val):
                order_no = val
                contributed = True
            if not reg and re.match(r"20\d{12,14}", val):
                reg, exp = extract_datetimes([val])
                contributed = True
            if contributed:
                resume = idx + 1
            if key and order_no:
                break
        pos = resume

        records.append([
            machine_id,
            key,
            reg,
            exp,
            order_no,
            f"{row['source_file']}:{row['line_no']}",
        ])

    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["machineID","key","register_time","expire_time","order_no","source"])
        writer.writerows(records)

if __name__ == "__main__":
    # Command-line entry point: expects the input CSV path and the output
    # CSV path as the first two arguments; prints usage and exits with
    # status 1 when either is missing.
    enough_args = len(sys.argv) >= 3
    if not enough_args:
        print("用法: python3 parse_raw_dump.py raw_dump.csv parsed.csv")
        sys.exit(1)
    in_path, out_path = sys.argv[1:3]
    parse_raw_dump(in_path, out_path)
    print(f"已生成 {sys.argv[2]}")