"""Extract runs of printable ASCII characters from a binary file.

Usage: python3 extract_strings.py <page_file>
"""
import re
import sys


def extract_strings(filename, min_length: int = 4) -> list:
    """Return every run of printable ASCII characters found in a file.

    The file is read as raw bytes in one go, and each maximal run of at
    least ``min_length`` consecutive bytes in the range 0x20 (space) to
    0x7E (``~``) is returned, in file order.

    Args:
        filename: Path of the file to scan; it is opened in binary mode.
        min_length: Minimum number of consecutive printable bytes a run
            needs in order to be reported. Must be at least 1.

    Returns:
        A list of ``str``, one per run, in the order they occur.

    Raises:
        ValueError: If ``min_length`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    # A bound of 0 makes the pattern match the empty string at every
    # offset, and a negative bound is not a valid quantifier at all, so
    # reject both instead of silently returning garbage.
    if min_length < 1:
        raise ValueError("min_length must be >= 1")

    with open(filename, "rb") as f:
        data = f.read()

    # Printable ASCII only (space through tilde); multi-byte UTF-8
    # sequences are not matched by this character class.
    pattern = re.compile(rb"[ -~]{%d,}" % min_length)

    # Every matched byte is ASCII, so this decode can never drop anything;
    # the original codec and error policy are kept unchanged.
    return [s.decode("utf-8", errors="ignore") for s in pattern.findall(data)]


def main() -> None:
    """Print every printable string found in the file named on the command line."""
    if len(sys.argv) < 2:
        print("用法: python3 extract_strings.py <page_file>")
        sys.exit(1)

    page_file = sys.argv[1]
    strings = extract_strings(page_file)

    print(f"=== {page_file} 中提取的可见字符串 ===")
    for s in strings:
        print(s)


if __name__ == "__main__":
    main()