"""Split oversized ``.gz`` files into fixed-size raw byte parts.

Every ``*.gz`` file found below a root directory that is larger than
``PART_SIZE`` is copied, in order, into sibling files named
``<stem>.gz.part001``, ``<stem>.gz.part002``, ...  The parts are plain byte
slices of the source file (nothing is decompressed or recompressed), so
concatenating them in numeric order reproduces the original file.  The
source file itself is left untouched.
"""

import os
import sys
from pathlib import Path

PART_SIZE = 40 * 1024 * 1024 * 1024  # 40 GB: maximum size of one part
CHUNK_SIZE = 1024 * 1024 * 1024  # 1 GB: maximum size of one read() call


def split_gz_file(
    file_path: Path,
    part_size: int = PART_SIZE,
    chunk_size: int = CHUNK_SIZE,
) -> None:
    """Split *file_path* into parts of at most *part_size* bytes.

    Files whose size is less than or equal to *part_size* are skipped.
    Parts are written next to the source file and named
    ``<stem>.gz.part<NNN>`` with a 1-based, zero-padded counter; an existing
    file with the same name is overwritten.

    Args:
        file_path: Path of the file to split.
        part_size: Maximum number of bytes per part. Defaults to
            ``PART_SIZE``.
        chunk_size: Maximum number of bytes held in memory per read.
            Defaults to ``CHUNK_SIZE``.

    Raises:
        ValueError: If *part_size* or *chunk_size* is not positive.
        OSError: If the source cannot be read or a part cannot be written.
    """
    if part_size <= 0 or chunk_size <= 0:
        raise ValueError("part_size and chunk_size must be positive")

    file_size = file_path.stat().st_size
    print(f"\n[+] Processing: {file_path}")
    print(f" Size: {file_size / (1024**3):.2f} GB")

    if file_size <= part_size:
        # With the default part size this renders as "40GB".
        print(f" File smaller than {part_size / (1024**3):g}GB, skip.")
        return

    prefix = file_path.stem  # filename without .gz
    parent = file_path.parent  # original directory

    # For example: bigfile.gz.part001
    def part_name(n: int) -> Path:
        return parent / f"{prefix}.gz.part{n:03d}"

    part_num = 0
    with open(file_path, "rb") as src:
        while True:
            # Read before creating the next part so that a source whose size
            # is an exact multiple of part_size does not produce an empty
            # trailing part (or a "finished" message for one).
            chunk = src.read(min(chunk_size, part_size))
            if not chunk:
                break

            part_num += 1
            out_path = part_name(part_num)
            print(f" [+] Creating {out_path.name}")

            written = 0
            # The context manager closes the part even if a read or write
            # fails part-way through.
            with open(out_path, "wb") as out_file:
                while chunk:
                    out_file.write(chunk)
                    written += len(chunk)
                    remaining = part_size - written
                    if remaining <= 0:
                        break
                    # Never read past the part boundary, so a part cannot
                    # exceed part_size whatever the chunk size is.
                    chunk = src.read(min(chunk_size, remaining))

            print(f" [✓] Finished part {part_num:03d}")

    print(f"[OK] Finished splitting {file_path.name}\n")


def scan_and_split(root_dir: str) -> None:
    """Recursively split every oversized ``*.gz`` file below *root_dir*.

    Each match that is a regular file is passed to ``split_gz_file`` with the
    default sizes. A failure on one file is reported on stdout and does not
    stop the scan.

    Args:
        root_dir: Directory to search recursively.
    """
    root = Path(root_dir)
    print(f"Scanning directory: {root.resolve()}\n")

    for path in root.rglob("*.gz"):
        try:
            if path.is_file():
                split_gz_file(path)
        except Exception as e:
            # Best-effort batch job: report the failure and keep going.
            print(f"[ERROR] Failed on {path}: {e}")


def main(argv=None) -> int:
    """Command-line entry point; return the process exit status.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv``.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 2:
        print("Usage: python split_gz_auto.py ROOT_DIR")
        return 1

    scan_and_split(args[1])
    return 0


if __name__ == "__main__":
    sys.exit(main())