"""Split oversized ``.gz`` files into fixed-size parts next to the original.

Usage: ``python split2upload.py <root_dir>``

Every ``*.gz`` file found recursively under ``<root_dir>`` that is larger than
``PART_SIZE`` is copied, byte for byte, into sibling files named
``<stem>.gz.part001``, ``<stem>.gz.part002``, ...  The source file is only
read; it is never modified or removed by this script.
"""
import os
from pathlib import Path

PART_SIZE = 40 * 1024 * 1024 * 1024  # 40 GB
# Upper bound on a single read; up to this many bytes are held in memory.
CHUNK_SIZE = 1024 * 1024 * 1024  # 1 GB


def split_gz_file(
    file_path: Path,
    part_size: int = PART_SIZE,
    chunk_size: int = CHUNK_SIZE,
) -> None:
    """Split *file_path* into consecutive parts of at most *part_size* bytes.

    Parts are written into the same directory as the source and are named
    ``<stem>.gz.partNNN`` (NNN is 1-based and zero-padded to three digits).
    Concatenating the parts in order reproduces the source bytes.

    Args:
        file_path: File to split. It is opened read-only.
        part_size: Maximum size of each part in bytes. Files whose size is
            less than or equal to this value are skipped.
        chunk_size: Maximum number of bytes read (and buffered in memory)
            per read call.

    Raises:
        ValueError: If *part_size* or *chunk_size* is not positive.
        OSError: If the source cannot be read or a part cannot be written.
    """
    if part_size <= 0 or chunk_size <= 0:
        raise ValueError("part_size and chunk_size must be positive")

    file_size = file_path.stat().st_size
    print(f"\n[+] Processing: {file_path}")
    print(f" Size: {file_size / (1024**3):.2f} GB")

    if file_size <= part_size:
        # Renders as "40GB" for the default part size.
        print(f" File smaller than {part_size / (1024**3):g}GB, skip.")
        return

    prefix = file_path.stem  # filename without its last suffix (.gz)
    parent = file_path.parent  # parts are written next to the original

    # For example: bigfile.gz.part001
    def part_name(n: int) -> Path:
        return parent / f"{prefix}.gz.part{n:03d}"

    part_num = 0
    with open(file_path, "rb") as src:
        while True:
            # Read the first chunk before creating the part file so that no
            # empty trailing part is produced when the source size is an
            # exact multiple of part_size.
            chunk = src.read(min(chunk_size, part_size))
            if not chunk:
                break

            part_num += 1
            out_path = part_name(part_num)
            print(f" [+] Creating {out_path.name}")

            # The context manager closes the part even if a read/write fails.
            with open(out_path, "wb") as out_file:
                out_file.write(chunk)
                written = len(chunk)
                while written < part_size:
                    # Never read past the part boundary, so every part except
                    # possibly the last is exactly part_size bytes.
                    chunk = src.read(min(chunk_size, part_size - written))
                    if not chunk:
                        break
                    out_file.write(chunk)
                    written += len(chunk)

            print(f" [✓] Finished part {part_num:03d}")

    print(f"[OK] Finished splitting {file_path.name}\n")


def scan_and_split(
    root_dir: str,
    part_size: int = PART_SIZE,
    chunk_size: int = CHUNK_SIZE,
) -> None:
    """Recursively split every ``*.gz`` file under *root_dir*.

    Each matching regular file is handed to :func:`split_gz_file`. A failure
    on one file is reported on stdout and does not stop the scan.

    Args:
        root_dir: Directory to search recursively.
        part_size: Forwarded to :func:`split_gz_file`.
        chunk_size: Forwarded to :func:`split_gz_file`.
    """
    root = Path(root_dir)

    print(f"Scanning directory: {root.resolve()}\n")

    for path in root.rglob("*.gz"):
        # Deliberately broad: this is the per-file boundary of a batch job,
        # so one unreadable file must not abort the remaining ones.
        try:
            if path.is_file():
                split_gz_file(path, part_size=part_size, chunk_size=chunk_size)
        except Exception as e:
            print(f"[ERROR] Failed on {path}: {e}")


if __name__ == "__main__":
    import sys

    if len(sys.argv) != 2:
        print(f"Usage: python {Path(sys.argv[0]).name} <root_dir>")
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under python -S).
        sys.exit(1)

    scan_and_split(sys.argv[1])