77 lines
2.0 KiB
Python
77 lines
2.0 KiB
Python
import os
|
|
from pathlib import Path
|
|
|
|
# Upper bound, in bytes, on the size of each output part (40 * 2**30).
PART_SIZE = 40 * 1024 ** 3
# Upper bound, in bytes, requested from the source file per read (2**30).
CHUNK_SIZE = 1024 ** 3
|
|
|
|
|
|
def split_gz_file(file_path: Path, part_size=None, chunk_size=None):
    """Split a large file into numbered part files stored next to it.

    The raw bytes of ``file_path`` are copied in order (nothing is
    decompressed) into ``<stem>.gz.part001``, ``<stem>.gz.part002``, ...
    inside the file's own directory. Every part except possibly the last
    holds exactly ``part_size`` bytes, so concatenating the parts in order
    reproduces the original file. The source file itself is left in place.

    Files whose size does not exceed ``part_size`` are skipped.

    Args:
        file_path: Path of the file to split.
        part_size: Maximum number of bytes per part. ``None`` (the default)
            uses the module-level ``PART_SIZE``.
        chunk_size: Maximum number of bytes read from the source per call.
            ``None`` (the default) uses the module-level ``CHUNK_SIZE``.

    Raises:
        ValueError: If ``part_size`` or ``chunk_size`` is not positive.
        OSError: If the source cannot be read or a part cannot be written.
    """
    # Resolve the defaults lazily so explicit sizes never touch the globals.
    if part_size is None:
        part_size = PART_SIZE
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if part_size <= 0 or chunk_size <= 0:
        raise ValueError("part_size and chunk_size must be positive")

    file_size = file_path.stat().st_size
    print(f"\n[+] Processing: {file_path}")
    print(f" Size: {file_size / (1024**3):.2f} GB")

    if file_size <= part_size:
        # With the default part size this prints "... smaller than 40GB, skip."
        print(f" File smaller than {part_size / (1024**3):g}GB, skip.")
        return

    prefix = file_path.stem  # filename without .gz
    parent = file_path.parent  # original directory

    part_num = 0
    with open(file_path, "rb") as src:
        # Never request more than one part's worth, so parts stay exact even
        # when chunk_size does not divide part_size.
        chunk = src.read(min(chunk_size, part_size))
        while chunk:
            part_num += 1
            # For example: bigfile.gz.part001
            out_path = parent / f"{prefix}.gz.part{part_num:03d}"
            remaining = part_size
            # The context manager closes the part even if a read/write fails.
            with open(out_path, "wb") as out_file:
                print(f" [+] Creating {out_path.name}")
                while chunk:
                    out_file.write(chunk)
                    remaining -= len(chunk)
                    if remaining <= 0:
                        break
                    chunk = src.read(min(chunk_size, remaining))
            print(f" [✓] Finished part {part_num:03d}")
            if chunk:
                # The part filled up (we left the inner loop via ``break``):
                # fetch the first chunk of the next part. An empty read here
                # means the source ended exactly on a part boundary, so no
                # extra part is created or reported.
                chunk = src.read(min(chunk_size, part_size))

    print(f"[OK] Finished splitting {file_path.name}\n")
|
|
|
|
|
|
def scan_and_split(root_dir: str):
    """Walk ``root_dir`` recursively and split every ``*.gz`` file found.

    Each match that is a regular file is handed to ``split_gz_file``.
    A failure on one file is printed and does not stop the scan of the
    remaining files.

    Args:
        root_dir: Directory to search, including all of its subdirectories.
    """
    base = Path(root_dir)
    print(f"Scanning directory: {base.resolve()}\n")

    for candidate in base.rglob("*.gz"):
        try:
            # Directories (or other non-files) that happen to end in .gz
            # are ignored.
            if not candidate.is_file():
                continue
            split_gz_file(candidate)
        except Exception as exc:
            # Per-file boundary: report and move on to the next match.
            print(f"[ERROR] Failed on {candidate}: {exc}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the directory
    # to scan recursively for *.gz files.
    import sys

    if len(sys.argv) != 2:
        print("Usage: python split_gz_auto.py <directory>")
        # Use sys.exit rather than the builtin ``exit``: the latter is
        # injected by the ``site`` module for interactive use and is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)

    scan_and_split(sys.argv[1])
|