From f374bdb023170e487290c978a1eb72b874227b56 Mon Sep 17 00:00:00 2001 From: Sergio Date: Wed, 18 Feb 2026 13:07:03 +0100 Subject: [PATCH] cleaner.py i standarizer.py --- core/constants.py | 10 +++ main.py | 71 ++++++++++++++++++- processors/cleaner.py | 139 +++++++++++++++++++++++++++++++++++++ processors/converter.py | 72 +++++++++++++++++++ processors/standardizer.py | 55 +++++++++++++++ processors/validator.py | 2 + 6 files changed, 346 insertions(+), 3 deletions(-) create mode 100644 core/constants.py create mode 100644 processors/cleaner.py create mode 100644 processors/converter.py create mode 100644 processors/standardizer.py diff --git a/core/constants.py b/core/constants.py new file mode 100644 index 0000000..d7ea31b --- /dev/null +++ b/core/constants.py @@ -0,0 +1,10 @@ +# core/constants.py + +IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"} + +TRASH_FILES = { + "thumbs.db", + ".ds_store", + "__macosx", + "desktop.ini", +} diff --git a/main.py b/main.py index a42c74b..31dbd90 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,10 @@ import argparse from core.scanner import find_comic_files from processors.validator import validate_comic +from processors.cleaner import clean_comic +from processors.converter import convert_comic +from processors.standardizer import standardize_comic + def parse_args(): parser = argparse.ArgumentParser( @@ -28,9 +32,31 @@ def parse_args(): help="Validar los archivos encontrados" ) - # Aquí podremos añadir más opciones en el futuro: - # parser.add_argument("--convertir", action="store_true", help="Convertir CBR a CBZ") - # parser.add_argument("--organizar", action="store_true", help="Organizar la colección") + parser.add_argument( + "--limpiar", + action="store_true", + help="Eliminar archivos basura y reconstruir CBZ limpios" + ) + + parser.add_argument( + "--convertir", + action="store_true", + help="Convertir los archivos al formato indicado con --formato" + ) + + parser.add_argument( + "--formato", + type=str, + 
choices=["cbz", "cbr"], + default="cbz", + help="Formato final deseado (por defecto: cbz)" + ) + + parser.add_argument( + "--estandarizar", + action="store_true", + help="Pipeline completo: limpiar, convertir y normalizar" + ) return parser.parse_args() @@ -45,12 +71,18 @@ def main(): print("No se encontraron archivos .cbr o .cbz") return + # ------------------------------------------------------------ + # LISTAR + # ------------------------------------------------------------ if args.listar: print("Archivos encontrados:") for f in comic_files: print(f" - {f}") print() + # ------------------------------------------------------------ + # VALIDAR + # ------------------------------------------------------------ if args.validar: print("Validando archivos...\n") for f in comic_files: @@ -69,6 +101,39 @@ def main(): print() + # ------------------------------------------------------------ + # LIMPIAR + # ------------------------------------------------------------ + if args.limpiar: + print("Limpiando archivos...\n") + for f in comic_files: + result = clean_comic(f) + print(result) + print() + + # ------------------------------------------------------------ + # CONVERTIR + # ------------------------------------------------------------ + if args.convertir: + print(f"Convirtiendo archivos al formato {args.formato}...\n") + for f in comic_files: + info = convert_comic(f, args.formato) + if info["needs_conversion"]: + print(f"Convertido: {f} → {info['target_path']}") + else: + print(f"Sin cambios: {f}") + print() + + # ------------------------------------------------------------ + # ESTANDARIZAR + # ------------------------------------------------------------ + if args.estandarizar: + print("Estandarizando archivos...\n") + for f in comic_files: + result = standardize_comic(f, args.formato) + print(result) + print() + if __name__ == "__main__": main() diff --git a/processors/cleaner.py b/processors/cleaner.py new file mode 100644 index 0000000..bb4d01e --- /dev/null +++ 
# processors/cleaner.py

import os
import zipfile
import rarfile
import tempfile
import shutil

from core.constants import TRASH_FILES
from processors.validator import try_open_rar, try_open_zip


class CleanResult:
    """Outcome of cleaning a single comic archive.

    Attributes:
        original_path: Path that was passed to ``clean_comic``.
        cleaned_path: Path of the resulting archive, or ``None`` until
            ``clean_comic`` sets it.
        removed_files: Paths (relative to the archive root) that were deleted.
        repacked: ``True`` when a new CBZ archive was written.
        converted_to_cbz: ``True`` when the source had a ``.cbr`` extension
            and was therefore rewritten as CBZ.
    """

    def __init__(self, original_path):
        self.original_path = original_path
        self.cleaned_path = None
        self.removed_files = []
        self.repacked = False
        self.converted_to_cbz = False

    def pretty_removed_files(self):
        """Return the user-facing, newline-terminated list of removed entries."""
        if not self.removed_files:
            return " No se eliminaron archivos\n"
        header = f" Archivos eliminados ({len(self.removed_files)}):\n"
        return header + "".join(f" - {name}\n" for name in self.removed_files)

    def __str__(self):
        """Return the multi-line report that ``main`` prints for this result."""
        parts = [
            f"Limpieza de: {self.original_path}\n",
            self.pretty_removed_files(),
        ]
        if self.repacked:
            parts.append(" Archivo reconstruido\n")
        if self.converted_to_cbz:
            parts.append(" Convertido a CBZ\n")
        parts.append(f" Resultado final: {self.cleaned_path}")
        return "".join(parts)


# ------------------------------------------------------------
# 1) Cleaning an already-extracted folder
# ------------------------------------------------------------

def clean_folder(folder_path):
    """Delete junk entries inside an already-extracted folder.

    An entry is junk when its lowercased name is in ``TRASH_FILES``. Both
    files and directories are checked: ``TRASH_FILES`` contains
    ``"__macosx"``, which is a directory name, so matching file names alone
    would never remove it.

    Args:
        folder_path: Root of the extracted archive.

    Returns:
        List of removed paths, relative to ``folder_path``.
    """
    removed = []

    for root, dirs, files in os.walk(folder_path):
        # Iterate over a copy: ``dirs`` is edited in place so that os.walk
        # does not descend into a directory that has just been deleted.
        for name in list(dirs):
            if name.lower() not in TRASH_FILES:
                continue
            full = os.path.join(root, name)
            if os.path.islink(full):
                # shutil.rmtree refuses to operate on a symbolic link.
                os.remove(full)
            else:
                shutil.rmtree(full)
            dirs.remove(name)
            removed.append(os.path.relpath(full, folder_path))

        for name in files:
            if name.lower() in TRASH_FILES:
                full = os.path.join(root, name)
                os.remove(full)
                removed.append(os.path.relpath(full, folder_path))

    return removed


# ------------------------------------------------------------
# 2) Cleaning a whole archive (current mode)
# ------------------------------------------------------------

def _open_archive(path, ext):
    """Open ``path`` according to its lowercased extension ``ext``.

    A ``.cbr`` file that does not open as RAR is retried as ZIP. The
    ``try_open_*`` helpers are treated as returning a falsy value on failure.

    Raises:
        ValueError: If ``ext`` is neither ``.cbr`` nor ``.cbz``.
        Exception: If the file cannot be opened.
    """
    if ext == ".cbr":
        archive = try_open_rar(path) or try_open_zip(path)
    elif ext == ".cbz":
        archive = try_open_zip(path)
    else:
        raise ValueError("Formato no soportado")

    if not archive:
        raise Exception(f"No se puede abrir {path}")
    return archive


def _write_cbz(folder, final_path):
    """Zip every file under ``folder`` into ``final_path``.

    The archive is written to a sibling temporary file and then moved into
    place, so a failure midway never leaves a truncated file at
    ``final_path`` (which may be the very archive that is being cleaned).
    """
    tmp_path = final_path + ".tmp"
    try:
        with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as new_zip:
            for root, _, files in os.walk(folder):
                for name in files:
                    full = os.path.join(root, name)
                    new_zip.write(full, os.path.relpath(full, folder))
        os.replace(tmp_path, final_path)
    finally:
        # Only still present when something failed before the replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def clean_comic(path, output_path=None):
    """Clean a CBR/CBZ archive.

    Junk entries are removed, ``.cbr`` sources are rewritten as CBZ, and the
    archive is rebuilt only when one of those two things makes it necessary.

    Args:
        path: Archive to clean (``.cbr`` or ``.cbz``).
        output_path: Destination for the rebuilt archive. Defaults to
            ``path`` with its extension replaced by ``.cbz``.

    Returns:
        A ``CleanResult`` describing what was done.

    Raises:
        ValueError: If the extension is neither ``.cbr`` nor ``.cbz``.
        Exception: If the archive cannot be opened.
    """
    result = CleanResult(path)
    ext = os.path.splitext(path)[1].lower()

    # 1) Open the archive
    archive = _open_archive(path, ext)

    # 2) Extract into a temporary folder that is removed on every exit path
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            archive.extractall(temp_dir)
        finally:
            archive.close()

        # 3) Clean the folder
        result.removed_files = clean_folder(temp_dir)

        # 4) Decide whether a rebuild is needed
        is_cbr = ext == ".cbr"
        result.converted_to_cbz = is_cbr
        if not result.removed_files and not is_cbr:
            result.cleaned_path = path
            return result

        # 5) Final path
        if output_path:
            final_path = output_path
        else:
            final_path = os.path.splitext(path)[0] + ".cbz"
        result.cleaned_path = final_path

        # 6) Repack as CBZ
        _write_cbz(temp_dir, final_path)
        result.repacked = True

    return result


# processors/converter.py

import os
import zipfile
import rarfile
import tempfile
import shutil


def decide_target_format(original_path, desired_format="cbz"):
    """Decide whether a file must be converted and what its final path is.

    No conversion is performed here. The comparison is made on the real file
    extension, so a name that merely ends with the format letters (for
    example ``"foo.notcbz"``) is not mistaken for an already-converted file.

    Args:
        original_path: Path of the existing archive.
        desired_format: Target format without the dot; case-insensitive.

    Returns:
        Dict with the keys ``needs_conversion``, ``target_format`` and
        ``target_path``.
    """
    target_format = desired_format.lower()
    base, ext = os.path.splitext(original_path)

    return {
        "needs_conversion": ext.lower() != f".{target_format}",
        "target_format": target_format,
        "target_path": f"{base}.{target_format}",
    }


def convert_comic(path, desired_format="cbz"):
    """Convert a CBR/CBZ archive to the desired format.

    Does NOT remove junk, rename pages or reorder anything.

    Args:
        path: Archive to convert (``.cbr`` or ``.cbz``).
        desired_format: ``"cbz"`` or ``"cbr"``; case-insensitive.

    Returns:
        The dict produced by ``decide_target_format``.

    Raises:
        ValueError: If the source extension or the target format is not
            supported.
        NotImplementedError: If the target format is ``cbr``.
    """
    info = decide_target_format(path, desired_format)

    if not info["needs_conversion"]:
        return info  # Nothing to do

    ext = os.path.splitext(path)[1].lower()
    if ext not in (".cbr", ".cbz"):
        raise ValueError("Formato no soportado")

    # Validate the target before extracting anything, so an unsupported
    # request neither wastes work nor reports a conversion that never ran.
    target_format = info["target_format"]
    if target_format == "cbr":
        # rarfile cannot create RAR archives; an external "rar" tool is needed
        raise NotImplementedError("Crear CBR requiere la herramienta 'rar' instalada")
    if target_format != "cbz":
        raise ValueError(f"Formato de destino no soportado: {desired_format}")

    # 1) Open the original archive
    if ext == ".cbr":
        archive = rarfile.RarFile(path, "r")
    else:
        archive = zipfile.ZipFile(path, "r")

    # 2) Extract into a temporary folder that is removed on every exit path
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            archive.extractall(temp_dir)
        finally:
            archive.close()

        # 3) Repack in the desired format
        with zipfile.ZipFile(info["target_path"], "w", zipfile.ZIP_DEFLATED) as new_zip:
            for root, _, files in os.walk(temp_dir):
                for name in files:
                    full = os.path.join(root, name)
                    new_zip.write(full, os.path.relpath(full, temp_dir))

    return info


# processors/standardizer.py

from processors.cleaner import clean_comic
from processors.converter import convert_comic


class StandardizeResult:
    """Outcome of running the standardization pipeline on one archive.

    Attributes:
        original_path: Path that was passed to ``standardize_comic``.
        cleaned: The ``CleanResult`` of the cleaning step, or ``None``.
        converted: The info dict of the conversion step, or ``None``.
        final_path: Path of the archive once the pipeline has finished.
    """

    def __init__(self, original_path):
        self.original_path = original_path
        self.cleaned = None
        self.converted = None
        self.final_path = None

    def __str__(self):
        """Return the multi-line report that ``main`` prints for this result."""
        parts = [f"Estandarización de: {self.original_path}\n"]

        if self.cleaned:
            parts.append(
                f" Limpieza: OK ({len(self.cleaned.removed_files)} archivos eliminados)\n"
            )
        else:
            parts.append(" Limpieza: no realizada\n")

        if self.converted:
            if self.converted["needs_conversion"]:
                parts.append(f" Conversión: OK → {self.converted['target_path']}\n")
            else:
                parts.append(" Conversión: no necesaria\n")

        parts.append(f" Resultado final: {self.final_path}\n")
        return "".join(parts)


def standardize_comic(path, desired_format="cbz"):
    """Run the basic pipeline on one archive: clean, then convert.

    NOTE(review): ``clean_comic`` always produces a ``.cbz``, and
    ``convert_comic`` raises ``NotImplementedError`` for a ``cbr`` target, so
    ``desired_format="cbr"`` cannot currently succeed.

    Args:
        path: Archive to standardize.
        desired_format: Target format handed to ``convert_comic``.

    Returns:
        A ``StandardizeResult`` holding the result of both steps.
    """
    result = StandardizeResult(path)

    # 1) Clean
    result.cleaned = clean_comic(path)

    # The archive that results from cleaning is the input of the conversion
    cleaned_path = result.cleaned.cleaned_path

    # 2) Convert
    result.converted = convert_comic(cleaned_path, desired_format)

    # Final path
    if result.converted["needs_conversion"]:
        result.final_path = result.converted["target_path"]
    else:
        result.final_path = cleaned_path

    return result


# processors/validator.py (excerpt: only the import section of this file is
# visible in the patch; the rest of the module is unchanged)

import os
import zipfile
import rarfile

from core.constants import IMAGE_EXTENSIONS, TRASH_FILES

# NOTE(review): the two assignments below run after the import above and so
# rebind both names; this module keeps using its own two-entry TRASH_FILES
# rather than the shared four-entry set. They look like leftovers that should
# be deleted - TODO confirm against the rest of validator.py.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
TRASH_FILES = {"thumbs.db", ".ds_store"}