From 980c85b7af42fdac74258588b9983f00171d6733 Mon Sep 17 00:00:00 2001 From: Sergio Date: Thu, 19 Feb 2026 12:41:34 +0100 Subject: [PATCH] =?UTF-8?q?evita=20la=20extracci=C3=B3=20quan=20no=20es=20?= =?UTF-8?q?necesari?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/archive.py | 6 +++ core/pipeline.py | 47 ++++++++++++++++------ core/summary.py | 96 ++++++++++++++++++++++++++++++++++++++++++++ main.py | 7 ++++ processors/checks.py | 73 ++++++++++++++++----------------- 5 files changed, 178 insertions(+), 51 deletions(-) create mode 100644 core/summary.py diff --git a/core/archive.py b/core/archive.py index a6c37af..b30edd9 100644 --- a/core/archive.py +++ b/core/archive.py @@ -37,6 +37,12 @@ def open_archive(path: str): raise ArchiveError(f"Formato desconocido o archivo corrupto: {path}") +def list_archive_names(path: str) -> list[str]: + """Lista los miembros del archivo sin extraerlo. Lanza ArchiveError si falla.""" + with open_archive(path) as arc: + return arc.namelist() + + def extract_archive(path: str, dest_dir: str) -> str: """Extrae el archivo en dest_dir. Devuelve dest_dir.""" archive = open_archive(path) diff --git a/core/pipeline.py b/core/pipeline.py index 2ee7f2b..c724cf8 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -6,7 +6,7 @@ import shutil import rarfile -from core.archive import detect_real_format, extract_archive, repack_as_cbz, ArchiveError +from core.archive import detect_real_format, extract_archive, repack_as_cbz, ArchiveError, list_archive_names from core.collision import CollisionPolicy, resolve_collision from core.result import ComicResult, StepResult from processors.validator import validate_archive @@ -69,6 +69,18 @@ class Pipeline: return {} + def _needs_extraction(self, step_results: list, real_format: str) -> bool: + for step in self.steps: + if step in ("normalize_pages", "normalize_images", "convert_images"): + return True + if step == "convert" and needs_conversion(real_format, self.desired_format): + return True + if step == "clean": + trash = next((r for r in step_results if r.step == "check_trash"), None) + if trash and trash.warnings: + return True + return False + def run(self, path: str, confirm_fn=None) -> ComicResult: step_results = [] @@ -80,20 +92,31 @@ class Pipeline: real_format = detect_real_format(path) - # 2. Extraer una sola vez + # 2. Obtener lista de miembros sin extraer + try: + names = list_archive_names(path) + except Exception as exc: + step_results.append(StepResult(step="list", changed=False, errors=[str(exc)])) + return ComicResult(original_path=path, final_path=None, steps=step_results) + + # 3. Ejecutar siempre los 4 content checks sobre los nombres (sin extraer) + step_results += [ + check_trash(names), + check_page_numbering(names), + check_image_extensions(names), + check_comicinfo(names), + ] + + # 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo + if not self._needs_extraction(step_results, real_format): + return ComicResult(original_path=path, final_path=path, steps=step_results) + + # 5. Extraer una sola vez temp_dir = tempfile.mkdtemp() try: extract_archive(path, temp_dir) - # 3. Ejecutar siempre los 4 content checks - step_results += [ - check_trash(temp_dir), - check_page_numbering(temp_dir), - check_image_extensions(temp_dir), - check_comicinfo(temp_dir), - ] - - # 4. Aplicar cada fix step sobre el directorio temporal + # 6. Aplicar cada fix step sobre el directorio temporal any_changed = False if "clean" in self.steps: @@ -152,7 +175,7 @@ class Pipeline: if conv_result.changed: any_changed = True - # 5. Reempaquetar si hubo cambios o conversión de formato + # 7. Reempaquetar si hubo cambios o conversión de formato needs_repack = any_changed or ( "convert" in self.steps and needs_conversion(real_format, self.desired_format) diff --git a/core/summary.py b/core/summary.py new file mode 100644 index 0000000..827ecb7 --- /dev/null +++ b/core/summary.py @@ -0,0 +1,96 @@ +# core/summary.py + +from core.result import ComicResult + +_CORRUPT_PATTERNS = ("BadRarFile", "BadZipFile", "corrupto", "Failed to read") +_BORDER = "═" * 42 + + +class SummaryCollector: + def __init__(self): + self._results: list[ComicResult] = [] + + def add(self, result: ComicResult) -> None: + self._results.append(result) + + def _is_corrupt(self, result: ComicResult) -> bool: + all_errors = [e for s in result.steps for e in s.errors] + return any( + pattern in err + for err in all_errors + for pattern in _CORRUPT_PATTERNS + ) + + def render(self) -> str: + results = self._results + total = len(results) + if total == 0: + return "" + + # Categorize + errors = [r for r in results if not r.ok() or r.final_path is None] + ok_results = [r for r in results if r.ok() and r.final_path is not None] + + modified = [r for r in ok_results if any(s.changed for s in r.steps)] + warnings_only = [ + r for r in ok_results + if not any(s.changed for s in r.steps) and r.has_issues() + ] + no_changes = [ + r for r in ok_results + if not any(s.changed for s in r.steps) and not r.has_issues() + ] + + corrupt = [r for r in errors if self._is_corrupt(r)] + other_errors = [r for r in errors if not self._is_corrupt(r)] + + # Operation breakdown (among modified) + def count_step(step_names: list[str]) -> int: + return sum( + 1 for r in modified + if any(s.step in step_names and s.changed for s in r.steps) + ) + + cleaned = count_step(["clean"]) + pages_normalized = count_step(["normalize_pages"]) + images_converted = count_step(["normalize_images", "convert_images"]) + format_converted = count_step(["convert"]) + + lines = [ + _BORDER, + " RESUMEN DEL PROCESAMIENTO", + _BORDER, + f" Total procesados : {total:>3}", + f" Sin cambios : {len(no_changes):>3}", + f" Modificados : {len(modified):>3}", + ] + if modified: + if cleaned: + lines.append(f" · Limpiados : {cleaned:>3}") + if pages_normalized: + lines.append(f" · Páginas normalizadas : {pages_normalized:>3}") + if images_converted: + lines.append(f" · Imágenes convertidas : {images_converted:>3}") + if format_converted: + lines.append(f" · Formato convertido : {format_converted:>3}") + lines.append(f" Advertencias : {len(warnings_only):>3}") + lines.append(f" Errores : {len(errors):>3}") + if errors: + lines.append(f" · Corruptos : {len(corrupt):>3}") + lines.append(f" · Otros errores : {len(other_errors):>3}") + lines.append(_BORDER) + + if corrupt: + lines.append("") + lines.append("Archivos corruptos:") + for r in corrupt: + lines.append(f" {r.original_path}") + + if other_errors: + lines.append("") + lines.append("Otros errores:") + for r in other_errors: + all_errs = [e for s in r.steps for e in s.errors] + lines.append(f" {r.original_path} — {'; '.join(all_errs)}") + + return "\n".join(lines) diff --git a/main.py b/main.py index bbd70d3..047fb2f 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,7 @@ import argparse from core.scanner import find_comic_files from core.pipeline import Pipeline +from core.summary import SummaryCollector _COL_W = 30 _SEP = "─" * 44 @@ -84,11 +85,14 @@ def main(): if args.validar: pipeline = Pipeline(steps=[]) + collector = SummaryCollector() for f in comic_files: result = pipeline.run(f) + collector.add(result) if result.has_issues(): print(result.full_report()) print() + print(collector.render()) return # --- Construir steps --- @@ -111,10 +115,13 @@ def main(): desired_format=args.formato, desired_image_format="." + args.formato_imagen, ) + collector = SummaryCollector() for f in comic_files: print(f"\n=== {f} ===") result = pipeline.run(f, confirm_fn=confirm_fn) print(result.summary()) + collector.add(result) + print(f"\n{collector.render()}") if __name__ == "__main__": diff --git a/processors/checks.py b/processors/checks.py index ed759a9..df5fd36 100644 --- a/processors/checks.py +++ b/processors/checks.py @@ -7,18 +7,21 @@ from core.constants import IMAGE_EXTENSIONS, TRASH_FILES from core.result import StepResult -def check_trash(work_dir: str) -> StepResult: +def check_trash(names: list[str]) -> StepResult: """Detecta ficheros y directorios de basura presentes en el archivo.""" found = [] - for root, dirs, files in os.walk(work_dir): - for name in files: - if name.lower() in TRASH_FILES: - found.append(os.path.relpath(os.path.join(root, name), work_dir)) - for name in dirs: - if name.lower() in TRASH_FILES: - found.append(os.path.relpath(os.path.join(root, name), work_dir) + "/") + reported = set() + for name in names: + parts = name.replace("\\", "/").rstrip("/").split("/") + for i, part in enumerate(parts): + if part.lower() in TRASH_FILES: + key = "/".join(parts[: i + 1]) + if key not in reported: + found.append(key) + reported.add(key) + break - warnings = [f"Basura detectada: {f}" for f in found] + warnings = [f"Basura detectada: {f}" for f in sorted(found)] return StepResult(step="check_trash", changed=False, warnings=warnings) @@ -27,15 +30,16 @@ def _natural_sort_key(name: str): return [int(p) if p.isdigit() else p.lower() for p in parts] -def check_page_numbering(work_dir: str) -> StepResult: +def check_page_numbering(names: list[str]) -> StepResult: """Detecta páginas mal numeradas: sin zero-padding, padding inconsistente, saltos.""" - images = [] - for root, _, files in os.walk(work_dir): - for f in files: - if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS: - images.append(f) - - images.sort(key=lambda f: _natural_sort_key(os.path.splitext(f)[0])) + images = sorted( + [ + name.replace("\\", "/").rsplit("/", 1)[-1] + for name in names + if os.path.splitext(name)[1].lower() in IMAGE_EXTENSIONS + ], + key=lambda f: _natural_sort_key(os.path.splitext(f)[0]), + ) total = len(images) if total == 0: return StepResult(step="check_page_numbering", changed=False) @@ -82,16 +86,15 @@ def check_page_numbering(work_dir: str) -> StepResult: return StepResult(step="check_page_numbering", changed=False, warnings=warnings) -def check_image_extensions(work_dir: str) -> StepResult: +def check_image_extensions(names: list[str]) -> StepResult: """Detecta mezcla de formatos de imagen en el archivo.""" ext_set = set() - for root, _, files in os.walk(work_dir): - for f in files: - ext = os.path.splitext(f)[1].lower() - if ext in IMAGE_EXTENSIONS: - # Normalizar .jpeg → .jpg - normalized = ".jpg" if ext == ".jpeg" else ext - ext_set.add(normalized) + for name in names: + ext = os.path.splitext(name)[1].lower() + if ext in IMAGE_EXTENSIONS: + # Normalizar .jpeg → .jpg + normalized = ".jpg" if ext == ".jpeg" else ext + ext_set.add(normalized) warnings = [] if len(ext_set) > 1: @@ -101,19 +104,11 @@ def check_image_extensions(work_dir: str) -> StepResult: return StepResult(step="check_image_extensions", changed=False, warnings=warnings) -def check_comicinfo(work_dir: str) -> StepResult: +def check_comicinfo(names: list[str]) -> StepResult: """Detecta ausencia de ComicInfo.xml.""" - warnings = [] - found = False - for root, _, files in os.walk(work_dir): - for f in files: - if f.lower() == "comicinfo.xml": - found = True - break - if found: - break - - if not found: - warnings.append("Falta ComicInfo.xml") - + found = any( + name.replace("\\", "/").rsplit("/", 1)[-1].lower() == "comicinfo.xml" + for name in names + ) + warnings = [] if found else ["Falta ComicInfo.xml"] return StepResult(step="check_comicinfo", changed=False, warnings=warnings)