evita l'extracció quan no és necessari

2026-02-19 12:41:34 +01:00
parent 39bd6eeb20
commit 980c85b7af
5 changed files with 178 additions and 51 deletions
@@ -6,7 +6,7 @@ import shutil

 import rarfile

-from core.archive import detect_real_format, extract_archive, repack_as_cbz, ArchiveError
+from core.archive import detect_real_format, extract_archive, repack_as_cbz, ArchiveError, list_archive_names
 from core.collision import CollisionPolicy, resolve_collision
 from core.result import ComicResult, StepResult
 from processors.validator import validate_archive
@@ -69,6 +69,18 @@ class Pipeline:

        return {}

+    def _needs_extraction(self, step_results: list, real_format: str) -> bool:
+        for step in self.steps:
+            if step in ("normalize_pages", "normalize_images", "convert_images"):
+                return True
+            if step == "convert" and needs_conversion(real_format, self.desired_format):
+                return True
+            if step == "clean":
+                trash = next((r for r in step_results if r.step == "check_trash"), None)
+                if trash and trash.warnings:
+                    return True
+        return False
+
    def run(self, path: str, confirm_fn=None) -> ComicResult:
        step_results = []

@@ -80,20 +92,31 @@ class Pipeline:

        real_format = detect_real_format(path)

-        # 2. Extraer una sola vez
+        # 2. Obtener lista de miembros sin extraer
+        try:
+            names = list_archive_names(path)
+        except Exception as exc:
+            step_results.append(StepResult(step="list", changed=False, errors=[str(exc)]))
+            return ComicResult(original_path=path, final_path=None, steps=step_results)
+
+        # 3. Ejecutar siempre los 4 content checks sobre los nombres (sin extraer)
+        step_results += [
+            check_trash(names),
+            check_page_numbering(names),
+            check_image_extensions(names),
+            check_comicinfo(names),
+        ]
+
+        # 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo
+        if not self._needs_extraction(step_results, real_format):
+            return ComicResult(original_path=path, final_path=path, steps=step_results)
+
+        # 5. Extraer una sola vez
        temp_dir = tempfile.mkdtemp()
        try:
            extract_archive(path, temp_dir)

-            # 3. Ejecutar siempre los 4 content checks
-            step_results += [
-                check_trash(temp_dir),
-                check_page_numbering(temp_dir),
-                check_image_extensions(temp_dir),
-                check_comicinfo(temp_dir),
-            ]
-
-            # 4. Aplicar cada fix step sobre el directorio temporal
+            # 6. Aplicar cada fix step sobre el directorio temporal
            any_changed = False

            if "clean" in self.steps:
@@ -152,7 +175,7 @@ class Pipeline:
                    if conv_result.changed:
                        any_changed = True

-            # 5. Reempaquetar si hubo cambios o conversión de formato
+            # 7. Reempaquetar si hubo cambios o conversión de formato
            needs_repack = any_changed or (
                "convert" in self.steps
                and needs_conversion(real_format, self.desired_format)