detecció de fitxers estranys en el fitxer

2026-02-20 12:50:03 +01:00
parent eeb9ce6879
commit 448c50b846
5 changed files with 38 additions and 11 deletions
@@ -8,3 +8,5 @@ TRASH_FILES = {
    "__macosx",
    "desktop.ini",
 }
 FOREIGN_ALLOWED = {"comicinfo.xml"}  # nombres exactos (lowercase) permitidos aunque no sean imágenes
@@ -18,6 +18,7 @@ from processors.checks import (
    check_page_numbering,
    check_image_extensions,
    check_comicinfo,
    check_foreign,
 )
 from processors.page_normalizer import normalize_pages, preview_normalize_pages
 from processors.image_normalizer import (
@@ -45,12 +46,11 @@ class Pipeline:
    def _compute_preview(self, step: str, temp_dir: str, step_results: list) -> dict:
        if step == "clean":
-            trash_result = next((r for r in step_results if r.step == "check_trash"), None)
+            trash_r   = next((r for r in step_results if r.step == "check_trash"),   None)
-            if trash_result:
+            foreign_r = next((r for r in step_results if r.step == "check_foreign"), None)
-                prefix = "Basura detectada: "
+            prefix = "Basura detectada: "
-                items = [w.removeprefix(prefix) for w in trash_result.warnings if w.startswith(prefix)]
+            items  = [w.removeprefix(prefix) for w in (trash_r.warnings   if trash_r   else []) if w.startswith(prefix)]
-            else:
+            items += [w.removeprefix("Fichero extraño: ") for w in (foreign_r.warnings if foreign_r else [])]
                items = []
            return {"items": items}
        elif step == "normalize_pages":
@@ -81,8 +81,9 @@ class Pipeline:
                if ext != self.desired_format:
                    return True
            if step == "clean":
-                trash = next((r for r in step_results if r.step == "check_trash"), None)
+                trash   = next((r for r in step_results if r.step == "check_trash"),   None)
-                if trash and trash.warnings:
+                foreign = next((r for r in step_results if r.step == "check_foreign"), None)
                if (trash and trash.warnings) or (foreign and foreign.warnings):
                    return True
        return False
@@ -104,12 +105,13 @@ class Pipeline:
            step_results.append(StepResult(step="list", changed=False, errors=[str(exc)]))
            return ComicResult(original_path=path, final_path=None, steps=step_results)
-        # 3. Ejecutar siempre los 4 content checks sobre los nombres (sin extraer)
+        # 3. Ejecutar siempre los content checks sobre los nombres (sin extraer)
        step_results += [
            check_trash(names),
            check_page_numbering(names),
            check_image_extensions(names),
            check_comicinfo(names),
            check_foreign(names),
        ]
        # 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo
@@ -131,6 +131,7 @@ class SummaryCollector:
        categories = [
            ("Extensión incorrecta",  "validate",               lambda w: "Extensión incorrecta" in w, ["convert"],                           "convertido"),
            ("Basura detectada",       "check_trash",            lambda w: True,                         ["clean"],                             "limpiado"),
            ("Ficheros extraños",      "check_foreign",          lambda w: True,                         ["clean"],                             "limpiado"),
            ("Numeración de páginas",  "check_page_numbering",   lambda w: True,                         ["normalize_pages"],                    "renumerado"),
            ("Imágenes mezcladas",     "check_image_extensions", lambda w: True,                         ["normalize_images", "convert_images"], "normalizado"),
            ("Sin ComicInfo.xml",      "check_comicinfo",        lambda w: True,                         [],                                    None),
@@ -155,6 +156,9 @@ class SummaryCollector:
                    elif step_name == "check_trash":
                        items = [w.removeprefix("Basura detectada: ") for w in msgs]
                        entries.append((r.original_path, ", ".join(items), annotation))
                    elif step_name == "check_foreign":
                        items = [w.removeprefix("Fichero extraño: ") for w in msgs]
                        entries.append((r.original_path, ", ".join(items), annotation))
                    else:
                        entries.append((r.original_path, msgs[0], annotation))
            if entries:
@@ -3,7 +3,7 @@
 import os
 import re
-from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
+from core.constants import IMAGE_EXTENSIONS, TRASH_FILES, FOREIGN_ALLOWED
 from core.result import StepResult
@@ -104,6 +104,20 @@ def check_image_extensions(names: list[str]) -> StepResult:
    return StepResult(step="check_image_extensions", changed=False, warnings=warnings)
 def check_foreign(names: list[str]) -> StepResult:
    """Detect files that are neither images nor explicitly allowed metadata.

    Each name is normalized so that backslashes count as path separators,
    then reduced to its last path component. An entry is reported when its
    lowercased extension is not in IMAGE_EXTENSIONS and its lowercased
    basename is not in FOREIGN_ALLOWED.

    Args:
        names: Entry names to inspect (presumably the archive listing;
            verify against the caller).

    Returns:
        A StepResult for step "check_foreign" with changed=False and one
        "Fichero extraño: <name>" warning per offending entry, sorted by name.
    """
    found = []
    for name in names:
        normalized = name.replace("\\", "/")
        # A directory entry ends with a separator. It has to be skipped
        # *before* the trailing slash is removed: stripping first turns
        # "dir/" into "dir", which has no image extension and would be
        # reported as a foreign file.
        if not normalized or normalized.endswith("/"):
            continue
        basename = normalized.rsplit("/", 1)[-1]
        ext = os.path.splitext(basename)[1].lower()
        if ext not in IMAGE_EXTENSIONS and basename.lower() not in FOREIGN_ALLOWED:
            found.append(name)
    warnings = [f"Fichero extraño: {f}" for f in sorted(found)]
    return StepResult(step="check_foreign", changed=False, warnings=warnings)
 def check_comicinfo(names: list[str]) -> StepResult:
    """Detecta ausencia de ComicInfo.xml."""
    found = any(
@@ -2,7 +2,7 @@
 import os
 import shutil
-from core.constants import TRASH_FILES
+from core.constants import TRASH_FILES, IMAGE_EXTENSIONS, FOREIGN_ALLOWED
 from core.result import StepResult
@@ -19,6 +19,11 @@ def clean_directory(work_dir: str) -> StepResult:
                full = os.path.join(root, f)
                os.remove(full)
                removed.append(os.path.relpath(full, work_dir))
            elif os.path.splitext(f)[1].lower() not in IMAGE_EXTENSIONS \
                    and f.lower() not in FOREIGN_ALLOWED:
                full = os.path.join(root, f)
                os.remove(full)
                removed.append(os.path.relpath(full, work_dir))
        for d in dirs:
            if d.lower() in TRASH_FILES:
                full = os.path.join(root, d)