From 448c50b846cb39909393be2f9b365925cac7fd56 Mon Sep 17 00:00:00 2001 From: Sergio Date: Fri, 20 Feb 2026 12:50:03 +0100 Subject: [PATCH] =?UTF-8?q?detecci=C3=B3=20de=20fitxers=20extranys=20en=20?= =?UTF-8?q?el=20fitxer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/constants.py | 2 ++ core/pipeline.py | 20 +++++++++++--------- core/summary.py | 4 ++++ processors/checks.py | 16 +++++++++++++++- processors/cleaner.py | 7 ++++++- 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/core/constants.py b/core/constants.py index d7ea31b..8d9d070 100644 --- a/core/constants.py +++ b/core/constants.py @@ -8,3 +8,5 @@ TRASH_FILES = { "__macosx", "desktop.ini", } + +FOREIGN_ALLOWED = {"comicinfo.xml"} # nombres exactos (lowercase) permitidos aunque no sean imágenes diff --git a/core/pipeline.py b/core/pipeline.py index 641fff8..bd3658f 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -18,6 +18,7 @@ from processors.checks import ( check_page_numbering, check_image_extensions, check_comicinfo, + check_foreign, ) from processors.page_normalizer import normalize_pages, preview_normalize_pages from processors.image_normalizer import ( @@ -45,12 +46,11 @@ class Pipeline: def _compute_preview(self, step: str, temp_dir: str, step_results: list) -> dict: if step == "clean": - trash_result = next((r for r in step_results if r.step == "check_trash"), None) - if trash_result: - prefix = "Basura detectada: " - items = [w.removeprefix(prefix) for w in trash_result.warnings if w.startswith(prefix)] - else: - items = [] + trash_r = next((r for r in step_results if r.step == "check_trash"), None) + foreign_r = next((r for r in step_results if r.step == "check_foreign"), None) + prefix = "Basura detectada: " + items = [w.removeprefix(prefix) for w in (trash_r.warnings if trash_r else []) if w.startswith(prefix)] + items += [w.removeprefix("Fichero extraño: ") for w in (foreign_r.warnings if foreign_r else [])] return {"items": items} elif step == "normalize_pages": @@ -81,8 +81,9 @@ class Pipeline: if ext != self.desired_format: return True if step == "clean": - trash = next((r for r in step_results if r.step == "check_trash"), None) - if trash and trash.warnings: + trash = next((r for r in step_results if r.step == "check_trash"), None) + foreign = next((r for r in step_results if r.step == "check_foreign"), None) + if (trash and trash.warnings) or (foreign and foreign.warnings): return True return False @@ -104,12 +105,13 @@ class Pipeline: step_results.append(StepResult(step="list", changed=False, errors=[str(exc)])) return ComicResult(original_path=path, final_path=None, steps=step_results) - # 3. Ejecutar siempre los 4 content checks sobre los nombres (sin extraer) + # 3. Ejecutar siempre los content checks sobre los nombres (sin extraer) step_results += [ check_trash(names), check_page_numbering(names), check_image_extensions(names), check_comicinfo(names), + check_foreign(names), ] # 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo diff --git a/core/summary.py b/core/summary.py index aae921b..2d7731f 100644 --- a/core/summary.py +++ b/core/summary.py @@ -131,6 +131,7 @@ class SummaryCollector: categories = [ ("Extensión incorrecta", "validate", lambda w: "Extensión incorrecta" in w, ["convert"], "convertido"), ("Basura detectada", "check_trash", lambda w: True, ["clean"], "limpiado"), + ("Ficheros extraños", "check_foreign", lambda w: True, ["clean"], "limpiado"), ("Numeración de páginas", "check_page_numbering", lambda w: True, ["normalize_pages"], "renumerado"), ("Imágenes mezcladas", "check_image_extensions", lambda w: True, ["normalize_images", "convert_images"], "normalizado"), ("Sin ComicInfo.xml", "check_comicinfo", lambda w: True, [], None), @@ -155,6 +156,9 @@ class SummaryCollector: elif step_name == "check_trash": items = [w.removeprefix("Basura detectada: ") for w in msgs] entries.append((r.original_path, ", ".join(items), annotation)) + elif step_name == "check_foreign": + items = [w.removeprefix("Fichero extraño: ") for w in msgs] + entries.append((r.original_path, ", ".join(items), annotation)) else: entries.append((r.original_path, msgs[0], annotation)) if entries: diff --git a/processors/checks.py b/processors/checks.py index df5fd36..56425d7 100644 --- a/processors/checks.py +++ b/processors/checks.py @@ -3,7 +3,7 @@ import os import re -from core.constants import IMAGE_EXTENSIONS, TRASH_FILES +from core.constants import IMAGE_EXTENSIONS, TRASH_FILES, FOREIGN_ALLOWED from core.result import StepResult @@ -104,6 +104,20 @@ def check_image_extensions(names: list[str]) -> StepResult: return StepResult(step="check_image_extensions", changed=False, warnings=warnings) +def check_foreign(names: list[str]) -> StepResult: + """Detecta ficheros que no son imágenes ni metadata permitida.""" + found = [] + for name in names: + basename = name.replace("\\", "/").rstrip("/").rsplit("/", 1)[-1] + if not basename: + continue # entrada de directorio + ext = os.path.splitext(basename)[1].lower() + if ext not in IMAGE_EXTENSIONS and basename.lower() not in FOREIGN_ALLOWED: + found.append(name) + warnings = [f"Fichero extraño: {f}" for f in sorted(found)] + return StepResult(step="check_foreign", changed=False, warnings=warnings) + + def check_comicinfo(names: list[str]) -> StepResult: """Detecta ausencia de ComicInfo.xml.""" found = any( diff --git a/processors/cleaner.py b/processors/cleaner.py index 2d290e4..dca12f0 100644 --- a/processors/cleaner.py +++ b/processors/cleaner.py @@ -2,7 +2,7 @@ import os import shutil -from core.constants import TRASH_FILES +from core.constants import TRASH_FILES, IMAGE_EXTENSIONS, FOREIGN_ALLOWED from core.result import StepResult @@ -19,6 +19,11 @@ def clean_directory(work_dir: str) -> StepResult: full = os.path.join(root, f) os.remove(full) removed.append(os.path.relpath(full, work_dir)) + elif os.path.splitext(f)[1].lower() not in IMAGE_EXTENSIONS \ + and f.lower() not in FOREIGN_ALLOWED: + full = os.path.join(root, f) + os.remove(full) + removed.append(os.path.relpath(full, work_dir)) for d in dirs: if d.lower() in TRASH_FILES: full = os.path.join(root, d)