From 448c50b846cb39909393be2f9b365925cac7fd56 Mon Sep 17 00:00:00 2001
From: Sergio <jaildesigner@gmail.com>
Date: Fri, 20 Feb 2026 12:50:03 +0100
Subject: [PATCH] =?UTF-8?q?detecci=C3=B3=20de=20fitxers=20extranys=20en=20?=
 =?UTF-8?q?el=20fitxer?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 core/constants.py     |  2 ++
 core/pipeline.py      | 20 +++++++++++---------
 core/summary.py       |  4 ++++
 processors/checks.py  | 16 +++++++++++++++-
 processors/cleaner.py |  7 ++++++-
 5 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/core/constants.py b/core/constants.py
index d7ea31b..8d9d070 100644
--- a/core/constants.py
+++ b/core/constants.py
@@ -8,3 +8,5 @@ TRASH_FILES = {
     "__macosx",
     "desktop.ini",
 }
+
+FOREIGN_ALLOWED = {"comicinfo.xml"}  # nombres exactos (lowercase) permitidos aunque no sean imágenes
diff --git a/core/pipeline.py b/core/pipeline.py
index 641fff8..bd3658f 100644
--- a/core/pipeline.py
+++ b/core/pipeline.py
@@ -18,6 +18,7 @@ from processors.checks import (
     check_page_numbering,
     check_image_extensions,
     check_comicinfo,
+    check_foreign,
 )
 from processors.page_normalizer import normalize_pages, preview_normalize_pages
 from processors.image_normalizer import (
@@ -45,12 +46,11 @@ class Pipeline:
 
     def _compute_preview(self, step: str, temp_dir: str, step_results: list) -> dict:
         if step == "clean":
-            trash_result = next((r for r in step_results if r.step == "check_trash"), None)
-            if trash_result:
-                prefix = "Basura detectada: "
-                items = [w.removeprefix(prefix) for w in trash_result.warnings if w.startswith(prefix)]
-            else:
-                items = []
+            trash_r   = next((r for r in step_results if r.step == "check_trash"),   None)
+            foreign_r = next((r for r in step_results if r.step == "check_foreign"), None)
+            prefix = "Basura detectada: "
+            items  = [w.removeprefix(prefix) for w in (trash_r.warnings   if trash_r   else []) if w.startswith(prefix)]
+            items += [w.removeprefix("Fichero extraño: ") for w in (foreign_r.warnings if foreign_r else [])]
             return {"items": items}
 
         elif step == "normalize_pages":
@@ -81,8 +81,9 @@ class Pipeline:
                 if ext != self.desired_format:
                     return True
             if step == "clean":
-                trash = next((r for r in step_results if r.step == "check_trash"), None)
-                if trash and trash.warnings:
+                trash   = next((r for r in step_results if r.step == "check_trash"),   None)
+                foreign = next((r for r in step_results if r.step == "check_foreign"), None)
+                if (trash and trash.warnings) or (foreign and foreign.warnings):
                     return True
         return False
 
@@ -104,12 +105,13 @@ class Pipeline:
             step_results.append(StepResult(step="list", changed=False, errors=[str(exc)]))
             return ComicResult(original_path=path, final_path=None, steps=step_results)
 
-        # 3. Ejecutar siempre los 4 content checks sobre los nombres (sin extraer)
+        # 3. Ejecutar siempre los content checks sobre los nombres (sin extraer)
         step_results += [
             check_trash(names),
             check_page_numbering(names),
             check_image_extensions(names),
             check_comicinfo(names),
+            check_foreign(names),
         ]
 
         # 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo
diff --git a/core/summary.py b/core/summary.py
index aae921b..2d7731f 100644
--- a/core/summary.py
+++ b/core/summary.py
@@ -131,6 +131,7 @@ class SummaryCollector:
         categories = [
             ("Extensión incorrecta",  "validate",               lambda w: "Extensión incorrecta" in w, ["convert"],                           "convertido"),
             ("Basura detectada",       "check_trash",            lambda w: True,                         ["clean"],                             "limpiado"),
+            ("Ficheros extraños",      "check_foreign",          lambda w: True,                         ["clean"],                             "limpiado"),
             ("Numeración de páginas",  "check_page_numbering",   lambda w: True,                         ["normalize_pages"],                    "renumerado"),
             ("Imágenes mezcladas",     "check_image_extensions", lambda w: True,                         ["normalize_images", "convert_images"], "normalizado"),
             ("Sin ComicInfo.xml",      "check_comicinfo",        lambda w: True,                         [],                                    None),
@@ -155,6 +156,9 @@ class SummaryCollector:
                     elif step_name == "check_trash":
                         items = [w.removeprefix("Basura detectada: ") for w in msgs]
                         entries.append((r.original_path, ", ".join(items), annotation))
+                    elif step_name == "check_foreign":
+                        items = [w.removeprefix("Fichero extraño: ") for w in msgs]
+                        entries.append((r.original_path, ", ".join(items), annotation))
                     else:
                         entries.append((r.original_path, msgs[0], annotation))
             if entries:
diff --git a/processors/checks.py b/processors/checks.py
index df5fd36..56425d7 100644
--- a/processors/checks.py
+++ b/processors/checks.py
@@ -3,7 +3,7 @@
 import os
 import re
 
-from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
+from core.constants import IMAGE_EXTENSIONS, TRASH_FILES, FOREIGN_ALLOWED
 from core.result import StepResult
 
 
@@ -104,6 +104,20 @@ def check_image_extensions(names: list[str]) -> StepResult:
     return StepResult(step="check_image_extensions", changed=False, warnings=warnings)
 
 
+def check_foreign(names: list[str]) -> StepResult:
+    """Return a check_foreign StepResult warning about non-image, non-allowed entries."""
+    found = []
+    for name in names:
+        basename = name.replace("\\", "/").rsplit("/", 1)[-1]
+        if not basename:
+            continue  # directory entry: name ends in a separator, so nothing follows it
+        ext = os.path.splitext(basename)[1].lower()
+        if ext not in IMAGE_EXTENSIONS and basename.lower() not in FOREIGN_ALLOWED:
+            found.append(name)
+    warnings = [f"Fichero extraño: {f}" for f in sorted(found)]
+    return StepResult(step="check_foreign", changed=False, warnings=warnings)
+
+
 def check_comicinfo(names: list[str]) -> StepResult:
     """Detecta ausencia de ComicInfo.xml."""
     found = any(
diff --git a/processors/cleaner.py b/processors/cleaner.py
index 2d290e4..dca12f0 100644
--- a/processors/cleaner.py
+++ b/processors/cleaner.py
@@ -2,7 +2,7 @@
 
 import os
 import shutil
-from core.constants import TRASH_FILES
+from core.constants import TRASH_FILES, IMAGE_EXTENSIONS, FOREIGN_ALLOWED
 from core.result import StepResult
 
 
@@ -19,6 +19,11 @@ def clean_directory(work_dir: str) -> StepResult:
                 full = os.path.join(root, f)
                 os.remove(full)
                 removed.append(os.path.relpath(full, work_dir))
+            elif os.path.splitext(f)[1].lower() not in IMAGE_EXTENSIONS \
+                    and f.lower() not in FOREIGN_ALLOWED:
+                full = os.path.join(root, f)
+                os.remove(full)
+                removed.append(os.path.relpath(full, work_dir))
         for d in dirs:
             if d.lower() in TRASH_FILES:
                 full = os.path.join(root, d)