detecció de fitxers estranys en el fitxer

This commit is contained in:
2026-02-20 12:50:03 +01:00
parent eeb9ce6879
commit 448c50b846
5 changed files with 38 additions and 11 deletions
+2
View File
@@ -8,3 +8,5 @@ TRASH_FILES = {
"__macosx", "__macosx",
"desktop.ini", "desktop.ini",
} }
# Exact (lowercase) file names that are allowed even though they are not images.
FOREIGN_ALLOWED = {
    "comicinfo.xml",
}
+11 -9
View File
@@ -18,6 +18,7 @@ from processors.checks import (
check_page_numbering, check_page_numbering,
check_image_extensions, check_image_extensions,
check_comicinfo, check_comicinfo,
check_foreign,
) )
from processors.page_normalizer import normalize_pages, preview_normalize_pages from processors.page_normalizer import normalize_pages, preview_normalize_pages
from processors.image_normalizer import ( from processors.image_normalizer import (
@@ -45,12 +46,11 @@ class Pipeline:
def _compute_preview(self, step: str, temp_dir: str, step_results: list) -> dict: def _compute_preview(self, step: str, temp_dir: str, step_results: list) -> dict:
if step == "clean": if step == "clean":
trash_result = next((r for r in step_results if r.step == "check_trash"), None) trash_r = next((r for r in step_results if r.step == "check_trash"), None)
if trash_result: foreign_r = next((r for r in step_results if r.step == "check_foreign"), None)
prefix = "Basura detectada: " prefix = "Basura detectada: "
items = [w.removeprefix(prefix) for w in trash_result.warnings if w.startswith(prefix)] items = [w.removeprefix(prefix) for w in (trash_r.warnings if trash_r else []) if w.startswith(prefix)]
else: items += [w.removeprefix("Fichero extraño: ") for w in (foreign_r.warnings if foreign_r else [])]
items = []
return {"items": items} return {"items": items}
elif step == "normalize_pages": elif step == "normalize_pages":
@@ -81,8 +81,9 @@ class Pipeline:
if ext != self.desired_format: if ext != self.desired_format:
return True return True
if step == "clean": if step == "clean":
trash = next((r for r in step_results if r.step == "check_trash"), None) trash = next((r for r in step_results if r.step == "check_trash"), None)
if trash and trash.warnings: foreign = next((r for r in step_results if r.step == "check_foreign"), None)
if (trash and trash.warnings) or (foreign and foreign.warnings):
return True return True
return False return False
@@ -104,12 +105,13 @@ class Pipeline:
step_results.append(StepResult(step="list", changed=False, errors=[str(exc)])) step_results.append(StepResult(step="list", changed=False, errors=[str(exc)]))
return ComicResult(original_path=path, final_path=None, steps=step_results) return ComicResult(original_path=path, final_path=None, steps=step_results)
# 3. Ejecutar siempre los 4 content checks sobre los nombres (sin extraer) # 3. Ejecutar siempre los content checks sobre los nombres (sin extraer)
step_results += [ step_results += [
check_trash(names), check_trash(names),
check_page_numbering(names), check_page_numbering(names),
check_image_extensions(names), check_image_extensions(names),
check_comicinfo(names), check_comicinfo(names),
check_foreign(names),
] ]
# 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo # 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo
+4
View File
@@ -131,6 +131,7 @@ class SummaryCollector:
categories = [ categories = [
("Extensión incorrecta", "validate", lambda w: "Extensión incorrecta" in w, ["convert"], "convertido"), ("Extensión incorrecta", "validate", lambda w: "Extensión incorrecta" in w, ["convert"], "convertido"),
("Basura detectada", "check_trash", lambda w: True, ["clean"], "limpiado"), ("Basura detectada", "check_trash", lambda w: True, ["clean"], "limpiado"),
("Ficheros extraños", "check_foreign", lambda w: True, ["clean"], "limpiado"),
("Numeración de páginas", "check_page_numbering", lambda w: True, ["normalize_pages"], "renumerado"), ("Numeración de páginas", "check_page_numbering", lambda w: True, ["normalize_pages"], "renumerado"),
("Imágenes mezcladas", "check_image_extensions", lambda w: True, ["normalize_images", "convert_images"], "normalizado"), ("Imágenes mezcladas", "check_image_extensions", lambda w: True, ["normalize_images", "convert_images"], "normalizado"),
("Sin ComicInfo.xml", "check_comicinfo", lambda w: True, [], None), ("Sin ComicInfo.xml", "check_comicinfo", lambda w: True, [], None),
@@ -155,6 +156,9 @@ class SummaryCollector:
elif step_name == "check_trash": elif step_name == "check_trash":
items = [w.removeprefix("Basura detectada: ") for w in msgs] items = [w.removeprefix("Basura detectada: ") for w in msgs]
entries.append((r.original_path, ", ".join(items), annotation)) entries.append((r.original_path, ", ".join(items), annotation))
elif step_name == "check_foreign":
items = [w.removeprefix("Fichero extraño: ") for w in msgs]
entries.append((r.original_path, ", ".join(items), annotation))
else: else:
entries.append((r.original_path, msgs[0], annotation)) entries.append((r.original_path, msgs[0], annotation))
if entries: if entries:
+15 -1
View File
@@ -3,7 +3,7 @@
import os import os
import re import re
from core.constants import IMAGE_EXTENSIONS, TRASH_FILES from core.constants import IMAGE_EXTENSIONS, TRASH_FILES, FOREIGN_ALLOWED
from core.result import StepResult from core.result import StepResult
@@ -104,6 +104,20 @@ def check_image_extensions(names: list[str]) -> StepResult:
return StepResult(step="check_image_extensions", changed=False, warnings=warnings) return StepResult(step="check_image_extensions", changed=False, warnings=warnings)
def check_foreign(names: list[str]) -> StepResult:
    """Detect archive entries that are neither images nor allowed metadata.

    Args:
        names: Entry names as listed in the archive. Both forward slashes
            and backslashes are accepted as path separators.

    Returns:
        A ``StepResult`` for step ``"check_foreign"`` with ``changed=False``
        and one ``"Fichero extraño: <name>"`` warning per offending entry,
        sorted by entry name.
    """
    found = []
    for name in names:
        normalized = name.replace("\\", "/")
        # A trailing separator marks a directory entry. This must be tested
        # BEFORE stripping the separator: otherwise "pages/" collapses to
        # "pages" and is misreported as an extensionless foreign file.
        if not normalized or normalized.endswith("/"):
            continue
        basename = normalized.rsplit("/", 1)[-1]
        ext = os.path.splitext(basename)[1].lower()
        # Foreign = not an image extension and not an explicitly allowed name.
        if ext not in IMAGE_EXTENSIONS and basename.lower() not in FOREIGN_ALLOWED:
            found.append(name)
    warnings = [f"Fichero extraño: {f}" for f in sorted(found)]
    return StepResult(step="check_foreign", changed=False, warnings=warnings)
def check_comicinfo(names: list[str]) -> StepResult: def check_comicinfo(names: list[str]) -> StepResult:
"""Detecta ausencia de ComicInfo.xml.""" """Detecta ausencia de ComicInfo.xml."""
found = any( found = any(
+6 -1
View File
@@ -2,7 +2,7 @@
import os import os
import shutil import shutil
from core.constants import TRASH_FILES from core.constants import TRASH_FILES, IMAGE_EXTENSIONS, FOREIGN_ALLOWED
from core.result import StepResult from core.result import StepResult
@@ -19,6 +19,11 @@ def clean_directory(work_dir: str) -> StepResult:
full = os.path.join(root, f) full = os.path.join(root, f)
os.remove(full) os.remove(full)
removed.append(os.path.relpath(full, work_dir)) removed.append(os.path.relpath(full, work_dir))
elif os.path.splitext(f)[1].lower() not in IMAGE_EXTENSIONS \
and f.lower() not in FOREIGN_ALLOWED:
full = os.path.join(root, f)
os.remove(full)
removed.append(os.path.relpath(full, work_dir))
for d in dirs: for d in dirs:
if d.lower() in TRASH_FILES: if d.lower() in TRASH_FILES:
full = os.path.join(root, d) full = os.path.join(root, d)