b462c9fb1e
duplica els resultats de fitxers brossa mes info en --listar --aplanar
183 lines
6.6 KiB
Python
183 lines
6.6 KiB
Python
# processors/checks.py
|
|
|
|
import os
|
|
import re
|
|
|
|
from core.constants import IMAGE_EXTENSIONS, TRASH_FILES, FOREIGN_ALLOWED
|
|
from core.result import StepResult
|
|
|
|
|
|
def check_trash(names: list[str]) -> StepResult:
    """Report trash files and directories present in the archive.

    Every entry is split into path components (backslashes count as
    separators and a trailing slash is ignored). For each entry, the path up
    to and including the first component whose lowercase form is in
    ``TRASH_FILES`` is recorded; components below it are not inspected.
    Identical paths are reported only once.

    Args:
        names: Entry names as listed in the archive.

    Returns:
        A ``StepResult`` for step ``"check_trash"`` with ``changed=False`` and
        one warning per distinct trash path, in sorted order.
    """
    trash_paths: set[str] = set()
    for entry in names:
        components = entry.replace("\\", "/").rstrip("/").split("/")
        # Position of the outermost trash component, or None when the entry
        # contains no trash component at all.
        hit = next(
            (
                idx
                for idx, component in enumerate(components)
                if component.lower() in TRASH_FILES
            ),
            None,
        )
        if hit is not None:
            # The set takes care of entries that share the same trash prefix.
            trash_paths.add("/".join(components[: hit + 1]))

    return StepResult(
        step="check_trash",
        changed=False,
        warnings=[f"Basura detectada: {path}" for path in sorted(trash_paths)],
    )
|
|
|
|
|
|
def _natural_sort_key(name: str):
|
|
parts = re.split(r"(\d+)", name)
|
|
return [int(p) if p.isdigit() else p.lower() for p in parts]
|
|
|
|
|
|
def check_page_numbering(names: list[str]) -> StepResult:
    """Report badly numbered pages: missing or inconsistent padding, and gaps.

    Only entries whose extension is in ``IMAGE_EXTENSIONS`` are considered,
    and only the first run of digits in each image's stem is looked at.

    Checks performed:
      * With 10 or more images, all digit runs must have the same width, and
        that width must be at least the number of digits in the image count.
      * The page numbers, once sorted, must form a run of consecutive
        integers starting at the smallest one.

    Args:
        names: Entry names as listed in the archive.

    Returns:
        A ``StepResult`` for step ``"check_page_numbering"`` with
        ``changed=False``. It carries no warnings when there are no images or
        when no image stem contains digits.
    """
    step = "check_page_numbering"

    pages = [
        entry.replace("\\", "/").rsplit("/", 1)[-1]
        for entry in names
        if os.path.splitext(entry)[1].lower() in IMAGE_EXTENSIONS
    ]
    # NOTE: the checks below only use a set of widths and a re-sorted list of
    # numbers, so they do not rely on this ordering.
    pages.sort(key=lambda page: _natural_sort_key(os.path.splitext(page)[0]))

    total = len(pages)
    if total == 0:
        return StepResult(step=step, changed=False)

    # First digit run of every stem: its numeric value and its written width.
    page_numbers = []
    digit_widths = []
    for page in pages:
        match = re.search(r"(\d+)", os.path.splitext(page)[0])
        if match is None:
            continue
        digits = match.group(1)
        page_numbers.append(int(digits))
        digit_widths.append(len(digits))

    if not page_numbers:
        return StepResult(step=step, changed=False)

    warnings = []

    # Zero-padding only matters once the page count reaches two digits.
    if total >= 10:
        required_width = len(str(total))
        distinct_widths = set(digit_widths)
        if len(distinct_widths) > 1:
            warnings.append("Padding inconsistente entre páginas")
        elif min(distinct_widths) < required_width:
            warnings.append(
                f"Páginas no zero-padded: se necesita ancho {required_width}, "
                f"encontrado {min(distinct_widths)}"
            )

    # The sorted numbers must be exactly start, start + 1, start + 2, ...
    ordered = sorted(page_numbers)
    start = ordered[0]
    if ordered != list(range(start, start + len(ordered))):
        warnings.append("Páginas no secuenciales: hay saltos en la numeración")

    return StepResult(step=step, changed=False, warnings=warnings)
|
|
|
|
|
|
def check_image_extensions(names: list[str]) -> StepResult:
    """Report archives that mix several image formats.

    Extensions are compared in lowercase, and ``.jpeg`` is counted as
    ``.jpg`` so the two spellings are not reported as a mix.

    Args:
        names: Entry names as listed in the archive.

    Returns:
        A ``StepResult`` for step ``"check_image_extensions"`` with
        ``changed=False`` and a single warning listing the formats (sorted)
        when more than one is present.
    """
    lowered_exts = (os.path.splitext(entry)[1].lower() for entry in names)
    formats = {
        ".jpg" if ext == ".jpeg" else ext
        for ext in lowered_exts
        if ext in IMAGE_EXTENSIONS
    }

    if len(formats) > 1:
        warnings = [f"Extensiones de imagen mezcladas: {', '.join(sorted(formats))}"]
    else:
        warnings = []

    return StepResult(step="check_image_extensions", changed=False, warnings=warnings)
|
|
|
|
|
|
def check_foreign(names: list[str]) -> StepResult:
    """Report files that are neither images nor allowed metadata.

    A file is accepted when its extension is in ``IMAGE_EXTENSIONS`` or its
    lowercase basename is in ``FOREIGN_ALLOWED`` or ``TRASH_FILES``.
    Directory entries (names ending in a separator) are always ignored.

    Args:
        names: Entry names as listed in the archive.

    Returns:
        A ``StepResult`` for step ``"check_foreign"`` with ``changed=False``
        and one warning per foreign entry, using the original entry name, in
        sorted order.
    """
    foreign = []
    for entry in names:
        path = entry.replace("\\", "/")
        basename = path.rsplit("/", 1)[-1]
        # An empty basename means a directory entry (trailing slash) or an
        # empty name; neither is a file to classify.
        if not basename:
            continue

        lowered = basename.lower()
        accepted = (
            os.path.splitext(basename)[1].lower() in IMAGE_EXTENSIONS
            or lowered in FOREIGN_ALLOWED
            or lowered in TRASH_FILES
        )
        if not accepted:
            foreign.append(entry)

    return StepResult(
        step="check_foreign",
        changed=False,
        warnings=[f"Fichero extraño: {entry}" for entry in sorted(foreign)],
    )
|
|
|
|
|
|
def check_nested(names: list[str]) -> StepResult:
    """Report images stored in subdirectories instead of the archive root.

    Only the first path component of each nested image is recorded, so images
    at any depth under the same top-level directory count once.

    Args:
        names: Entry names as listed in the archive.

    Returns:
        A ``StepResult`` for step ``"check_nested"`` with ``changed=False``.
        It has no warnings when every image is at the root, one warning
        naming the directory when there is exactly one, and one warning
        listing all of them (sorted) otherwise.
    """
    top_dirs: set[str] = set()
    for entry in names:
        path = entry.replace("\\", "/")
        if path.endswith("/"):
            continue  # directory entry
        top_dir, separator, _ = path.partition("/")
        if not separator:
            continue  # file at the archive root
        leaf = path.rsplit("/", 1)[-1]
        if os.path.splitext(leaf)[1].lower() in IMAGE_EXTENSIONS:
            top_dirs.add(top_dir)

    if not top_dirs:
        return StepResult(step="check_nested", changed=False)

    if len(top_dirs) == 1:
        (only_dir,) = top_dirs
        message = f"Imágenes en subdirectorio: {only_dir}/"
    else:
        message = f"Múltiples subdirectorios con imágenes: {', '.join(sorted(top_dirs))}"

    return StepResult(step="check_nested", changed=False, warnings=[message])
|
|
|
|
|
|
def check_comicinfo(names: list[str]) -> StepResult:
    """Report a missing ``ComicInfo.xml``.

    The file may live in any directory of the archive; its basename is
    compared case-insensitively.

    Args:
        names: Entry names as listed in the archive.

    Returns:
        A ``StepResult`` for step ``"check_comicinfo"`` with ``changed=False``
        and a single warning when no such file exists.
    """
    warnings = ["Falta ComicInfo.xml"]
    for entry in names:
        basename = entry.replace("\\", "/").rsplit("/", 1)[-1]
        if basename.lower() == "comicinfo.xml":
            # Found it: nothing to report, and no need to look further.
            warnings = []
            break

    return StepResult(step="check_comicinfo", changed=False, warnings=warnings)
|
|
|
|
|
|
def check_extension_case(names: list[str], mode: str = "lower") -> StepResult:
    """Report images whose extension is not written in the expected case.

    Args:
        names: Entry names as listed in the archive.
        mode: ``"lower"`` expects lowercase extensions; any other value
            expects uppercase ones.

    Returns:
        A ``StepResult`` for step ``"check_extension_case"`` with
        ``changed=False``. Up to three offending files are listed one per
        warning (sorted by basename); beyond that a single warning gives only
        the count.
    """
    wrong_case = []
    for entry in names:
        path = entry.replace("\\", "/")
        if path.endswith("/"):
            continue  # directory entry
        basename = path.rsplit("/", 1)[-1]
        ext = os.path.splitext(basename)[1]
        if not ext or ext.lower() not in IMAGE_EXTENSIONS:
            continue  # not an image
        expected = ext.lower() if mode == "lower" else ext.upper()
        if ext != expected:
            wrong_case.append(basename)

    if len(wrong_case) > 3:
        # Too many to list individually: summarise with a count.
        warnings = [f"Case incorrecto en extensión: {len(wrong_case)} ficheros"]
    else:
        warnings = [
            f"Case incorrecto en extensión: {basename}"
            for basename in sorted(wrong_case)
        ]

    return StepResult(step="check_extension_case", changed=False, warnings=warnings)
|