Files
comic-manager/processors/checks.py
T

115 lines
3.9 KiB
Python

# processors/checks.py
import os
import re
from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
from core.result import StepResult
def check_trash(names: list[str]) -> StepResult:
    """Report junk files and directories present in the archive.

    Each entry in ``names`` is split into path components (backslashes are
    treated as separators and a trailing slash is ignored). The first
    component whose lowercase form is in ``TRASH_FILES`` marks the entry as
    junk, and the path up to and including that component is reported once.

    Returns a ``StepResult`` with ``changed=False`` and one warning per
    distinct junk path, in sorted order.
    """
    trash_paths = set()
    for entry in names:
        components = entry.replace("\\", "/").rstrip("/").split("/")
        for depth, component in enumerate(components, start=1):
            if component.lower() in TRASH_FILES:
                # Only the outermost junk component is reported; anything
                # nested below it is covered by that single path.
                trash_paths.add("/".join(components[:depth]))
                break

    return StepResult(
        step="check_trash",
        changed=False,
        warnings=[f"Basura detectada: {path}" for path in sorted(trash_paths)],
    )
def _natural_sort_key(name: str):
parts = re.split(r"(\d+)", name)
return [int(p) if p.isdigit() else p.lower() for p in parts]
def check_page_numbering(names: list[str]) -> StepResult:
    """Flag badly numbered pages: missing zero-padding, mixed padding, gaps.

    Only entries whose extension is in ``IMAGE_EXTENSIONS`` are considered,
    and only their basename is inspected. The page number of an image is the
    first run of digits in its stem; images without digits are ignored by
    the individual checks but still count towards the total.

    Returns a ``StepResult`` with ``changed=False``. No warnings are produced
    when there are no images or none of them carries a number.
    """
    step = "check_page_numbering"

    pages = [
        entry.replace("\\", "/").rsplit("/", 1)[-1]
        for entry in names
        if os.path.splitext(entry)[1].lower() in IMAGE_EXTENSIONS
    ]
    pages.sort(key=lambda page: _natural_sort_key(os.path.splitext(page)[0]))
    if not pages:
        return StepResult(step=step, changed=False)

    # First run of digits in each stem, kept as text so its width is known.
    digit_runs = []
    for page in pages:
        hit = re.search(r"(\d+)", os.path.splitext(page)[0])
        if hit is not None:
            digit_runs.append(hit.group(1))
    if not digit_runs:
        return StepResult(step=step, changed=False)

    problems = []

    # Zero-padding only matters once there are at least 10 images.
    page_count = len(pages)
    if page_count >= 10:
        needed_width = len(str(page_count))
        widths = {len(run) for run in digit_runs}
        if len(widths) > 1:
            problems.append("Padding inconsistente entre páginas")
        else:
            # digit_runs is non-empty here, so widths holds exactly one value.
            found_width = min(widths)
            if found_width < needed_width:
                problems.append(
                    f"Páginas no zero-padded: se necesita ancho {needed_width}, "
                    f"encontrado {found_width}"
                )

    # The numbers must form one consecutive run starting at the smallest one;
    # duplicates therefore count as a break in the sequence too.
    numbers = sorted(int(run) for run in digit_runs)
    start = numbers[0]
    if numbers != list(range(start, start + len(numbers))):
        problems.append("Páginas no secuenciales: hay saltos en la numeración")

    return StepResult(step=step, changed=False, warnings=problems)
def check_image_extensions(names: list[str]) -> StepResult:
    """Warn when the archive mixes more than one image format.

    Extensions are compared in lowercase and only those in
    ``IMAGE_EXTENSIONS`` are counted; ``.jpeg`` is treated as ``.jpg``.

    Returns a ``StepResult`` with ``changed=False`` and a single warning
    listing the sorted extensions when more than one format is present.
    """
    suffixes = (os.path.splitext(entry)[1].lower() for entry in names)
    formats = {
        # .jpeg and .jpg are the same format, so they count only once.
        ".jpg" if suffix == ".jpeg" else suffix
        for suffix in suffixes
        if suffix in IMAGE_EXTENSIONS
    }

    if len(formats) <= 1:
        return StepResult(step="check_image_extensions", changed=False, warnings=[])

    exts_str = ", ".join(sorted(formats))
    return StepResult(
        step="check_image_extensions",
        changed=False,
        warnings=[f"Extensiones de imagen mezcladas: {exts_str}"],
    )
def check_comicinfo(names: list[str]) -> StepResult:
    """Warn when the archive has no ComicInfo.xml.

    The file is matched by basename, case-insensitively, at any depth;
    backslashes are treated as path separators.

    Returns a ``StepResult`` with ``changed=False`` and one warning if no
    entry matches.
    """
    missing = ["Falta ComicInfo.xml"]
    for entry in names:
        basename = entry.replace("\\", "/").rsplit("/", 1)[-1]
        if basename.lower() == "comicinfo.xml":
            missing = []
            break
    return StepResult(step="check_comicinfo", changed=False, warnings=missing)