# processors/checks.py
"""Read-only checks over the member names of an archive.

Every ``check_*`` function receives the list of entry names, never modifies
anything (``changed`` is always ``False``) and reports its findings as
warning strings inside a ``StepResult``.
"""
import os
import re

from core.constants import IMAGE_EXTENSIONS, TRASH_FILES, FOREIGN_ALLOWED
from core.result import StepResult

# The capture group makes ``split`` keep the digit runs in its result.
_DIGIT_RUN_RE = re.compile(r"(\d+)")


def _basename(name: str) -> str:
    """Return the last path component of *name*, accepting ``\\`` or ``/``.

    The result is an empty string for directory entries (names ending in a
    separator) and for empty names.
    """
    return name.replace("\\", "/").rsplit("/", 1)[-1]


def check_trash(names: list[str]) -> StepResult:
    """Detect trash files and directories present in the archive.

    A path is reported once, truncated at its outermost component whose
    lower-cased name is in ``TRASH_FILES``; anything nested below that
    component is not reported separately.

    Args:
        names: Archive entry names.

    Returns:
        A ``StepResult`` with one warning per distinct trash path, sorted.
    """
    trash_paths: set[str] = set()
    for name in names:
        parts = name.replace("\\", "/").rstrip("/").split("/")
        for depth, part in enumerate(parts, start=1):
            if part.lower() in TRASH_FILES:
                trash_paths.add("/".join(parts[:depth]))
                break  # only the outermost trash component matters

    warnings = [f"Basura detectada: {path}" for path in sorted(trash_paths)]
    return StepResult(step="check_trash", changed=False, warnings=warnings)


def _natural_sort_key(name: str) -> list:
    """Return a key that sorts *name* with embedded numbers compared as ints.

    Splitting on a captured ``\\d+`` always yields text at even indexes and
    digit runs at odd indexes, so the conversion is chosen by position.
    Testing ``str.isdigit()`` instead is unsafe: characters such as ``"²"``
    satisfy ``isdigit()`` but are neither matched by ``\\d`` nor accepted by
    ``int()``, which made names like ``"page1²"`` raise ``ValueError``.
    """
    chunks = _DIGIT_RUN_RE.split(name)
    return [
        int(chunk) if index % 2 else chunk.lower()
        for index, chunk in enumerate(chunks)
    ]


def check_page_numbering(names: list[str]) -> StepResult:
    """Detect badly numbered pages: missing or inconsistent padding, gaps.

    Only entries whose extension is in ``IMAGE_EXTENSIONS`` are considered,
    and only the first run of digits in each file stem is used as the page
    number. Images without any digit in the stem are counted towards the
    total but otherwise ignored.

    Args:
        names: Archive entry names.

    Returns:
        A ``StepResult`` whose warnings flag inconsistent widths or
        insufficient zero-padding (checked only with 10 or more images) and
        non-consecutive numbering (duplicates count as a break too).
    """
    image_count = 0
    digit_runs: list[str] = []
    for name in names:
        if os.path.splitext(name)[1].lower() not in IMAGE_EXTENSIONS:
            continue
        image_count += 1
        stem = os.path.splitext(_basename(name))[0]
        match = _DIGIT_RUN_RE.search(stem)
        if match:
            digit_runs.append(match.group(1))

    # Nothing to verify without images or without any numbered image.
    if not image_count or not digit_runs:
        return StepResult(step="check_page_numbering", changed=False)

    # No pre-sorting of the images is needed: widths are compared as a set
    # and the page numbers are sorted on their own below.
    warnings = []

    # Zero-padding only matters once there are two-digit page counts.
    if image_count >= 10:
        required_width = len(str(image_count))
        widths = {len(run) for run in digit_runs}
        narrowest = min(widths)
        if len(widths) > 1:
            warnings.append("Padding inconsistente entre páginas")
        elif narrowest < required_width:
            warnings.append(
                f"Páginas no zero-padded: se necesita ancho {required_width}, "
                f"encontrado {narrowest}"
            )

    # The numbers must form one unbroken run starting at the lowest value.
    page_numbers = sorted(int(run) for run in digit_runs)
    first = page_numbers[0]
    if page_numbers != list(range(first, first + len(page_numbers))):
        warnings.append("Páginas no secuenciales: hay saltos en la numeración")

    return StepResult(step="check_page_numbering", changed=False, warnings=warnings)


def check_image_extensions(names: list[str]) -> StepResult:
    """Detect a mix of image formats in the archive.

    ``.jpeg`` is treated as ``.jpg``, so those two alone are not a mix.

    Args:
        names: Archive entry names.

    Returns:
        A ``StepResult`` with a single warning listing the distinct
        (lower-cased, normalized) image extensions when more than one is used.
    """
    lowered = (os.path.splitext(name)[1].lower() for name in names)
    formats = {
        ".jpg" if ext == ".jpeg" else ext
        for ext in lowered
        if ext in IMAGE_EXTENSIONS
    }

    warnings = []
    if len(formats) > 1:
        exts_str = ", ".join(sorted(formats))
        warnings.append(f"Extensiones de imagen mezcladas: {exts_str}")
    return StepResult(step="check_image_extensions", changed=False, warnings=warnings)


def check_foreign(names: list[str]) -> StepResult:
    """Detect files that are neither images nor allowed metadata.

    Directory entries are always ignored. Files whose lower-cased basename
    is in ``TRASH_FILES`` are skipped here as well.

    Args:
        names: Archive entry names.

    Returns:
        A ``StepResult`` with one warning per foreign file, using the entry
        name exactly as received, sorted.
    """
    foreign = []
    for name in names:
        basename = _basename(name)
        if not basename:
            continue  # directory entry (trailing separator) or empty name
        lowered = basename.lower()
        if os.path.splitext(basename)[1].lower() in IMAGE_EXTENSIONS:
            continue
        if lowered in FOREIGN_ALLOWED or lowered in TRASH_FILES:
            continue
        foreign.append(name)

    warnings = [f"Fichero extraño: {entry}" for entry in sorted(foreign)]
    return StepResult(step="check_foreign", changed=False, warnings=warnings)


def check_nested(names: list[str]) -> StepResult:
    """Detect images stored in subdirectories instead of the archive root.

    Only the top-level directory of each nested image is recorded,
    regardless of how deep the image sits below it.

    Args:
        names: Archive entry names.

    Returns:
        A ``StepResult`` with no warnings when every image is at the root,
        otherwise a single warning naming the top-level directory (or the
        sorted list of directories) that contain images.
    """
    subdirs_with_images: set[str] = set()
    for name in names:
        normalized = name.replace("\\", "/")
        if normalized.endswith("/"):
            continue  # directory entry
        parts = normalized.split("/")
        if len(parts) < 2:
            continue  # file at the root
        if os.path.splitext(parts[-1])[1].lower() in IMAGE_EXTENSIONS:
            subdirs_with_images.add(parts[0])

    if not subdirs_with_images:
        return StepResult(step="check_nested", changed=False)

    if len(subdirs_with_images) == 1:
        (subdir,) = subdirs_with_images
        warnings = [f"Imágenes en subdirectorio: {subdir}/"]
    else:
        listing = ", ".join(sorted(subdirs_with_images))
        warnings = [f"Múltiples subdirectorios con imágenes: {listing}"]
    return StepResult(step="check_nested", changed=False, warnings=warnings)


def check_comicinfo(names: list[str]) -> StepResult:
    """Detect a missing ``ComicInfo.xml``.

    The file is looked up by basename, case-insensitively, at any depth.

    Args:
        names: Archive entry names.

    Returns:
        A ``StepResult`` with one warning when no such entry exists.
    """
    has_comicinfo = any(
        _basename(name).lower() == "comicinfo.xml" for name in names
    )
    warnings = [] if has_comicinfo else ["Falta ComicInfo.xml"]
    return StepResult(step="check_comicinfo", changed=False, warnings=warnings)


def check_extension_case(names: list[str], mode: str = "lower") -> StepResult:
    """Detect images whose extension is not in the expected letter case.

    Args:
        names: Archive entry names.
        mode: ``"lower"`` expects lower-case extensions; any other value is
            treated as upper case.

    Returns:
        A ``StepResult`` listing each offending basename when there are at
        most three of them, or a single warning with the count otherwise.
    """
    mismatches = []
    for name in names:
        basename = _basename(name)
        # Directory entries have an empty basename and therefore no extension.
        ext = os.path.splitext(basename)[1]
        if not ext or ext.lower() not in IMAGE_EXTENSIONS:
            continue
        expected_ext = ext.lower() if mode == "lower" else ext.upper()
        if ext != expected_ext:
            mismatches.append(basename)

    if not mismatches:
        warnings = []
    elif len(mismatches) <= 3:
        warnings = [f"Case incorrecto en extensión: {f}" for f in sorted(mismatches)]
    else:
        warnings = [f"Case incorrecto en extensión: {len(mismatches)} ficheros"]
    return StepResult(step="check_extension_case", changed=False, warnings=warnings)