# processors/checks.py
import os
import re

from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
from core.result import StepResult


def check_trash(work_dir: str) -> StepResult:
    """Report junk files and directories found anywhere under *work_dir*.

    A file or directory counts as junk when its lower-cased name is a member
    of ``TRASH_FILES`` (presumably lower-case entries -- confirm against
    ``core.constants``). Nothing is deleted; one warning is produced per hit.

    Args:
        work_dir: Root directory to scan recursively.

    Returns:
        A ``StepResult`` for step ``"check_trash"`` with ``changed=False`` and
        one warning per junk entry. Paths are relative to *work_dir*;
        directories carry a trailing ``/``.
    """
    found = []
    for root, dirs, files in os.walk(work_dir):
        for name in files:
            if name.lower() in TRASH_FILES:
                found.append(os.path.relpath(os.path.join(root, name), work_dir))
        for name in dirs:
            if name.lower() in TRASH_FILES:
                # Trailing slash marks the entry as a directory in the report.
                found.append(os.path.relpath(os.path.join(root, name), work_dir) + "/")

    warnings = [f"Basura detectada: {f}" for f in found]
    return StepResult(step="check_trash", changed=False, warnings=warnings)


def _natural_sort_key(name: str):
    """Build a sort key that orders embedded numbers by value ("2" < "10").

    Args:
        name: The string to derive a key from.

    Returns:
        A list alternating lower-cased text (even indices) and ``int`` values
        (odd indices).
    """
    parts = re.split(r"(\d+)", name)
    # With a single capturing group, re.split alternates text / digit-run, so
    # the digit runs are exactly the odd indices. Classifying by position
    # instead of str.isdigit() avoids a ValueError on characters such as "²"
    # or "①": isdigit() is true for them, but \d never matches them and int()
    # cannot parse them.
    return [int(p) if i % 2 else p.lower() for i, p in enumerate(parts)]


def check_page_numbering(work_dir: str) -> StepResult:
    """Detect badly numbered pages: missing or inconsistent padding, and gaps.

    Every file under *work_dir* whose lower-cased extension is in
    ``IMAGE_EXTENSIONS`` is treated as a page. Only the bare file name is
    used, so pages from different sub-directories are analysed together. The
    page number is the first run of digits in the file stem.

    Args:
        work_dir: Root directory to scan recursively.

    Returns:
        A ``StepResult`` for step ``"check_page_numbering"`` with
        ``changed=False``. When there are no images, or none has a number, the
        result is built without a ``warnings`` argument.
    """
    images = []
    for _, _, files in os.walk(work_dir):
        for f in files:
            if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS:
                images.append(f)
    images.sort(key=lambda f: _natural_sort_key(os.path.splitext(f)[0]))

    total = len(images)
    if total == 0:
        return StepResult(step="check_page_numbering", changed=False)

    # First numeric run of each stem: its value and its width as written.
    values = []
    widths = set()
    for img in images:
        m = re.search(r"(\d+)", os.path.splitext(img)[0])
        if m:
            values.append(int(m.group(1)))
            widths.add(len(m.group(1)))

    if not values:
        return StepResult(step="check_page_numbering", changed=False)

    warnings = []

    # Zero-padding only matters once there are at least 10 images.
    if total >= 10:
        # Width needed to write the image count itself, e.g. 3 for 120 images.
        required_width = len(str(total))
        narrowest = min(widths)
        if len(widths) > 1:
            warnings.append("Padding inconsistente entre páginas")
        elif narrowest < required_width:
            warnings.append(
                f"Páginas no zero-padded: se necesita ancho {required_width}, "
                f"encontrado {narrowest}"
            )

    # The numbers must form a consecutive run starting at the smallest one;
    # duplicates break the run as well and are reported with the same warning.
    values.sort()
    first = values[0]
    if values != list(range(first, first + len(values))):
        warnings.append("Páginas no secuenciales: hay saltos en la numeración")

    return StepResult(step="check_page_numbering", changed=False, warnings=warnings)


def check_image_extensions(work_dir: str) -> StepResult:
    """Detect a mix of image formats under *work_dir*.

    Extensions are compared lower-cased, and ``.jpeg`` is treated as ``.jpg``.

    Args:
        work_dir: Root directory to scan recursively.

    Returns:
        A ``StepResult`` for step ``"check_image_extensions"`` with
        ``changed=False`` and a single warning listing the sorted extensions
        when more than one distinct extension is present.
    """
    ext_set = set()
    for _, _, files in os.walk(work_dir):
        for f in files:
            ext = os.path.splitext(f)[1].lower()
            if ext in IMAGE_EXTENSIONS:
                # Normalise .jpeg -> .jpg so both spellings count as one format.
                ext_set.add(".jpg" if ext == ".jpeg" else ext)

    warnings = []
    if len(ext_set) > 1:
        exts_str = ", ".join(sorted(ext_set))
        warnings.append(f"Extensiones de imagen mezcladas: {exts_str}")
    return StepResult(step="check_image_extensions", changed=False, warnings=warnings)


def check_comicinfo(work_dir: str) -> StepResult:
    """Detect a missing ``ComicInfo.xml``.

    The file name is matched case-insensitively at any depth under *work_dir*.

    Args:
        work_dir: Root directory to scan recursively.

    Returns:
        A ``StepResult`` for step ``"check_comicinfo"`` with ``changed=False``
        and a single warning when no such file exists.
    """
    # any() stops walking the tree as soon as the first match is found.
    found = any(
        f.lower() == "comicinfo.xml"
        for _, _, files in os.walk(work_dir)
        for f in files
    )
    warnings = [] if found else ["Falta ComicInfo.xml"]
    return StepResult(step="check_comicinfo", changed=False, warnings=warnings)