diff --git a/core/pipeline.py b/core/pipeline.py index 23e6364..18fdce9 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -10,6 +10,14 @@ from core.result import ComicResult, StepResult from processors.validator import validate_archive from processors.cleaner import clean_directory from processors.converter import needs_conversion, conversion_step_result +from processors.checks import ( + check_trash, + check_page_numbering, + check_image_extensions, + check_comicinfo, +) +from processors.page_normalizer import normalize_pages +from processors.image_normalizer import normalize_images class Pipeline: @@ -17,11 +25,13 @@ class Pipeline: self, steps: list, desired_format: str = "cbz", + desired_image_format: str = ".jpg", collision_policy: str = CollisionPolicy.ABORT, dry_run: bool = False, ): self.steps = steps self.desired_format = desired_format + self.desired_image_format = desired_image_format self.collision_policy = collision_policy self.dry_run = dry_run @@ -41,7 +51,15 @@ class Pipeline: try: extract_archive(path, temp_dir) - # 3. Aplicar cada step sobre el directorio temporal + # 3. Ejecutar siempre los 4 content checks + step_results += [ + check_trash(temp_dir), + check_page_numbering(temp_dir), + check_image_extensions(temp_dir), + check_comicinfo(temp_dir), + ] + + # 4. Aplicar cada fix step sobre el directorio temporal any_changed = False if "clean" in self.steps: @@ -50,6 +68,22 @@ class Pipeline: if clean_result.changed: any_changed = True + if "normalize_pages" in self.steps: + norm_result = normalize_pages(temp_dir) + step_results.append(norm_result) + if norm_result.changed: + any_changed = True + + if "normalize_images" in self.steps: + img_result = normalize_images(temp_dir, self.desired_image_format) + step_results.append(img_result) + if img_result.errors: + return ComicResult( + original_path=path, final_path=None, steps=step_results + ) + if img_result.changed: + any_changed = True + if "convert" in self.steps: conv_result = conversion_step_result(real_format, self.desired_format) step_results.append(conv_result) @@ -60,7 +94,7 @@ class Pipeline: if conv_result.changed: any_changed = True - # 4. Reempaquetar si hubo cambios o conversión de formato + # 5. Reempaquetar si hubo cambios o conversión de formato needs_repack = any_changed or ( "convert" in self.steps and needs_conversion(real_format, self.desired_format) diff --git a/core/result.py b/core/result.py index 73b98d8..a7daba0 100644 --- a/core/result.py +++ b/core/result.py @@ -31,6 +31,10 @@ class ComicResult: return f"OK [{self.original_path}] → {dest} ({', '.join(changed_steps)})" return f"OK [{self.original_path}] (sin cambios)" + def has_issues(self) -> bool: + """True si algún step tiene warnings o errors.""" + return any(s.warnings or s.errors for s in self.steps) + def full_report(self) -> str: lines = [f"Cómic: {self.original_path}"] for s in self.steps: diff --git a/main.py b/main.py index d0b3cde..48f1ec5 100644 --- a/main.py +++ b/main.py @@ -3,7 +3,6 @@ import argparse from core.scanner import find_comic_files from core.pipeline import Pipeline -from processors.validator import validate_comic def parse_args(): @@ -16,6 +15,9 @@ def parse_args(): parser.add_argument("--convertir", action="store_true") parser.add_argument("--estandarizar", action="store_true") parser.add_argument("--formato", choices=["cbz", "cbr"], default="cbz") + parser.add_argument("--renumerar", action="store_true") + parser.add_argument("--uniformizar-imagenes", action="store_true") + parser.add_argument("--formato-imagen", choices=["jpg", "png", "webp"], default="jpg") return parser.parse_args() @@ -30,19 +32,30 @@ def main(): print(f) if args.validar: + pipeline = Pipeline(steps=[]) for f in comic_files: - res = validate_comic(f) - print(f"{f} → {res.summary()}") - print() + result = pipeline.run(f) + if result.has_issues(): + print(result.full_report()) + print() + return steps = [] if args.limpiar or args.estandarizar: steps.append("clean") + if args.renumerar: + steps.append("normalize_pages") + if args.uniformizar_imagenes: + steps.append("normalize_images") if args.convertir or args.estandarizar: steps.append("convert") if steps: - pipeline = Pipeline(steps=steps, desired_format=args.formato) + pipeline = Pipeline( + steps=steps, + desired_format=args.formato, + desired_image_format="." + args.formato_imagen, + ) for f in comic_files: result = pipeline.run(f) print(result.summary()) diff --git a/processors/checks.py b/processors/checks.py new file mode 100644 index 0000000..ed759a9 --- /dev/null +++ b/processors/checks.py @@ -0,0 +1,119 @@ +# processors/checks.py + +import os +import re + +from core.constants import IMAGE_EXTENSIONS, TRASH_FILES +from core.result import StepResult + + +def check_trash(work_dir: str) -> StepResult: + """Detecta ficheros y directorios de basura presentes en el archivo.""" + found = [] + for root, dirs, files in os.walk(work_dir): + for name in files: + if name.lower() in TRASH_FILES: + found.append(os.path.relpath(os.path.join(root, name), work_dir)) + for name in dirs: + if name.lower() in TRASH_FILES: + found.append(os.path.relpath(os.path.join(root, name), work_dir) + "/") + + warnings = [f"Basura detectada: {f}" for f in found] + return StepResult(step="check_trash", changed=False, warnings=warnings) + + +def _natural_sort_key(name: str): + parts = re.split(r"(\d+)", name) + return [int(p) if p.isdigit() else p.lower() for p in parts] + + +def check_page_numbering(work_dir: str) -> StepResult: + """Detecta páginas mal numeradas: sin zero-padding, padding inconsistente, saltos.""" + images = [] + for root, _, files in os.walk(work_dir): + for f in files: + if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS: + images.append(f) + + images.sort(key=lambda f: _natural_sort_key(os.path.splitext(f)[0])) + total = len(images) + if total == 0: + return StepResult(step="check_page_numbering", changed=False) + + warnings = [] + + # Extraer primer bloque numérico del stem de cada imagen + numeric_values = [] + numeric_widths = [] + for img in images: + stem = os.path.splitext(img)[0] + m = re.search(r"(\d+)", stem) + if m: + numeric_values.append(int(m.group(1))) + numeric_widths.append(len(m.group(1))) + else: + numeric_values.append(None) + numeric_widths.append(None) + + has_numbers = [v for v in numeric_values if v is not None] + if not has_numbers: + return StepResult(step="check_page_numbering", changed=False) + + # Comprobar zero-padding si hay >= 10 imágenes + if total >= 10: + required_width = len(str(total)) + widths_set = set(w for w in numeric_widths if w is not None) + if len(widths_set) > 1: + warnings.append("Padding inconsistente entre páginas") + elif widths_set and min(widths_set) < required_width: + warnings.append( + f"Páginas no zero-padded: se necesita ancho {required_width}, " + f"encontrado {min(widths_set)}" + ) + + # Comprobar secuencia continua + sorted_values = sorted(v for v in numeric_values if v is not None) + if sorted_values: + first = sorted_values[0] + expected = list(range(first, first + len(sorted_values))) + if sorted_values != expected: + warnings.append("Páginas no secuenciales: hay saltos en la numeración") + + return StepResult(step="check_page_numbering", changed=False, warnings=warnings) + + +def check_image_extensions(work_dir: str) -> StepResult: + """Detecta mezcla de formatos de imagen en el archivo.""" + ext_set = set() + for root, _, files in os.walk(work_dir): + for f in files: + ext = os.path.splitext(f)[1].lower() + if ext in IMAGE_EXTENSIONS: + # Normalizar .jpeg → .jpg + normalized = ".jpg" if ext == ".jpeg" else ext + ext_set.add(normalized) + + warnings = [] + if len(ext_set) > 1: + exts_str = ", ".join(sorted(ext_set)) + warnings.append(f"Extensiones de imagen mezcladas: {exts_str}") + + return StepResult(step="check_image_extensions", changed=False, warnings=warnings) + + +def check_comicinfo(work_dir: str) -> StepResult: + """Detecta ausencia de ComicInfo.xml.""" + warnings = [] + found = False + for root, _, files in os.walk(work_dir): + for f in files: + if f.lower() == "comicinfo.xml": + found = True + break + if found: + break + + if not found: + warnings.append("Falta ComicInfo.xml") + + return StepResult(step="check_comicinfo", changed=False, warnings=warnings) diff --git a/processors/cleaner.py b/processors/cleaner.py index 1c5311b..2d290e4 100644 --- a/processors/cleaner.py +++ b/processors/cleaner.py @@ -1,6 +1,7 @@ # processors/cleaner.py import os +import shutil from core.constants import TRASH_FILES from core.result import StepResult @@ -8,15 +9,21 @@ from core.result import StepResult def clean_directory(work_dir: str) -> StepResult: """ Elimina TRASH_FILES del directorio ya extraído. + Elimina tanto ficheros como directorios de basura (e.g. __MACOSX). Sin I/O de archivo de cómic; trabaja sobre el directorio temporal. """ removed = [] - for root, _, files in os.walk(work_dir): + for root, dirs, files in os.walk(work_dir, topdown=False): for f in files: if f.lower() in TRASH_FILES: full = os.path.join(root, f) os.remove(full) removed.append(os.path.relpath(full, work_dir)) + for d in dirs: + if d.lower() in TRASH_FILES: + full = os.path.join(root, d) + shutil.rmtree(full, ignore_errors=True) + removed.append(os.path.relpath(full, work_dir) + "/") details = [f"Eliminado: {r}" for r in removed] return StepResult( diff --git a/processors/image_normalizer.py b/processors/image_normalizer.py new file mode 100644 index 0000000..f3ca1ae --- /dev/null +++ b/processors/image_normalizer.py @@ -0,0 +1,68 @@ +# processors/image_normalizer.py + +import os + +from core.constants import IMAGE_EXTENSIONS +from core.result import StepResult + + +def normalize_images(work_dir: str, target_ext: str = ".jpg") -> StepResult: + """ + Convierte todas las imágenes al formato indicado por target_ext. + Requiere Pillow. Si no está instalado, devuelve un StepResult con error. + """ + try: + from PIL import Image + except ImportError: + return StepResult( + step="normalize_images", + changed=False, + errors=["Pillow no instalado. Ejecuta: pip install Pillow"], + ) + + target_ext = target_ext.lower() + if not target_ext.startswith("."): + target_ext = "." + target_ext + + # Mapa de extensión a formato PIL + FORMAT_MAP = { + ".jpg": "JPEG", + ".jpeg": "JPEG", + ".png": "PNG", + ".webp": "WEBP", + } + pil_format = FORMAT_MAP.get(target_ext) + if pil_format is None: + return StepResult( + step="normalize_images", + changed=False, + errors=[f"Formato de imagen no soportado: {target_ext}"], + ) + + changed = False + details = [] + + for root, _, files in os.walk(work_dir): + for f in files: + ext = os.path.splitext(f)[1].lower() + normalized_ext = ".jpg" if ext == ".jpeg" else ext + if ext not in IMAGE_EXTENSIONS: + continue + if normalized_ext == target_ext: + continue + + src = os.path.join(root, f) + stem = os.path.splitext(f)[0] + dst = os.path.join(root, stem + target_ext) + + with Image.open(src) as img: + # Convertir modos incompatibles con JPEG + if pil_format == "JPEG" and img.mode in ("RGBA", "P", "LA"): + img = img.convert("RGB") + img.save(dst, format=pil_format) + + os.remove(src) + details.append(f"{f} → {stem + target_ext}") + changed = True + + return StepResult(step="normalize_images", changed=changed, details=details) diff --git a/processors/page_normalizer.py b/processors/page_normalizer.py new file mode 100644 index 0000000..e31d406 --- /dev/null +++ b/processors/page_normalizer.py @@ -0,0 +1,57 @@ +# processors/page_normalizer.py + +import os +import re + +from core.constants import IMAGE_EXTENSIONS +from core.result import StepResult + + +def _natural_sort_key(name: str): + parts = re.split(r"(\d+)", name) + return [int(p) if p.isdigit() else p.lower() for p in parts] + + +def normalize_pages(work_dir: str) -> StepResult: + """ + Renombra las imágenes del cómic a una numeración secuencial con zero-padding. + Fase 1: renombra a nombres temporales para evitar colisiones. + Fase 2: renombra a 001.ext, 002.ext… con ancho = len(str(total)). + """ + # Recopilar imágenes con natural sort + images = [] + for root, _, files in os.walk(work_dir): + for f in files: + ext = os.path.splitext(f)[1].lower() + if ext in IMAGE_EXTENSIONS: + images.append(os.path.join(root, f)) + + images.sort(key=lambda p: _natural_sort_key(os.path.splitext(os.path.basename(p))[0])) + total = len(images) + if total == 0: + return StepResult(step="normalize_pages", changed=False) + + width = len(str(total)) + + # Fase 1: renombrar a temporales para evitar colisiones intermedias + temp_paths = [] + for i, src in enumerate(images): + ext = os.path.splitext(src)[1].lower() + tmp = os.path.join(os.path.dirname(src), f".tmp_rename_{i}{ext}") + os.rename(src, tmp) + temp_paths.append(tmp) + + # Fase 2: renombrar a nombre final + changed = False + details = [] + for i, tmp in enumerate(temp_paths): + ext = os.path.splitext(tmp)[1].lower() + final_name = f"{str(i + 1).zfill(width)}{ext}" + final_path = os.path.join(os.path.dirname(tmp), final_name) + os.rename(tmp, final_path) + original_name = os.path.basename(images[i]) + if original_name != final_name: + details.append(f"{original_name} → {final_name}") + changed = True + + return StepResult(step="normalize_pages", changed=changed, details=details) diff --git a/requirements.txt b/requirements.txt index da04230..6736a7d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -rarfile \ No newline at end of file +rarfile +Pillow \ No newline at end of file