afegides noves validacions

This commit is contained in:
2026-02-19 09:16:39 +01:00
parent 4e86771964
commit 56954717e3
8 changed files with 312 additions and 9 deletions
+36 -2
View File
@@ -10,6 +10,14 @@ from core.result import ComicResult, StepResult
from processors.validator import validate_archive
from processors.cleaner import clean_directory
from processors.converter import needs_conversion, conversion_step_result
from processors.checks import (
check_trash,
check_page_numbering,
check_image_extensions,
check_comicinfo,
)
from processors.page_normalizer import normalize_pages
from processors.image_normalizer import normalize_images
class Pipeline:
@@ -17,11 +25,13 @@ class Pipeline:
self,
steps: list,
desired_format: str = "cbz",
desired_image_format: str = ".jpg",
collision_policy: str = CollisionPolicy.ABORT,
dry_run: bool = False,
):
self.steps = steps
self.desired_format = desired_format
self.desired_image_format = desired_image_format
self.collision_policy = collision_policy
self.dry_run = dry_run
@@ -41,7 +51,15 @@ class Pipeline:
try:
extract_archive(path, temp_dir)
# 3. Aplicar cada step sobre el directorio temporal
# 3. Ejecutar siempre los 4 content checks
step_results += [
check_trash(temp_dir),
check_page_numbering(temp_dir),
check_image_extensions(temp_dir),
check_comicinfo(temp_dir),
]
# 4. Aplicar cada fix step sobre el directorio temporal
any_changed = False
if "clean" in self.steps:
@@ -50,6 +68,22 @@ class Pipeline:
if clean_result.changed:
any_changed = True
if "normalize_pages" in self.steps:
norm_result = normalize_pages(temp_dir)
step_results.append(norm_result)
if norm_result.changed:
any_changed = True
if "normalize_images" in self.steps:
img_result = normalize_images(temp_dir, self.desired_image_format)
step_results.append(img_result)
if img_result.errors:
return ComicResult(
original_path=path, final_path=None, steps=step_results
)
if img_result.changed:
any_changed = True
if "convert" in self.steps:
conv_result = conversion_step_result(real_format, self.desired_format)
step_results.append(conv_result)
@@ -60,7 +94,7 @@ class Pipeline:
if conv_result.changed:
any_changed = True
# 4. Reempaquetar si hubo cambios o conversión de formato
# 5. Reempaquetar si hubo cambios o conversión de formato
needs_repack = any_changed or (
"convert" in self.steps
and needs_conversion(real_format, self.desired_format)
+4
View File
@@ -31,6 +31,10 @@ class ComicResult:
return f"OK [{self.original_path}] → {dest} ({', '.join(changed_steps)})"
return f"OK [{self.original_path}] (sin cambios)"
def has_issues(self) -> bool:
    """Return True when at least one step carries warnings or errors."""
    for step in self.steps:
        # A step counts as problematic as soon as either list is non-empty.
        if step.warnings or step.errors:
            return True
    return False
def full_report(self) -> str:
lines = [f"Cómic: {self.original_path}"]
for s in self.steps:
+18 -5
View File
@@ -3,7 +3,6 @@
import argparse
from core.scanner import find_comic_files
from core.pipeline import Pipeline
from processors.validator import validate_comic
def parse_args():
@@ -16,6 +15,9 @@ def parse_args():
parser.add_argument("--convertir", action="store_true")
parser.add_argument("--estandarizar", action="store_true")
parser.add_argument("--formato", choices=["cbz", "cbr"], default="cbz")
parser.add_argument("--renumerar", action="store_true")
parser.add_argument("--uniformizar-imagenes", action="store_true")
parser.add_argument("--formato-imagen", choices=["jpg", "png", "webp"], default="jpg")
return parser.parse_args()
@@ -30,19 +32,30 @@ def main():
print(f)
if args.validar:
pipeline = Pipeline(steps=[])
for f in comic_files:
res = validate_comic(f)
print(f"{f}{res.summary()}")
print()
result = pipeline.run(f)
if result.has_issues():
print(result.full_report())
print()
return
steps = []
if args.limpiar or args.estandarizar:
steps.append("clean")
if args.renumerar:
steps.append("normalize_pages")
if args.uniformizar_imagenes:
steps.append("normalize_images")
if args.convertir or args.estandarizar:
steps.append("convert")
if steps:
pipeline = Pipeline(steps=steps, desired_format=args.formato)
pipeline = Pipeline(
steps=steps,
desired_format=args.formato,
desired_image_format="." + args.formato_imagen,
)
for f in comic_files:
result = pipeline.run(f)
print(result.summary())
+119
View File
@@ -0,0 +1,119 @@
# processors/checks.py
import os
import re
from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
from core.result import StepResult
def check_trash(work_dir: str) -> StepResult:
    """Report junk files and directories found under ``work_dir``.

    The tree is walked and every file or directory whose lower-cased name is
    in ``TRASH_FILES`` yields one warning. Paths are reported relative to
    ``work_dir``; directories get a trailing ``/``. Nothing is deleted here.
    """
    hits = []
    for current, subdirs, filenames in os.walk(work_dir):
        # Files of this directory first, then its sub-directories.
        hits.extend(
            os.path.relpath(os.path.join(current, entry), work_dir)
            for entry in filenames
            if entry.lower() in TRASH_FILES
        )
        hits.extend(
            os.path.relpath(os.path.join(current, entry), work_dir) + "/"
            for entry in subdirs
            if entry.lower() in TRASH_FILES
        )
    return StepResult(
        step="check_trash",
        changed=False,
        warnings=[f"Basura detectada: {hit}" for hit in hits],
    )
def _natural_sort_key(name: str):
parts = re.split(r"(\d+)", name)
return [int(p) if p.isdigit() else p.lower() for p in parts]
def check_page_numbering(work_dir: str) -> StepResult:
    """Flag badly numbered pages: missing or inconsistent padding, and gaps.

    Only file names are considered (not their directories). The first run of
    digits in each image stem is taken as its page number. If there are no
    images, or no image name contains digits, an empty result is returned.
    """
    step_name = "check_page_numbering"

    page_names = [
        filename
        for _current, _subdirs, filenames in os.walk(work_dir)
        for filename in filenames
        if os.path.splitext(filename)[1].lower() in IMAGE_EXTENSIONS
    ]
    page_names.sort(
        key=lambda filename: _natural_sort_key(os.path.splitext(filename)[0])
    )
    if not page_names:
        return StepResult(step=step_name, changed=False)

    # First digit run of every stem, kept as text so its width is preserved.
    digit_runs = []
    for filename in page_names:
        match = re.search(r"(\d+)", os.path.splitext(filename)[0])
        if match is not None:
            digit_runs.append(match.group(1))
    if not digit_runs:
        return StepResult(step=step_name, changed=False)

    issues = []

    # Zero-padding is only checked once there are at least 10 images.
    page_count = len(page_names)
    if page_count >= 10:
        required_width = len(str(page_count))
        distinct_widths = {len(run) for run in digit_runs}
        if len(distinct_widths) > 1:
            issues.append("Padding inconsistente entre páginas")
        elif min(distinct_widths) < required_width:
            issues.append(
                f"Páginas no zero-padded: se necesita ancho {required_width}, "
                f"encontrado {min(distinct_widths)}"
            )

    # The numbers must form one unbroken run starting at the smallest value.
    numbers = sorted(int(run) for run in digit_runs)
    start = numbers[0]
    if numbers != list(range(start, start + len(numbers))):
        issues.append("Páginas no secuenciales: hay saltos en la numeración")

    return StepResult(step=step_name, changed=False, warnings=issues)
def check_image_extensions(work_dir: str) -> StepResult:
    """Warn when the archive mixes more than one image format.

    Extensions are compared lower-cased and ``.jpeg`` is counted as ``.jpg``.
    """
    seen = {
        ".jpg" if suffix == ".jpeg" else suffix
        for _current, _subdirs, filenames in os.walk(work_dir)
        for suffix in (os.path.splitext(name)[1].lower() for name in filenames)
        if suffix in IMAGE_EXTENSIONS
    }

    issues = []
    if len(seen) > 1:
        listing = ", ".join(sorted(seen))
        issues.append(f"Extensiones de imagen mezcladas: {listing}")
    return StepResult(
        step="check_image_extensions", changed=False, warnings=issues
    )
def check_comicinfo(work_dir: str) -> StepResult:
    """Warn when no ``ComicInfo.xml`` (any letter case) exists in the tree."""
    # any() stops walking as soon as the first match is seen.
    present = any(
        name.lower() == "comicinfo.xml"
        for _current, _subdirs, filenames in os.walk(work_dir)
        for name in filenames
    )
    issues = [] if present else ["Falta ComicInfo.xml"]
    return StepResult(step="check_comicinfo", changed=False, warnings=issues)
+8 -1
View File
@@ -1,6 +1,7 @@
# processors/cleaner.py
import os
import shutil
from core.constants import TRASH_FILES
from core.result import StepResult
@@ -8,15 +9,21 @@ from core.result import StepResult
def clean_directory(work_dir: str) -> StepResult:
"""
Elimina TRASH_FILES del directorio ya extraído.
Elimina tanto ficheros como directorios de basura (e.g. __MACOSX).
Sin I/O de archivo de cómic; trabaja sobre el directorio temporal.
"""
removed = []
for root, _, files in os.walk(work_dir):
for root, dirs, files in os.walk(work_dir, topdown=False):
for f in files:
if f.lower() in TRASH_FILES:
full = os.path.join(root, f)
os.remove(full)
removed.append(os.path.relpath(full, work_dir))
for d in dirs:
if d.lower() in TRASH_FILES:
full = os.path.join(root, d)
shutil.rmtree(full, ignore_errors=True)
removed.append(os.path.relpath(full, work_dir) + "/")
details = [f"Eliminado: {r}" for r in removed]
return StepResult(
+68
View File
@@ -0,0 +1,68 @@
# processors/image_normalizer.py
import os
from core.constants import IMAGE_EXTENSIONS
from core.result import StepResult
def normalize_images(work_dir: str, target_ext: str = ".jpg") -> StepResult:
    """Convert every image under ``work_dir`` to the format of ``target_ext``.

    Requires Pillow; if it cannot be imported, a result with an error is
    returned instead of raising.

    Args:
        work_dir: Root of the extracted comic directory.
        target_ext: Desired extension, with or without the leading dot
            (case-insensitive). ``.jpg`` and ``.jpeg`` both mean JPEG.

    Returns:
        A ``StepResult`` whose ``details`` list one entry per converted file
        and whose ``errors`` list unsupported formats, a missing Pillow, or
        files skipped because the destination name was already taken.
    """
    try:
        from PIL import Image
    except ImportError:
        return StepResult(
            step="normalize_images",
            changed=False,
            errors=["Pillow no instalado. Ejecuta: pip install Pillow"],
        )

    target_ext = target_ext.lower()
    if not target_ext.startswith("."):
        target_ext = "." + target_ext

    # Extension -> Pillow format name.
    format_by_ext = {
        ".jpg": "JPEG",
        ".jpeg": "JPEG",
        ".png": "PNG",
        ".webp": "WEBP",
    }
    pil_format = format_by_ext.get(target_ext)
    if pil_format is None:
        return StepResult(
            step="normalize_images",
            changed=False,
            errors=[f"Formato de imagen no soportado: {target_ext}"],
        )

    changed = False
    details = []
    errors = []
    for root, _, files in os.walk(work_dir):
        for f in files:
            stem, raw_ext = os.path.splitext(f)
            ext = raw_ext.lower()
            if ext not in IMAGE_EXTENSIONS:
                continue
            # Compare by format, not by spelling: with target ".jpeg" a
            # ".jpeg" source would otherwise be saved onto itself and then
            # deleted by the os.remove() below.
            if format_by_ext.get(ext) == pil_format:
                continue

            src = os.path.join(root, f)
            dst_name = stem + target_ext
            dst = os.path.join(root, dst_name)
            # Never overwrite another page that already owns the target name
            # (e.g. both "a.png" and "a.jpg" present).
            if os.path.exists(dst):
                errors.append(
                    f"No se puede convertir {os.path.relpath(src, work_dir)}: "
                    f"ya existe {dst_name}"
                )
                continue

            with Image.open(src) as img:
                converted = img
                # JPEG cannot store alpha or palette data.
                if pil_format == "JPEG" and img.mode in ("RGBA", "P", "LA"):
                    converted = img.convert("RGB")
                converted.save(dst, format=pil_format)
            os.remove(src)
            details.append(f"{f} → {dst_name}")
            changed = True

    return StepResult(
        step="normalize_images",
        changed=changed,
        details=details,
        errors=errors,
    )
+57
View File
@@ -0,0 +1,57 @@
# processors/page_normalizer.py
import os
import re
from core.constants import IMAGE_EXTENSIONS
from core.result import StepResult
def _natural_sort_key(name: str):
parts = re.split(r"(\d+)", name)
return [int(p) if p.isdigit() else p.lower() for p in parts]
def normalize_pages(work_dir: str) -> StepResult:
    """Rename the comic's images to a zero-padded sequential numbering.

    Images are numbered globally across ``work_dir`` (each file stays in its
    own directory), ordered by containing directory and then by file stem,
    both compared with natural sort. The padding width is
    ``len(str(total))`` and extensions are lower-cased.

    Phase 1 renames every image to a temporary name so that no final name
    can collide with a file that has not been processed yet.
    Phase 2 renames the temporaries to their final names.

    Returns:
        A ``StepResult`` with one detail entry per file whose name changed.
    """
    images = []
    for root, _, files in os.walk(work_dir):
        for f in files:
            if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS:
                images.append(os.path.join(root, f))

    def _reading_order(path: str):
        # Directory first: sorting by file name alone interleaves pages of
        # different sub-folders (ch1/001, ch2/001, ch1/002, ...).
        folder = os.path.relpath(os.path.dirname(path), work_dir)
        stem = os.path.splitext(os.path.basename(path))[0]
        return (_natural_sort_key(folder), _natural_sort_key(stem))

    images.sort(key=_reading_order)

    total = len(images)
    if total == 0:
        return StepResult(step="normalize_pages", changed=False)

    width = len(str(total))

    # Phase 1: move everything to temporary names to avoid intermediate
    # collisions (e.g. "2.jpg" -> "1.jpg" while "1.jpg" still exists).
    temp_paths = []
    for i, src in enumerate(images):
        ext = os.path.splitext(src)[1].lower()
        tmp = os.path.join(os.path.dirname(src), f".tmp_rename_{i}{ext}")
        os.rename(src, tmp)
        temp_paths.append(tmp)

    # Phase 2: move the temporaries to their final, numbered names.
    changed = False
    details = []
    for i, (src, tmp) in enumerate(zip(images, temp_paths)):
        ext = os.path.splitext(tmp)[1].lower()
        final_name = f"{str(i + 1).zfill(width)}{ext}"
        os.rename(tmp, os.path.join(os.path.dirname(tmp), final_name))

        original_name = os.path.basename(src)
        if original_name != final_name:
            details.append(f"{original_name} → {final_name}")
            changed = True

    return StepResult(step="normalize_pages", changed=changed, details=details)
+2 -1
View File
@@ -1 +1,2 @@
rarfile
rarfile
Pillow