afegides noves validacions
This commit is contained in:
+36
-2
@@ -10,6 +10,14 @@ from core.result import ComicResult, StepResult
|
||||
from processors.validator import validate_archive
|
||||
from processors.cleaner import clean_directory
|
||||
from processors.converter import needs_conversion, conversion_step_result
|
||||
from processors.checks import (
|
||||
check_trash,
|
||||
check_page_numbering,
|
||||
check_image_extensions,
|
||||
check_comicinfo,
|
||||
)
|
||||
from processors.page_normalizer import normalize_pages
|
||||
from processors.image_normalizer import normalize_images
|
||||
|
||||
|
||||
class Pipeline:
|
||||
@@ -17,11 +25,13 @@ class Pipeline:
|
||||
self,
|
||||
steps: list,
|
||||
desired_format: str = "cbz",
|
||||
desired_image_format: str = ".jpg",
|
||||
collision_policy: str = CollisionPolicy.ABORT,
|
||||
dry_run: bool = False,
|
||||
):
|
||||
self.steps = steps
|
||||
self.desired_format = desired_format
|
||||
self.desired_image_format = desired_image_format
|
||||
self.collision_policy = collision_policy
|
||||
self.dry_run = dry_run
|
||||
|
||||
@@ -41,7 +51,15 @@ class Pipeline:
|
||||
try:
|
||||
extract_archive(path, temp_dir)
|
||||
|
||||
# 3. Aplicar cada step sobre el directorio temporal
|
||||
# 3. Ejecutar siempre los 4 content checks
|
||||
step_results += [
|
||||
check_trash(temp_dir),
|
||||
check_page_numbering(temp_dir),
|
||||
check_image_extensions(temp_dir),
|
||||
check_comicinfo(temp_dir),
|
||||
]
|
||||
|
||||
# 4. Aplicar cada fix step sobre el directorio temporal
|
||||
any_changed = False
|
||||
|
||||
if "clean" in self.steps:
|
||||
@@ -50,6 +68,22 @@ class Pipeline:
|
||||
if clean_result.changed:
|
||||
any_changed = True
|
||||
|
||||
if "normalize_pages" in self.steps:
|
||||
norm_result = normalize_pages(temp_dir)
|
||||
step_results.append(norm_result)
|
||||
if norm_result.changed:
|
||||
any_changed = True
|
||||
|
||||
if "normalize_images" in self.steps:
|
||||
img_result = normalize_images(temp_dir, self.desired_image_format)
|
||||
step_results.append(img_result)
|
||||
if img_result.errors:
|
||||
return ComicResult(
|
||||
original_path=path, final_path=None, steps=step_results
|
||||
)
|
||||
if img_result.changed:
|
||||
any_changed = True
|
||||
|
||||
if "convert" in self.steps:
|
||||
conv_result = conversion_step_result(real_format, self.desired_format)
|
||||
step_results.append(conv_result)
|
||||
@@ -60,7 +94,7 @@ class Pipeline:
|
||||
if conv_result.changed:
|
||||
any_changed = True
|
||||
|
||||
# 4. Reempaquetar si hubo cambios o conversión de formato
|
||||
# 5. Reempaquetar si hubo cambios o conversión de formato
|
||||
needs_repack = any_changed or (
|
||||
"convert" in self.steps
|
||||
and needs_conversion(real_format, self.desired_format)
|
||||
|
||||
@@ -31,6 +31,10 @@ class ComicResult:
|
||||
return f"OK [{self.original_path}] → {dest} ({', '.join(changed_steps)})"
|
||||
return f"OK [{self.original_path}] (sin cambios)"
|
||||
|
||||
def has_issues(self) -> bool:
    """Return True when at least one step reported warnings or errors."""
    for step in self.steps:
        if step.warnings or step.errors:
            return True
    return False
|
||||
|
||||
def full_report(self) -> str:
|
||||
lines = [f"Cómic: {self.original_path}"]
|
||||
for s in self.steps:
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
import argparse
|
||||
from core.scanner import find_comic_files
|
||||
from core.pipeline import Pipeline
|
||||
from processors.validator import validate_comic
|
||||
|
||||
|
||||
def parse_args():
|
||||
@@ -16,6 +15,9 @@ def parse_args():
|
||||
parser.add_argument("--convertir", action="store_true")
|
||||
parser.add_argument("--estandarizar", action="store_true")
|
||||
parser.add_argument("--formato", choices=["cbz", "cbr"], default="cbz")
|
||||
parser.add_argument("--renumerar", action="store_true")
|
||||
parser.add_argument("--uniformizar-imagenes", action="store_true")
|
||||
parser.add_argument("--formato-imagen", choices=["jpg", "png", "webp"], default="jpg")
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
@@ -30,19 +32,30 @@ def main():
|
||||
print(f)
|
||||
|
||||
if args.validar:
|
||||
pipeline = Pipeline(steps=[])
|
||||
for f in comic_files:
|
||||
res = validate_comic(f)
|
||||
print(f"{f} → {res.summary()}")
|
||||
print()
|
||||
result = pipeline.run(f)
|
||||
if result.has_issues():
|
||||
print(result.full_report())
|
||||
print()
|
||||
return
|
||||
|
||||
steps = []
|
||||
if args.limpiar or args.estandarizar:
|
||||
steps.append("clean")
|
||||
if args.renumerar:
|
||||
steps.append("normalize_pages")
|
||||
if args.uniformizar_imagenes:
|
||||
steps.append("normalize_images")
|
||||
if args.convertir or args.estandarizar:
|
||||
steps.append("convert")
|
||||
|
||||
if steps:
|
||||
pipeline = Pipeline(steps=steps, desired_format=args.formato)
|
||||
pipeline = Pipeline(
|
||||
steps=steps,
|
||||
desired_format=args.formato,
|
||||
desired_image_format="." + args.formato_imagen,
|
||||
)
|
||||
for f in comic_files:
|
||||
result = pipeline.run(f)
|
||||
print(result.summary())
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
# processors/checks.py
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
|
||||
from core.result import StepResult
|
||||
|
||||
|
||||
def check_trash(work_dir: str) -> StepResult:
    """Report junk files and directories found under ``work_dir``.

    Every file and directory name is lower-cased and looked up in
    ``TRASH_FILES``. Nothing is removed: each match only produces a warning
    carrying the path relative to ``work_dir`` (directories get a trailing
    "/"). The returned step is always marked as unchanged.
    """

    def _relative(parent: str, entry: str) -> str:
        # Path of ``entry`` as seen from the root of the extracted archive.
        return os.path.relpath(os.path.join(parent, entry), work_dir)

    hits = []
    for current, subdirs, filenames in os.walk(work_dir):
        hits.extend(
            _relative(current, entry)
            for entry in filenames
            if entry.lower() in TRASH_FILES
        )
        hits.extend(
            _relative(current, entry) + "/"
            for entry in subdirs
            if entry.lower() in TRASH_FILES
        )

    return StepResult(
        step="check_trash",
        changed=False,
        warnings=[f"Basura detectada: {hit}" for hit in hits],
    )
|
||||
|
||||
|
||||
def _natural_sort_key(name: str):
|
||||
parts = re.split(r"(\d+)", name)
|
||||
return [int(p) if p.isdigit() else p.lower() for p in parts]
|
||||
|
||||
|
||||
def check_page_numbering(work_dir: str) -> StepResult:
    """Warn about badly numbered pages: missing or uneven padding, and gaps.

    Only files whose lower-cased extension is in ``IMAGE_EXTENSIONS`` are
    considered, and only the first run of digits in each file stem is used as
    the page number. The step never modifies anything, so ``changed`` is
    always False.
    """
    page_names = [
        filename
        for _, _, filenames in os.walk(work_dir)
        for filename in filenames
        if os.path.splitext(filename)[1].lower() in IMAGE_EXTENSIONS
    ]
    page_names.sort(
        key=lambda filename: _natural_sort_key(os.path.splitext(filename)[0])
    )

    page_count = len(page_names)
    if not page_count:
        return StepResult(step="check_page_numbering", changed=False)

    # First block of digits found in each stem; names without digits are
    # ignored by the checks below.
    digit_runs = []
    for filename in page_names:
        match = re.search(r"(\d+)", os.path.splitext(filename)[0])
        if match is not None:
            digit_runs.append(match.group(1))

    if not digit_runs:
        return StepResult(step="check_page_numbering", changed=False)

    warnings = []

    # Zero-padding only matters once there are at least 10 images.
    if page_count >= 10:
        needed_width = len(str(page_count))
        seen_widths = {len(run) for run in digit_runs}
        narrowest = min(seen_widths)
        if len(seen_widths) > 1:
            warnings.append("Padding inconsistente entre páginas")
        elif narrowest < needed_width:
            warnings.append(
                f"Páginas no zero-padded: se necesita ancho {needed_width}, "
                f"encontrado {narrowest}"
            )

    # The numbers must form one unbroken run starting at the smallest value.
    page_numbers = sorted(int(run) for run in digit_runs)
    start = page_numbers[0]
    if page_numbers != list(range(start, start + len(page_numbers))):
        warnings.append("Páginas no secuenciales: hay saltos en la numeración")

    return StepResult(step="check_page_numbering", changed=False, warnings=warnings)
|
||||
|
||||
|
||||
def check_image_extensions(work_dir: str) -> StepResult:
    """Warn when the archive mixes more than one image format.

    Extensions are compared lower-cased, and ".jpeg" is counted as ".jpg" so
    the two spellings are not reported as a mix.
    """
    suffixes = (
        os.path.splitext(filename)[1].lower()
        for _, _, filenames in os.walk(work_dir)
        for filename in filenames
    )
    formats = {
        ".jpg" if suffix == ".jpeg" else suffix
        for suffix in suffixes
        if suffix in IMAGE_EXTENSIONS
    }

    warnings = []
    if len(formats) > 1:
        listing = ", ".join(sorted(formats))
        warnings.append(f"Extensiones de imagen mezcladas: {listing}")

    return StepResult(step="check_image_extensions", changed=False, warnings=warnings)
|
||||
|
||||
|
||||
def check_comicinfo(work_dir: str) -> StepResult:
    """Warn when no ComicInfo.xml exists anywhere under ``work_dir``.

    The file name is matched case-insensitively and the search stops at the
    first hit.
    """
    has_metadata = any(
        filename.lower() == "comicinfo.xml"
        for _, _, filenames in os.walk(work_dir)
        for filename in filenames
    )
    warnings = [] if has_metadata else ["Falta ComicInfo.xml"]
    return StepResult(step="check_comicinfo", changed=False, warnings=warnings)
|
||||
@@ -1,6 +1,7 @@
|
||||
# processors/cleaner.py
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from core.constants import TRASH_FILES
|
||||
from core.result import StepResult
|
||||
|
||||
@@ -8,15 +9,21 @@ from core.result import StepResult
|
||||
def clean_directory(work_dir: str) -> StepResult:
|
||||
"""
|
||||
Elimina TRASH_FILES del directorio ya extraído.
|
||||
Elimina tanto ficheros como directorios de basura (e.g. __MACOSX).
|
||||
Sin I/O de archivo de cómic; trabaja sobre el directorio temporal.
|
||||
"""
|
||||
removed = []
|
||||
for root, _, files in os.walk(work_dir):
|
||||
for root, dirs, files in os.walk(work_dir, topdown=False):
|
||||
for f in files:
|
||||
if f.lower() in TRASH_FILES:
|
||||
full = os.path.join(root, f)
|
||||
os.remove(full)
|
||||
removed.append(os.path.relpath(full, work_dir))
|
||||
for d in dirs:
|
||||
if d.lower() in TRASH_FILES:
|
||||
full = os.path.join(root, d)
|
||||
shutil.rmtree(full, ignore_errors=True)
|
||||
removed.append(os.path.relpath(full, work_dir) + "/")
|
||||
|
||||
details = [f"Eliminado: {r}" for r in removed]
|
||||
return StepResult(
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
# processors/image_normalizer.py
|
||||
|
||||
import os
|
||||
|
||||
from core.constants import IMAGE_EXTENSIONS
|
||||
from core.result import StepResult
|
||||
|
||||
|
||||
def normalize_images(work_dir: str, target_ext: str = ".jpg") -> StepResult:
    """Convert every image under ``work_dir`` to the format of ``target_ext``.

    Args:
        work_dir: Directory holding the extracted comic.
        target_ext: Desired extension, with or without the leading dot and in
            any case (".jpg", ".jpeg", ".png" or ".webp").

    Returns:
        A StepResult for the "normalize_images" step. ``details`` lists each
        "old → new" conversion. ``errors`` is set when Pillow is missing, the
        target format is unsupported, or a conversion would overwrite a file
        that already exists.

    Requires Pillow; it is imported lazily so the rest of the tool works
    without it.
    """
    try:
        from PIL import Image
    except ImportError:
        return StepResult(
            step="normalize_images",
            changed=False,
            errors=["Pillow no instalado. Ejecuta: pip install Pillow"],
        )

    target_ext = target_ext.lower()
    if not target_ext.startswith("."):
        target_ext = "." + target_ext

    # Extension -> Pillow format name.
    format_map = {
        ".jpg": "JPEG",
        ".jpeg": "JPEG",
        ".png": "PNG",
        ".webp": "WEBP",
    }
    pil_format = format_map.get(target_ext)
    if pil_format is None:
        return StepResult(
            step="normalize_images",
            changed=False,
            errors=[f"Formato de imagen no soportado: {target_ext}"],
        )

    def _canonical(extension: str) -> str:
        # ".jpeg" and ".jpg" are the same format; compare them as equal.
        return ".jpg" if extension == ".jpeg" else extension

    canonical_target = _canonical(target_ext)
    changed = False
    details = []
    errors = []

    for root, _, files in os.walk(work_dir):
        for f in files:
            ext = os.path.splitext(f)[1].lower()
            if ext not in IMAGE_EXTENSIONS:
                continue
            # Canonicalise BOTH sides. Otherwise a ".jpeg" target would
            # re-save "x.jpeg" onto itself and then delete it.
            if _canonical(ext) == canonical_target:
                continue

            src = os.path.join(root, f)
            stem = os.path.splitext(f)[0]
            dst = os.path.join(root, stem + target_ext)

            # Never clobber another page that already uses the target name
            # (e.g. "001.jpg" sitting next to "001.png").
            if os.path.exists(dst):
                errors.append(
                    f"No se puede convertir {f}: ya existe {stem + target_ext}"
                )
                continue

            with Image.open(src) as img:
                # JPEG cannot store alpha or palette modes.
                if pil_format == "JPEG" and img.mode in ("RGBA", "P", "LA"):
                    img = img.convert("RGB")
                img.save(dst, format=pil_format)

            os.remove(src)
            details.append(f"{f} → {stem + target_ext}")
            changed = True

    if errors:
        return StepResult(
            step="normalize_images",
            changed=changed,
            details=details,
            errors=errors,
        )
    return StepResult(step="normalize_images", changed=changed, details=details)
|
||||
@@ -0,0 +1,57 @@
|
||||
# processors/page_normalizer.py
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from core.constants import IMAGE_EXTENSIONS
|
||||
from core.result import StepResult
|
||||
|
||||
|
||||
def _natural_sort_key(name: str):
|
||||
parts = re.split(r"(\d+)", name)
|
||||
return [int(p) if p.isdigit() else p.lower() for p in parts]
|
||||
|
||||
|
||||
def normalize_pages(work_dir: str) -> StepResult:
    """Rename the comic's images to a zero-padded sequential numbering.

    Images are ordered by the natural sort of their file stems and renamed in
    two passes:

    1. every image is moved to a temporary name, so a final name can never
       clash with a file that has not been renamed yet;
    2. each temporary file is moved to "<n><ext>", where n starts at 1 and is
       padded to ``len(str(total))`` digits and the extension is lower-cased.

    Each file stays in its own directory. ``details`` lists only the files
    whose name actually changed.
    """
    sources = [
        os.path.join(folder, filename)
        for folder, _, filenames in os.walk(work_dir)
        for filename in filenames
        if os.path.splitext(filename)[1].lower() in IMAGE_EXTENSIONS
    ]
    sources.sort(
        key=lambda path: _natural_sort_key(
            os.path.splitext(os.path.basename(path))[0]
        )
    )

    page_count = len(sources)
    if page_count == 0:
        return StepResult(step="normalize_pages", changed=False)

    digits = len(str(page_count))

    # Pass 1: park every image under a temporary name.
    staged = []
    for position, source in enumerate(sources):
        folder, old_name = os.path.split(source)
        suffix = os.path.splitext(source)[1].lower()
        parked = os.path.join(folder, f".tmp_rename_{position}{suffix}")
        os.rename(source, parked)
        staged.append((parked, folder, suffix, old_name))

    # Pass 2: give each parked file its final sequential name.
    details = []
    for page_number, (parked, folder, suffix, old_name) in enumerate(staged, start=1):
        new_name = f"{page_number:0{digits}d}{suffix}"
        os.rename(parked, os.path.join(folder, new_name))
        if old_name != new_name:
            details.append(f"{old_name} → {new_name}")

    return StepResult(step="normalize_pages", changed=bool(details), details=details)
|
||||
+2
-1
@@ -1 +1,2 @@
|
||||
rarfile
|
||||
rarfile
|
||||
Pillow
|
||||
Reference in New Issue
Block a user