Evita l'extracció quan no és necessari
This commit is contained in:
@@ -37,6 +37,12 @@ def open_archive(path: str):
|
||||
raise ArchiveError(f"Formato desconocido o archivo corrupto: {path}")
|
||||
|
||||
|
||||
def list_archive_names(path: str) -> list[str]:
    """Return the member names of the archive at ``path`` without extracting it.

    The archive is opened through ``open_archive`` and used as a context
    manager; whatever its ``namelist()`` returns is handed back unchanged.
    Any error raised while opening (``open_archive`` raises ``ArchiveError``
    for unknown or corrupt files) propagates to the caller.
    """
    with open_archive(path) as archive:
        members = archive.namelist()
    return members
|
||||
|
||||
|
||||
def extract_archive(path: str, dest_dir: str) -> str:
|
||||
"""Extrae el archivo en dest_dir. Devuelve dest_dir."""
|
||||
archive = open_archive(path)
|
||||
|
||||
+35
-12
@@ -6,7 +6,7 @@ import shutil
|
||||
|
||||
import rarfile
|
||||
|
||||
from core.archive import detect_real_format, extract_archive, repack_as_cbz, ArchiveError
|
||||
from core.archive import detect_real_format, extract_archive, repack_as_cbz, ArchiveError, list_archive_names
|
||||
from core.collision import CollisionPolicy, resolve_collision
|
||||
from core.result import ComicResult, StepResult
|
||||
from processors.validator import validate_archive
|
||||
@@ -69,6 +69,18 @@ class Pipeline:
|
||||
|
||||
return {}
|
||||
|
||||
def _needs_extraction(self, step_results: list, real_format: str) -> bool:
    """Tell whether any configured step requires unpacking the archive.

    Args:
        step_results: results gathered so far; only the entry whose ``step``
            is ``"check_trash"`` is consulted (for the ``"clean"`` step).
        real_format: detected archive format, compared against
            ``self.desired_format`` for the ``"convert"`` step.

    Returns:
        True as soon as one entry of ``self.steps`` needs the extracted
        files, False when none does (including when there are no steps).
    """
    # Steps that always operate on the extracted files.
    always_extract = ("normalize_pages", "normalize_images", "convert_images")

    for name in self.steps:
        if name in always_extract:
            return True

        if name == "convert":
            # Only worth extracting when the container format really changes.
            if needs_conversion(real_format, self.desired_format):
                return True
        elif name == "clean":
            # Cleaning only matters if the name-based trash check flagged something.
            trash_report = None
            for candidate in step_results:
                if candidate.step == "check_trash":
                    trash_report = candidate
                    break
            if trash_report and trash_report.warnings:
                return True

    return False
|
||||
|
||||
def run(self, path: str, confirm_fn=None) -> ComicResult:
|
||||
step_results = []
|
||||
|
||||
@@ -80,20 +92,31 @@ class Pipeline:
|
||||
|
||||
real_format = detect_real_format(path)
|
||||
|
||||
# 2. Extraer una sola vez
|
||||
# 2. Obtener lista de miembros sin extraer
|
||||
try:
|
||||
names = list_archive_names(path)
|
||||
except Exception as exc:
|
||||
step_results.append(StepResult(step="list", changed=False, errors=[str(exc)]))
|
||||
return ComicResult(original_path=path, final_path=None, steps=step_results)
|
||||
|
||||
# 3. Ejecutar siempre los 4 content checks sobre los nombres (sin extraer)
|
||||
step_results += [
|
||||
check_trash(names),
|
||||
check_page_numbering(names),
|
||||
check_image_extensions(names),
|
||||
check_comicinfo(names),
|
||||
]
|
||||
|
||||
# 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo
|
||||
if not self._needs_extraction(step_results, real_format):
|
||||
return ComicResult(original_path=path, final_path=path, steps=step_results)
|
||||
|
||||
# 5. Extraer una sola vez
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
try:
|
||||
extract_archive(path, temp_dir)
|
||||
|
||||
# 3. Ejecutar siempre los 4 content checks
|
||||
step_results += [
|
||||
check_trash(temp_dir),
|
||||
check_page_numbering(temp_dir),
|
||||
check_image_extensions(temp_dir),
|
||||
check_comicinfo(temp_dir),
|
||||
]
|
||||
|
||||
# 4. Aplicar cada fix step sobre el directorio temporal
|
||||
# 6. Aplicar cada fix step sobre el directorio temporal
|
||||
any_changed = False
|
||||
|
||||
if "clean" in self.steps:
|
||||
@@ -152,7 +175,7 @@ class Pipeline:
|
||||
if conv_result.changed:
|
||||
any_changed = True
|
||||
|
||||
# 5. Reempaquetar si hubo cambios o conversión de formato
|
||||
# 7. Reempaquetar si hubo cambios o conversión de formato
|
||||
needs_repack = any_changed or (
|
||||
"convert" in self.steps
|
||||
and needs_conversion(real_format, self.desired_format)
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
# core/summary.py
|
||||
|
||||
from core.result import ComicResult
|
||||
|
||||
_CORRUPT_PATTERNS = ("BadRarFile", "BadZipFile", "corrupto", "Failed to read")
|
||||
_BORDER = "═" * 42
|
||||
|
||||
|
||||
class SummaryCollector:
    """Accumulate per-comic results and render an end-of-run summary."""

    def __init__(self):
        # Results in the order they were added.
        self._results: list[ComicResult] = []

    def add(self, result: ComicResult) -> None:
        """Record one processed comic."""
        self._results.append(result)

    def _is_corrupt(self, result: ComicResult) -> bool:
        """Return True if any step error contains one of ``_CORRUPT_PATTERNS``."""
        for step in result.steps:
            for message in step.errors:
                if any(marker in message for marker in _CORRUPT_PATTERNS):
                    return True
        return False

    def render(self) -> str:
        """Build the summary text; an empty string when nothing was collected.

        Results are split into failures (``ok()`` is false or ``final_path``
        is None) and successes. Successes are further split into modified
        (some step changed), warnings-only (unchanged but ``has_issues()``)
        and untouched. Failures are split into corrupt and other errors and
        listed by path after the table.
        """
        collected = self._results
        if not collected:
            return ""

        # Failures versus successes, keeping insertion order.
        failed, succeeded = [], []
        for res in collected:
            if res.ok() and res.final_path is not None:
                succeeded.append(res)
            else:
                failed.append(res)

        # Successes by outcome.
        modified, warnings_only, untouched = [], [], []
        for res in succeeded:
            if any(step.changed for step in res.steps):
                modified.append(res)
            elif res.has_issues():
                warnings_only.append(res)
            else:
                untouched.append(res)

        # Failures by cause.
        corrupt, other_errors = [], []
        for res in failed:
            if self._is_corrupt(res):
                corrupt.append(res)
            else:
                other_errors.append(res)

        def touched_by(*step_names: str) -> int:
            """Count modified comics where one of the named steps changed something."""
            hits = 0
            for res in modified:
                if any(step.step in step_names and step.changed for step in res.steps):
                    hits += 1
            return hits

        # Per-operation breakdown; rows with a zero count are omitted.
        breakdown = (
            (" · Limpiados : ", touched_by("clean")),
            (" · Páginas normalizadas : ", touched_by("normalize_pages")),
            (" · Imágenes convertidas : ", touched_by("normalize_images", "convert_images")),
            (" · Formato convertido : ", touched_by("convert")),
        )

        out = [
            _BORDER,
            " RESUMEN DEL PROCESAMIENTO",
            _BORDER,
            f" Total procesados : {len(collected):>3}",
            f" Sin cambios : {len(untouched):>3}",
            f" Modificados : {len(modified):>3}",
        ]
        for label, count in breakdown:
            if count:
                out.append(f"{label}{count:>3}")

        out.append(f" Advertencias : {len(warnings_only):>3}")
        out.append(f" Errores : {len(failed):>3}")
        if failed:
            out.append(f" · Corruptos : {len(corrupt):>3}")
            out.append(f" · Otros errores : {len(other_errors):>3}")
        out.append(_BORDER)

        if corrupt:
            out.append("")
            out.append("Archivos corruptos:")
            out.extend(f" {res.original_path}" for res in corrupt)

        if other_errors:
            out.append("")
            out.append("Otros errores:")
            for res in other_errors:
                joined = "; ".join(
                    message for step in res.steps for message in step.errors
                )
                out.append(f" {res.original_path} — {joined}")

        return "\n".join(out)
|
||||
@@ -3,6 +3,7 @@
|
||||
import argparse
|
||||
from core.scanner import find_comic_files
|
||||
from core.pipeline import Pipeline
|
||||
from core.summary import SummaryCollector
|
||||
|
||||
_COL_W = 30
|
||||
_SEP = "─" * 44
|
||||
@@ -84,11 +85,14 @@ def main():
|
||||
|
||||
if args.validar:
|
||||
pipeline = Pipeline(steps=[])
|
||||
collector = SummaryCollector()
|
||||
for f in comic_files:
|
||||
result = pipeline.run(f)
|
||||
collector.add(result)
|
||||
if result.has_issues():
|
||||
print(result.full_report())
|
||||
print()
|
||||
print(collector.render())
|
||||
return
|
||||
|
||||
# --- Construir steps ---
|
||||
@@ -111,10 +115,13 @@ def main():
|
||||
desired_format=args.formato,
|
||||
desired_image_format="." + args.formato_imagen,
|
||||
)
|
||||
collector = SummaryCollector()
|
||||
for f in comic_files:
|
||||
print(f"\n=== {f} ===")
|
||||
result = pipeline.run(f, confirm_fn=confirm_fn)
|
||||
print(result.summary())
|
||||
collector.add(result)
|
||||
print(f"\n{collector.render()}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
+34
-39
@@ -7,18 +7,21 @@ from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
|
||||
from core.result import StepResult
|
||||
|
||||
|
||||
def check_trash(work_dir: str) -> StepResult:
|
||||
def check_trash(names: list[str]) -> StepResult:
    """Detect junk files and directories among the archive member names.

    Works purely on the member names, so the archive does not need to be
    extracted. Leftover ``os.walk(work_dir)`` logic from the directory-based
    version is dropped: ``work_dir`` is no longer a parameter here.

    Args:
        names: archive member names; both ``/`` and ``\\`` separators are
            accepted and a trailing ``/`` is ignored.

    Returns:
        A ``check_trash`` StepResult with one warning per distinct junk
        path, sorted. For each member only the outermost junk component is
        reported, as the path from the archive root up to that component.
    """
    flagged = set()
    for name in names:
        parts = name.replace("\\", "/").rstrip("/").split("/")
        for depth, part in enumerate(parts, start=1):
            if part.lower() in TRASH_FILES:
                # Report the junk entry itself, not every file nested below it.
                flagged.add("/".join(parts[:depth]))
                break

    warnings = [f"Basura detectada: {entry}" for entry in sorted(flagged)]
    return StepResult(step="check_trash", changed=False, warnings=warnings)
|
||||
|
||||
|
||||
@@ -27,15 +30,16 @@ def _natural_sort_key(name: str):
|
||||
return [int(p) if p.isdigit() else p.lower() for p in parts]
|
||||
|
||||
|
||||
def check_page_numbering(work_dir: str) -> StepResult:
|
||||
def check_page_numbering(names: list[str]) -> StepResult:
|
||||
"""Detecta páginas mal numeradas: sin zero-padding, padding inconsistente, saltos."""
|
||||
images = []
|
||||
for root, _, files in os.walk(work_dir):
|
||||
for f in files:
|
||||
if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS:
|
||||
images.append(f)
|
||||
|
||||
images.sort(key=lambda f: _natural_sort_key(os.path.splitext(f)[0]))
|
||||
images = sorted(
|
||||
[
|
||||
name.replace("\\", "/").rsplit("/", 1)[-1]
|
||||
for name in names
|
||||
if os.path.splitext(name)[1].lower() in IMAGE_EXTENSIONS
|
||||
],
|
||||
key=lambda f: _natural_sort_key(os.path.splitext(f)[0]),
|
||||
)
|
||||
total = len(images)
|
||||
if total == 0:
|
||||
return StepResult(step="check_page_numbering", changed=False)
|
||||
@@ -82,16 +86,15 @@ def check_page_numbering(work_dir: str) -> StepResult:
|
||||
return StepResult(step="check_page_numbering", changed=False, warnings=warnings)
|
||||
|
||||
|
||||
def check_image_extensions(work_dir: str) -> StepResult:
|
||||
def check_image_extensions(names: list[str]) -> StepResult:
|
||||
"""Detecta mezcla de formatos de imagen en el archivo."""
|
||||
ext_set = set()
|
||||
for root, _, files in os.walk(work_dir):
|
||||
for f in files:
|
||||
ext = os.path.splitext(f)[1].lower()
|
||||
if ext in IMAGE_EXTENSIONS:
|
||||
# Normalizar .jpeg → .jpg
|
||||
normalized = ".jpg" if ext == ".jpeg" else ext
|
||||
ext_set.add(normalized)
|
||||
for name in names:
|
||||
ext = os.path.splitext(name)[1].lower()
|
||||
if ext in IMAGE_EXTENSIONS:
|
||||
# Normalizar .jpeg → .jpg
|
||||
normalized = ".jpg" if ext == ".jpeg" else ext
|
||||
ext_set.add(normalized)
|
||||
|
||||
warnings = []
|
||||
if len(ext_set) > 1:
|
||||
@@ -101,19 +104,11 @@ def check_image_extensions(work_dir: str) -> StepResult:
|
||||
return StepResult(step="check_image_extensions", changed=False, warnings=warnings)
|
||||
|
||||
|
||||
def check_comicinfo(work_dir: str) -> StepResult:
|
||||
def check_comicinfo(names: list[str]) -> StepResult:
    """Warn when no archive member is named ComicInfo.xml.

    Works purely on the member names, so the archive does not need to be
    extracted. The leftover ``os.walk(work_dir)`` scan from the
    directory-based version is dropped: ``work_dir`` is no longer a
    parameter here.

    Args:
        names: archive member names; both ``/`` and ``\\`` separators are
            accepted. Only the last path component is compared,
            case-insensitively.

    Returns:
        A ``check_comicinfo`` StepResult whose warnings list is empty when
        the file is present and holds a single message otherwise.
    """
    has_comicinfo = any(
        name.replace("\\", "/").rsplit("/", 1)[-1].lower() == "comicinfo.xml"
        for name in names
    )
    warnings = [] if has_comicinfo else ["Falta ComicInfo.xml"]
    return StepResult(step="check_comicinfo", changed=False, warnings=warnings)
|
||||
|
||||
Reference in New Issue
Block a user