This commit is contained in:
2026-02-20 13:54:25 +01:00
parent 448c50b846
commit 8d83d27bd3
5 changed files with 120 additions and 8 deletions
+31 -3
View File
@@ -11,7 +11,7 @@ from core.backup import move_to_backup
from core.collision import CollisionPolicy, resolve_collision
from core.result import ComicResult, StepResult
from processors.validator import validate_archive
from processors.cleaner import clean_directory
from processors.cleaner import clean_directory, flatten_directory
from processors.converter import needs_conversion, conversion_step_result
from processors.checks import (
check_trash,
@@ -19,6 +19,7 @@ from processors.checks import (
check_image_extensions,
check_comicinfo,
check_foreign,
check_nested,
)
from processors.page_normalizer import normalize_pages, preview_normalize_pages
from processors.image_normalizer import (
@@ -48,10 +49,26 @@ class Pipeline:
if step == "clean":
trash_r = next((r for r in step_results if r.step == "check_trash"), None)
foreign_r = next((r for r in step_results if r.step == "check_foreign"), None)
nested_r = next((r for r in step_results if r.step == "check_nested"), None)
prefix = "Basura detectada: "
items = [w.removeprefix(prefix) for w in (trash_r.warnings if trash_r else []) if w.startswith(prefix)]
items += [w.removeprefix("Fichero extraño: ") for w in (foreign_r.warnings if foreign_r else [])]
return {"items": items}
flatten = False
flatten_files: list[tuple[str, str]] = []
if nested_r and nested_r.warnings:
w = nested_r.warnings[0]
if w.startswith("Imágenes en subdirectorio: "):
flatten = True
for entry in sorted(os.listdir(temp_dir)):
subpath = os.path.join(temp_dir, entry)
if not os.path.isdir(subpath):
continue
for root, _dirs, files in os.walk(subpath):
for f in sorted(files):
src_abs = os.path.join(root, f)
src_rel = os.path.relpath(src_abs, temp_dir)
flatten_files.append((src_rel, f))
return {"items": items, "flatten": flatten, "flatten_files": flatten_files}
elif step == "normalize_pages":
renames = preview_normalize_pages(temp_dir)
@@ -83,8 +100,12 @@ class Pipeline:
if step == "clean":
trash = next((r for r in step_results if r.step == "check_trash"), None)
foreign = next((r for r in step_results if r.step == "check_foreign"), None)
nested = next((r for r in step_results if r.step == "check_nested"), None)
if (trash and trash.warnings) or (foreign and foreign.warnings):
return True
# Solo necesita extracción si es el caso aplanable (1 subdir)
if nested and nested.warnings and nested.warnings[0].startswith("Imágenes en subdirectorio: "):
return True
return False
def run(self, path: str, confirm_fn=None) -> ComicResult:
@@ -112,6 +133,7 @@ class Pipeline:
check_image_extensions(names),
check_comicinfo(names),
check_foreign(names),
check_nested(names),
]
# 4. Pre-flight: si ningún step necesita extracción, salir sin tocar el archivo
@@ -128,12 +150,18 @@ class Pipeline:
if "clean" in self.steps:
preview = self._compute_preview("clean", temp_dir, step_results)
if preview.get("items"):
if preview.get("items") or preview.get("flatten"):
if confirm_fn is None or confirm_fn("clean", preview):
clean_result = clean_directory(temp_dir)
step_results.append(clean_result)
if clean_result.changed:
any_changed = True
# Aplanar si caso simple
if preview.get("flatten"):
flat_result = flatten_directory(temp_dir)
step_results.append(flat_result)
if flat_result.changed:
any_changed = True
if "normalize_pages" in self.steps:
preview = self._compute_preview("normalize_pages", temp_dir, step_results)
+4
View File
@@ -132,6 +132,8 @@ class SummaryCollector:
("Extensión incorrecta", "validate", lambda w: "Extensión incorrecta" in w, ["convert"], "convertido"),
("Basura detectada", "check_trash", lambda w: True, ["clean"], "limpiado"),
("Ficheros extraños", "check_foreign", lambda w: True, ["clean"], "limpiado"),
("Estructura anidada", "check_nested", lambda w: w.startswith("Imágenes en subdirectorio: "), ["clean"], "aplanado"),
("Estructura compleja", "check_nested", lambda w: w.startswith("Múltiples subdirectorios"), [], None),
("Numeración de páginas", "check_page_numbering", lambda w: True, ["normalize_pages"], "renumerado"),
("Imágenes mezcladas", "check_image_extensions", lambda w: True, ["normalize_images", "convert_images"], "normalizado"),
("Sin ComicInfo.xml", "check_comicinfo", lambda w: True, [], None),
@@ -159,6 +161,8 @@ class SummaryCollector:
elif step_name == "check_foreign":
items = [w.removeprefix("Fichero extraño: ") for w in msgs]
entries.append((r.original_path, ", ".join(items), annotation))
elif step_name == "check_nested":
entries.append((r.original_path, msgs[0], annotation))
else:
entries.append((r.original_path, msgs[0], annotation))
if entries:
+18 -3
View File
@@ -37,9 +37,24 @@ def _print_preview(step: str, preview: dict, formato: str) -> None:
fmt = formato.upper()
if step == "clean":
print("Ficheros basura a eliminar:")
for item in preview["items"]:
print(f" - {item}")
trash_items = [i for i in preview["items"] if not i.startswith("[Aplanar]")]
flatten_files = preview.get("flatten_files", [])
if trash_items:
print("Ficheros a eliminar:")
for item in trash_items:
print(f" - {item}")
if flatten_files:
n = len(flatten_files)
display = flatten_files[:10] if n > 10 else flatten_files
col_w = max((len(src) for src, _ in display), default=0) + 2
print(f"Aplanar estructura ({n} ficheros):")
for src, dst in display:
print(f" {src:<{col_w}}{dst}")
if n > 10:
print(f" ... y {n - 10} más")
print(f"Formato final del archivo: {fmt}")
elif step == "normalize_pages":
+31 -2
View File
@@ -108,9 +108,12 @@ def check_foreign(names: list[str]) -> StepResult:
"""Detecta ficheros que no son imágenes ni metadata permitida."""
found = []
for name in names:
basename = name.replace("\\", "/").rstrip("/").rsplit("/", 1)[-1]
normalized = name.replace("\\", "/")
if normalized.endswith("/"):
continue # entrada de directorio — ignorar siempre
basename = normalized.rsplit("/", 1)[-1]
if not basename:
continue # entrada de directorio
continue
ext = os.path.splitext(basename)[1].lower()
if ext not in IMAGE_EXTENSIONS and basename.lower() not in FOREIGN_ALLOWED:
found.append(name)
@@ -118,6 +121,32 @@ def check_foreign(names: list[str]) -> StepResult:
return StepResult(step="check_foreign", changed=False, warnings=warnings)
def check_nested(names: list[str]) -> StepResult:
    """Report images that live inside subdirectories rather than the archive root.

    Args:
        names: Archive member names; both "/" and "\\" are accepted as
            path separators.

    Returns:
        A ``check_nested`` StepResult with ``changed=False``. It carries no
        warnings when no image is nested, one "Imágenes en subdirectorio"
        warning when every nested image shares a single top-level directory,
        and one "Múltiples subdirectorios" warning otherwise.
    """
    # NOTE(review): every top-level directory holding a file with an image
    # extension is counted, including any that another check may classify as
    # trash — confirm that is the intended interaction.
    top_level_dirs: set[str] = set()
    for raw_name in names:
        path = raw_name.replace("\\", "/")
        # A trailing slash marks a directory entry, not a file.
        if path.endswith("/"):
            continue
        first_component, separator, _remainder = path.partition("/")
        if not separator:
            # No separator at all: the file sits at the archive root.
            continue
        file_name = path.rsplit("/", 1)[-1]
        _stem, suffix = os.path.splitext(file_name)
        if suffix.lower() in IMAGE_EXTENSIONS:
            top_level_dirs.add(first_component)

    if not top_level_dirs:
        return StepResult(step="check_nested", changed=False)

    if len(top_level_dirs) == 1:
        (only_dir,) = top_level_dirs
        message = f"Imágenes en subdirectorio: {only_dir}/"
    else:
        joined = ", ".join(sorted(top_level_dirs))
        message = f"Múltiples subdirectorios con imágenes: {joined}"
    return StepResult(step="check_nested", changed=False, warnings=[message])
def check_comicinfo(names: list[str]) -> StepResult:
"""Detecta ausencia de ComicInfo.xml."""
found = any(
+36
View File
@@ -30,9 +30,45 @@ def clean_directory(work_dir: str) -> StepResult:
shutil.rmtree(full, ignore_errors=True)
removed.append(os.path.relpath(full, work_dir) + "/")
# Eliminar subdirectorios que hayan quedado vacíos
for root, dirs, files in os.walk(work_dir, topdown=False):
if root == work_dir:
continue
if not os.listdir(root):
os.rmdir(root)
removed.append(os.path.relpath(root, work_dir) + "/")
details = [f"Eliminado: {r}" for r in removed]
return StepResult(
step="clean",
changed=bool(removed),
details=details,
)
def flatten_directory(work_dir: str) -> StepResult:
    """
    Move every file found under the subdirectories of work_dir into work_dir.

    Each immediate subdirectory is walked recursively, its files are moved to
    the root of work_dir, and the (now drained) subdirectory is removed.
    When the destination name is already taken, a ``_1``, ``_2``, ... suffix
    is inserted before the extension until a free name is found.

    Entries, descent order and files are all processed in sorted order so the
    suffix a colliding file receives is reproducible between runs instead of
    depending on the order the filesystem happens to list things in.

    Precondition (stated by the original author): only one subdirectory
    contains images; this is validated by the caller before calling.

    Args:
        work_dir: Directory whose subdirectories are to be flattened.

    Returns:
        A ``flatten`` StepResult; ``changed`` is True when at least one file
        was moved, and ``details`` holds one "Aplanado: <name>" line per file
        (the original base name, even if the file was renamed on collision).
    """
    moved = []
    # The listing is a snapshot taken before any move, so files that land in
    # the root during flattening are never revisited.
    for entry in sorted(os.listdir(work_dir)):
        subdir = os.path.join(work_dir, entry)
        if not os.path.isdir(subdir):
            continue
        for root, dirs, files in os.walk(subdir):
            # In-place sort: os.walk (top-down) descends in the order of this list.
            dirs.sort()
            for f in sorted(files):
                src = os.path.join(root, f)
                dst = os.path.join(work_dir, f)
                base, ext = os.path.splitext(f)
                counter = 1
                # Probe suffixed names until one is free.
                while os.path.exists(dst):
                    dst = os.path.join(work_dir, f"{base}_{counter}{ext}")
                    counter += 1
                shutil.move(src, dst)
                moved.append(f)
        # Best-effort removal of the emptied tree; failures are ignored on purpose.
        shutil.rmtree(subdir, ignore_errors=True)

    details = [f"Aplanado: {f}" for f in moved]
    return StepResult(step="flatten", changed=bool(moved), details=details)