Evita l'extracció quan no és necessari

This commit is contained in:
2026-02-19 12:41:34 +01:00
parent 39bd6eeb20
commit 980c85b7af
5 changed files with 178 additions and 51 deletions
+34 -39
View File
@@ -7,18 +7,21 @@ from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
from core.result import StepResult
def check_trash(names: list[str]) -> StepResult:
    """Detect junk files and directories among the given entry names.

    Works purely on path strings, so nothing has to exist on disk.

    Args:
        names: Entry paths to inspect (presumably the member names of the
            archive -- confirm against the caller). Backslashes are
            treated as path separators and trailing slashes are ignored.

    Returns:
        A ``StepResult`` for the ``check_trash`` step with ``changed=False``
        and one warning per distinct junk path, in sorted order.
    """
    found = set()
    for name in names:
        parts = name.replace("\\", "/").rstrip("/").split("/")
        for depth, part in enumerate(parts, start=1):
            if part.lower() in TRASH_FILES:
                # Keep the path only up to the outermost junk component, so
                # every entry below a junk directory collapses into a single
                # warning; the set removes the resulting duplicates.
                found.add("/".join(parts[:depth]))
                break

    warnings = [f"Basura detectada: {path}" for path in sorted(found)]
    return StepResult(step="check_trash", changed=False, warnings=warnings)
@@ -27,15 +30,16 @@ def _natural_sort_key(name: str):
return [int(p) if p.isdigit() else p.lower() for p in parts]
def check_page_numbering(work_dir: str) -> StepResult:
def check_page_numbering(names: list[str]) -> StepResult:
"""Detecta páginas mal numeradas: sin zero-padding, padding inconsistente, saltos."""
images = []
for root, _, files in os.walk(work_dir):
for f in files:
if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS:
images.append(f)
images.sort(key=lambda f: _natural_sort_key(os.path.splitext(f)[0]))
images = sorted(
[
name.replace("\\", "/").rsplit("/", 1)[-1]
for name in names
if os.path.splitext(name)[1].lower() in IMAGE_EXTENSIONS
],
key=lambda f: _natural_sort_key(os.path.splitext(f)[0]),
)
total = len(images)
if total == 0:
return StepResult(step="check_page_numbering", changed=False)
@@ -82,16 +86,15 @@ def check_page_numbering(work_dir: str) -> StepResult:
return StepResult(step="check_page_numbering", changed=False, warnings=warnings)
def check_image_extensions(work_dir: str) -> StepResult:
def check_image_extensions(names: list[str]) -> StepResult:
"""Detecta mezcla de formatos de imagen en el archivo."""
ext_set = set()
for root, _, files in os.walk(work_dir):
for f in files:
ext = os.path.splitext(f)[1].lower()
if ext in IMAGE_EXTENSIONS:
# Normalizar .jpeg → .jpg
normalized = ".jpg" if ext == ".jpeg" else ext
ext_set.add(normalized)
for name in names:
ext = os.path.splitext(name)[1].lower()
if ext in IMAGE_EXTENSIONS:
# Normalizar .jpeg → .jpg
normalized = ".jpg" if ext == ".jpeg" else ext
ext_set.add(normalized)
warnings = []
if len(ext_set) > 1:
@@ -101,19 +104,11 @@ def check_image_extensions(work_dir: str) -> StepResult:
return StepResult(step="check_image_extensions", changed=False, warnings=warnings)
def check_comicinfo(names: list[str]) -> StepResult:
    """Warn when no ComicInfo.xml entry is present among the given names.

    Only the final path component of each entry is compared, and the
    comparison is case-insensitive, so the file is accepted at any depth.

    Args:
        names: Entry paths to inspect (presumably the member names of the
            archive -- confirm against the caller). Backslashes are
            treated as path separators.

    Returns:
        A ``StepResult`` for the ``check_comicinfo`` step with
        ``changed=False``; its warnings hold a single message when the
        file is missing and are empty otherwise.
    """
    found = any(
        name.replace("\\", "/").rsplit("/", 1)[-1].lower() == "comicinfo.xml"
        for name in names
    )
    warnings = [] if found else ["Falta ComicInfo.xml"]
    return StepResult(step="check_comicinfo", changed=False, warnings=warnings)