cleaner.py i standardizer.py

This commit is contained in:
2026-02-18 13:07:03 +01:00
parent eb589ce949
commit f374bdb023
6 changed files with 346 additions and 3 deletions
+10
View File
@@ -0,0 +1,10 @@
# core/constants.py
# Constants shared by the comic-processing modules.

# Lowercase, dot-prefixed file extensions. Presumably the page-image types the
# tools recognise -- their use is outside this module, confirm with callers.
IMAGE_EXTENSIONS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".webp",
}

# Lowercase names of junk entries. processors/cleaner.py compares
# ``name.lower()`` against this set, so every entry must stay lowercase.
TRASH_FILES = {"thumbs.db", ".ds_store", "__macosx", "desktop.ini"}
+68 -3
View File
@@ -3,6 +3,10 @@
import argparse
from core.scanner import find_comic_files
from processors.validator import validate_comic
from processors.cleaner import clean_comic
from processors.converter import convert_comic
from processors.standardizer import standardize_comic
def parse_args():
parser = argparse.ArgumentParser(
@@ -28,9 +32,31 @@ def parse_args():
help="Validar los archivos encontrados"
)
# Aquí podremos añadir más opciones en el futuro:
# parser.add_argument("--convertir", action="store_true", help="Convertir CBR a CBZ")
# parser.add_argument("--organizar", action="store_true", help="Organizar la colección")
# --limpiar: boolean flag; main() runs clean_comic on every file found.
parser.add_argument(
"--limpiar",
action="store_true",
help="Eliminar archivos basura y reconstruir CBZ limpios"
)
# --convertir: boolean flag; main() runs convert_comic with the --formato value.
parser.add_argument(
"--convertir",
action="store_true",
help="Convertir los archivos al formato indicado con --formato"
)
# --formato: target container format, restricted to "cbz" or "cbr".
# NOTE(review): "cbr" is accepted here, but convert_comic raises
# NotImplementedError when asked to produce a CBR -- confirm whether the
# choice should stay exposed.
parser.add_argument(
"--formato",
type=str,
choices=["cbz", "cbr"],
default="cbz",
help="Formato final deseado (por defecto: cbz)"
)
# --estandarizar: boolean flag; main() runs standardize_comic (clean, then
# convert) with the --formato value.
parser.add_argument(
"--estandarizar",
action="store_true",
help="Pipeline completo: limpiar, convertir y normalizar"
)
return parser.parse_args()
@@ -45,12 +71,18 @@ def main():
print("No se encontraron archivos .cbr o .cbz")
return
# ------------------------------------------------------------
# LISTAR
# ------------------------------------------------------------
if args.listar:
print("Archivos encontrados:")
for f in comic_files:
print(f" - {f}")
print()
# ------------------------------------------------------------
# VALIDAR
# ------------------------------------------------------------
if args.validar:
print("Validando archivos...\n")
for f in comic_files:
@@ -69,6 +101,39 @@ def main():
print()
# ------------------------------------------------------------
# LIMPIAR
# ------------------------------------------------------------
if args.limpiar:
print("Limpiando archivos...\n")
for f in comic_files:
result = clean_comic(f)
print(result)
print()
# ------------------------------------------------------------
# CONVERTIR
# ------------------------------------------------------------
if args.convertir:
    # Convert every discovered comic to the format chosen with --formato.
    print(f"Convirtiendo archivos al formato {args.formato}...\n")
    for f in comic_files:
        try:
            info = convert_comic(f, args.formato)
        except NotImplementedError as exc:
            # convert_comic raises this for targets it cannot write (CBR).
            # Report the file and continue instead of aborting the whole
            # batch with a traceback.
            print(f"No convertido: {f} ({exc})")
            continue
        if info["needs_conversion"]:
            # The source and target paths used to be printed glued together;
            # an explicit separator keeps the line readable.
            print(f"Convertido: {f} → {info['target_path']}")
        else:
            print(f"Sin cambios: {f}")
    print()
# ------------------------------------------------------------
# ESTANDARIZAR
# ------------------------------------------------------------
if args.estandarizar:
print("Estandarizando archivos...\n")
for f in comic_files:
result = standardize_comic(f, args.formato)
print(result)
print()
if __name__ == "__main__":
main()
+139
View File
@@ -0,0 +1,139 @@
# processors/cleaner.py
import os
import zipfile
import rarfile
import tempfile
import shutil
from core.constants import TRASH_FILES
from processors.validator import try_open_rar, try_open_zip
class CleanResult:
    """Report describing what happened while cleaning one comic archive."""

    def __init__(self, original_path):
        # Path of the archive that was handed to the cleaner.
        self.original_path = original_path
        # Flags start off false and are switched on by the cleaner.
        self.repacked = False
        self.converted_to_cbz = False
        # Relative paths of the entries that were deleted.
        self.removed_files = []
        # Path of the resulting archive; None until the cleaner sets it.
        self.cleaned_path = None

    def pretty_removed_files(self):
        """Return the removed-files section of the report as text."""
        if not self.removed_files:
            return " No se eliminaron archivos\n"
        header = f" Archivos eliminados ({len(self.removed_files)}):\n"
        bullets = "".join(f" - {name}\n" for name in self.removed_files)
        return header + bullets

    def __str__(self):
        """Return the full multi-line report (no trailing newline)."""
        pieces = [
            f"Limpieza de: {self.original_path}\n",
            self.pretty_removed_files(),
        ]
        if self.repacked:
            pieces.append(" Archivo reconstruido\n")
        if self.converted_to_cbz:
            pieces.append(" Convertido a CBZ\n")
        pieces.append(f" Resultado final: {self.cleaned_path}")
        return "".join(pieces)
# ------------------------------------------------------------
# 1) Limpieza de una carpeta ya extraída
# ------------------------------------------------------------
def clean_folder(folder_path):
    """
    Delete junk entries inside an already-extracted folder.

    An entry is junk when its lowercased name is in TRASH_FILES. Both files
    and directories are checked: TRASH_FILES contains directory names such as
    "__macosx", which a files-only scan can never match.

    Returns a list with the paths (relative to ``folder_path``) of every
    entry removed. A removed directory is reported once, as the directory
    itself, not once per file it contained.
    """
    removed = []
    for root, dirs, files in os.walk(folder_path):
        # Junk directories are deleted whole and pruned from ``dirs`` so that
        # os.walk (top-down) does not try to descend into them afterwards.
        surviving_dirs = []
        for d in dirs:
            if d.lower() not in TRASH_FILES:
                surviving_dirs.append(d)
                continue
            full = os.path.join(root, d)
            if os.path.islink(full):
                # shutil.rmtree refuses symlinks; drop the link itself.
                os.remove(full)
            else:
                shutil.rmtree(full)
            removed.append(os.path.relpath(full, folder_path))
        dirs[:] = surviving_dirs

        for f in files:
            if f.lower() in TRASH_FILES:
                full = os.path.join(root, f)
                os.remove(full)
                removed.append(os.path.relpath(full, folder_path))
    return removed
# ------------------------------------------------------------
# 2) Limpieza de un archivo completo (modo actual)
# ------------------------------------------------------------
def clean_comic(path, output_path=None):
    """
    Clean a CBR/CBZ archive.

    The archive is extracted to a temporary folder, junk entries are removed
    with clean_folder, and the content is repacked as a CBZ only when needed:
    when something was removed, or when the input has a .cbr extension.
    The input file itself is never deleted; a .cbr stays next to its new .cbz.

    Parameters:
        path: archive to clean; its extension must be .cbr or .cbz.
        output_path: where to write the rebuilt CBZ. Defaults to ``path``
            with its extension replaced by ".cbz".

    Returns:
        A CleanResult. ``cleaned_path`` is ``path`` itself when nothing had
        to be rebuilt.

    Raises:
        ValueError: the extension is neither .cbr nor .cbz.
        Exception: the archive could not be opened.
    """
    result = CleanResult(path)
    ext = os.path.splitext(path)[1].lower()

    # 1) Open the archive. A .cbr is tried as RAR first and then as ZIP.
    if ext == ".cbr":
        archive = try_open_rar(path) or try_open_zip(path)
    elif ext == ".cbz":
        archive = try_open_zip(path)
    else:
        # Without this branch ``archive`` would be unbound further down.
        raise ValueError(f"Formato no soportado: {path}")
    if not archive:
        raise Exception(f"No se puede abrir {path}")

    # 2) Extract into a temporary folder that is removed on every exit path,
    #    including exceptions.
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            archive.extractall(temp_dir)
        finally:
            archive.close()

        # 3) Remove junk.
        result.removed_files = clean_folder(temp_dir)

        # 4) Decide whether a rebuild is required.
        is_cbr = ext == ".cbr"
        if is_cbr:
            result.converted_to_cbz = True
        if not result.removed_files and not is_cbr:
            result.cleaned_path = path
            return result

        # 5) Final path.
        if output_path:
            final_path = output_path
        else:
            final_path = os.path.splitext(path)[0] + ".cbz"
        result.cleaned_path = final_path

        # 6) Repack as CBZ. For a .cbz input the default final path is the
        #    input file itself, so write to a sibling file first and swap it
        #    in only once complete; a failed repack then leaves the original
        #    archive untouched.
        partial_path = final_path + ".tmp"
        try:
            with zipfile.ZipFile(partial_path, "w", zipfile.ZIP_DEFLATED) as new_zip:
                for root, _, files in os.walk(temp_dir):
                    for name in files:
                        full = os.path.join(root, name)
                        new_zip.write(full, os.path.relpath(full, temp_dir))
            os.replace(partial_path, final_path)
        except BaseException:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        result.repacked = True

    return result
+72
View File
@@ -0,0 +1,72 @@
# processors/converter.py
import os
import zipfile
import rarfile
import tempfile
import shutil
def decide_target_format(original_path, desired_format="cbz"):
    """
    Decide whether a file must be converted and what its final path would be.

    Performs no conversion and touches no files.

    Parameters:
        original_path: path of the existing archive.
        desired_format: target format name, case-insensitive, with or without
            a leading dot ("cbz", "CBZ" and ".cbz" are equivalent).

    Returns:
        A dict with:
        - "needs_conversion": True when the file's extension differs from
          the target extension (compared case-insensitively).
        - "target_format": the normalised, lowercase format name.
        - "target_path": ``original_path`` with its extension replaced.
    """
    target_format = desired_format.lower().lstrip(".")
    target_ext = f".{target_format}"
    base, current_ext = os.path.splitext(original_path)

    # Compare real extensions. A plain ``endswith("cbz")`` also matches names
    # such as "notcbz" that merely end in those letters.
    needs_conversion = current_ext.lower() != target_ext

    return {
        "needs_conversion": needs_conversion,
        "target_format": target_format,
        "target_path": base + target_ext,
    }
def convert_comic(path, desired_format="cbz"):
    """
    Convert a CBR/CBZ archive to the desired format.

    Does NOT remove junk, rename pages or reorder anything. The source file
    is left in place.

    Parameters:
        path: archive to convert; its extension must be .cbr or .cbz.
        desired_format: "cbz" or "cbr" (case-insensitive).

    Returns:
        The dict produced by decide_target_format. When "needs_conversion"
        is False nothing was done.

    Raises:
        NotImplementedError: a CBR target was requested; rarfile cannot
            create RAR archives.
        ValueError: the target format is neither cbz nor cbr.
        Exception: the source extension is not supported.
    """
    info = decide_target_format(path, desired_format)

    if not info["needs_conversion"]:
        return info  # Nothing to do

    # Validate the target before doing any work. Use the normalised format
    # from ``info`` so "CBZ" and "cbz" behave the same, and fail before the
    # archive is extracted rather than after.
    target_format = info["target_format"]
    if target_format == "cbr":
        # rarfile cannot create RAR archives; an external "rar" is required.
        raise NotImplementedError("Crear CBR requiere la herramienta 'rar' instalada")
    if target_format != "cbz":
        raise ValueError(f"Formato de destino no soportado: {desired_format}")

    ext = os.path.splitext(path)[1].lower()

    # 1) Open the source archive. A file named .cbr may really be a ZIP, so
    #    fall back to zipfile when it is not a RAR but is a valid ZIP.
    if ext == ".cbr":
        if not rarfile.is_rarfile(path) and zipfile.is_zipfile(path):
            archive = zipfile.ZipFile(path, "r")
        else:
            archive = rarfile.RarFile(path, "r")
    elif ext == ".cbz":
        archive = zipfile.ZipFile(path, "r")
    else:
        raise Exception("Formato no soportado")

    target_path = info["target_path"]
    partial_path = target_path + ".tmp"

    # 2) Extract into a temporary folder that is removed on every exit path.
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            archive.extractall(temp_dir)
        finally:
            archive.close()

        # 3) Repack as CBZ. Write to a sibling file and swap it in when
        #    complete so a failure never leaves a truncated target behind.
        try:
            with zipfile.ZipFile(partial_path, "w", zipfile.ZIP_DEFLATED) as new_zip:
                for root, _, files in os.walk(temp_dir):
                    for name in files:
                        full = os.path.join(root, name)
                        new_zip.write(full, os.path.relpath(full, temp_dir))
            os.replace(partial_path, target_path)
        except BaseException:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

    return info
+55
View File
@@ -0,0 +1,55 @@
# processors/standardizer.py
from processors.cleaner import clean_comic
from processors.converter import convert_comic
class StandardizeResult:
    """Report for one run of the standardize pipeline on a single file."""

    def __init__(self, original_path):
        # Path given to the pipeline.
        self.original_path = original_path
        # Outputs of the two stages; None until the pipeline fills them in.
        self.cleaned = None
        self.converted = None
        # Path of the file the pipeline ended up with.
        self.final_path = None

    def __str__(self):
        """Return the multi-line report, ending with a newline."""
        lines = [f"Estandarización de: {self.original_path}\n"]

        # Cleaning stage.
        if not self.cleaned:
            lines.append(" Limpieza: no realizada\n")
        else:
            lines.append(
                f" Limpieza: OK ({len(self.cleaned.removed_files)} archivos eliminados)\n"
            )

        # Conversion stage; omitted from the report when no result is set.
        if self.converted:
            if self.converted["needs_conversion"]:
                lines.append(f" Conversión: OK → {self.converted['target_path']}\n")
            else:
                lines.append(" Conversión: no necesaria\n")

        lines.append(f" Resultado final: {self.final_path}\n")
        return "".join(lines)
def standardize_comic(path, desired_format="cbz"):
    """
    Run the basic pipeline on one file: clean it, then convert it.

    Returns a StandardizeResult holding the output of both stages and the
    path of the file the pipeline ended with.
    """
    outcome = StandardizeResult(path)

    # Stage 1: clean. The conversion stage works on whatever file this
    # stage reports as its result.
    outcome.cleaned = clean_comic(path)
    intermediate_path = outcome.cleaned.cleaned_path

    # Stage 2: convert.
    outcome.converted = convert_comic(intermediate_path, desired_format)

    # The final file is the conversion target when a conversion happened,
    # otherwise the cleaned file.
    converted_now = outcome.converted["needs_conversion"]
    outcome.final_path = (
        outcome.converted["target_path"] if converted_now else intermediate_path
    )
    return outcome
+2
View File
@@ -3,6 +3,8 @@
import os
import zipfile
import rarfile
from core.constants import IMAGE_EXTENSIONS, TRASH_FILES
# NOTE(review): these two assignments run after
# `from core.constants import IMAGE_EXTENSIONS, TRASH_FILES`, so they rebind
# both names and the imported values are never used in this module. The local
# TRASH_FILES also lacks "__macosx" and "desktop.ini", which core.constants
# defines. Presumably leftovers from before the constants were centralised --
# confirm and remove so this module agrees with the shared set.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
TRASH_FILES = {"thumbs.db", ".ds_store"}