200 lines
5.9 KiB
Python
200 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from collections import Counter
|
|
|
|
def load_saves(dir_path):
    """Read every regular file in ``dir_path`` as a binary save.

    File names are processed in sorted order, and the first file read fixes
    the size every other file must match.

    Args:
        dir_path: Directory whose regular files are loaded.

    Returns:
        A ``(buffers, size)`` tuple: ``buffers`` is a list of
        ``(file_name, bytes)`` pairs in sorted-name order and ``size`` is
        the common length in bytes.

    Exits the process with status 1, after printing a message, when the
    directory contains no regular files or when a file's length differs
    from the first file's length.
    """
    # Sub-directories and other non-file entries are skipped.
    names = [
        entry
        for entry in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, entry))
    ]
    names.sort()
    if len(names) == 0:
        print("No se han encontrado ficheros en el directorio.")
        sys.exit(1)

    loaded = []
    expected = None
    for entry in names:
        with open(os.path.join(dir_path, entry), "rb") as handle:
            blob = handle.read()
        # The first file defines the reference size for all the others.
        if expected is None:
            expected = len(blob)
        if len(blob) != expected:
            print(f"Tamaño inconsistente en {entry}: {len(blob)} bytes (esperado {expected})")
            sys.exit(1)
        loaded.append((entry, blob))

    print(f"Cargados {len(loaded)} saves, tamaño: {expected} bytes")
    return loaded, expected
|
|
|
|
|
|
def classify_offset(values, sequence):
    """Assign a coarse heuristic label to the values seen at one offset.

    The checks run in order and the first match wins.

    Args:
        values: Set of distinct values observed at the offset. The bitfield
            heuristic assumes byte values (0-255).
        sequence: The observed values as a list, in file (timeline) order.

    Returns:
        One of:
        ``"constant"`` - at most one distinct value;
        ``"binary_flag_one_way"`` - only 0/1, and never 0 again after a 1;
        ``"binary_flag"`` - only 0/1, any other pattern;
        ``"small_state"`` - at most 8 distinct values;
        ``"bitfield_like"`` - several bits are used but at least one bit of
        the byte is never set;
        ``"high_variability"`` - more distinct values than a quarter of the
        number of samples;
        ``"other"`` - none of the above.
    """
    # Zero or one distinct value: nothing changes here. An empty set is
    # handled explicitly so it does not fall into the 0/1 branch below.
    if len(values) <= 1:
        return "constant"

    # Only 0 and 1 observed: treat it as a boolean flag.
    if values.issubset({0, 1}):
        if 1 not in sequence:
            return "binary_flag"
        # A flag that never drops back to 0 once set looks permanent.
        after_first_one = sequence[sequence.index(1) + 1:]
        if 0 in after_first_one:
            return "binary_flag"
        return "binary_flag_one_way"

    # Few distinct values: probably a small enum / state machine.
    if len(values) <= 8:
        return "small_state"

    # Bitfield heuristic. OR-ing the values together gives the bits that are
    # ever set. Testing each value against that mask would always succeed
    # (every value is part of the OR), so the meaningful signal is whether
    # some bit of the byte is never used.
    byte_mask = 0xFF
    used_bits = 0
    for value in values:
        used_bits |= value
    uses_several_bits = (used_bits & (used_bits - 1)) != 0
    if uses_several_bits and used_bits != byte_mask:
        return "bitfield_like"

    # Many distinct values relative to the number of samples.
    if len(values) > len(sequence) // 4:
        return "high_variability"

    return "other"
|
|
|
|
|
|
def analyze_saves(buffers, size, min_changes=2):
    """Scan each offset and describe those whose value varies across saves.

    Args:
        buffers: List of ``(name, bytes)`` pairs, one per save.
        size: Number of offsets to scan in every save.
        min_changes: Minimum number of distinct values an offset must show
            to be included in the result.

    Returns:
        A list of dicts in ascending offset order, each with the keys
        ``"offset"``, ``"values"`` (set of distinct values),
        ``"num_values"``, ``"classification"`` (label returned by
        ``classify_offset``) and ``"most_common"`` (up to five
        ``(value, count)`` pairs).
    """
    report = []

    print("Analizando offsets... (esto puede tardar un poco)")
    # The column for each offset is built on the fly instead of transposing
    # all saves up front, to keep memory usage low.
    for position in range(size):
        column = [entry[1][position] for entry in buffers]
        distinct = set(column)
        if len(distinct) >= min_changes:
            report.append({
                "offset": position,
                "values": distinct,
                "num_values": len(distinct),
                "classification": classify_offset(distinct, column),
                "most_common": Counter(column).most_common(5),
            })

    return report
|
|
|
|
|
|
def write_text_report(results, out_path):
    """Write a human-readable report of the analysed offsets.

    Args:
        results: Iterable of result dicts with the keys ``"offset"``,
            ``"values"``, ``"num_values"``, ``"classification"`` and
            ``"most_common"`` (a sequence of ``(value, count)`` pairs).
        out_path: Destination file; opened for writing as UTF-8.
    """
    with open(out_path, "w", encoding="utf-8") as report:
        for entry in results:
            position = entry["offset"]
            distinct_hex = ", ".join(f"0x{v:02X}" for v in sorted(entry["values"]))
            # One paragraph per offset, terminated by a blank line.
            chunk = [
                f"Offset 0x{position:05X} ({position}):\n",
                f" Valores distintos ({entry['num_values']}): {distinct_hex}\n",
                f" Clasificación: {entry['classification']}\n",
                " Más frecuentes:\n",
            ]
            chunk.extend(
                f" 0x{val:02X} -> {cnt} veces\n" for val, cnt in entry["most_common"]
            )
            chunk.append("\n")
            report.writelines(chunk)
|
|
|
|
|
|
def write_csv_report(results, out_path):
    """Write the analysed offsets as a semicolon-delimited CSV file.

    Args:
        results: Iterable of result dicts with the keys ``"offset"``,
            ``"values"``, ``"num_values"``, ``"classification"`` and
            ``"most_common"`` (a sequence of ``(value, count)`` pairs).
        out_path: Destination file; opened for writing as UTF-8.
    """
    import csv

    header = [
        "offset_dec",
        "offset_hex",
        "num_values",
        "classification",
        "values_hex",
        "most_common_hex_counts",
    ]

    def as_row(entry):
        # Lists inside a cell are comma-separated; the column delimiter is ';'.
        position = entry["offset"]
        return [
            position,
            f"0x{position:05X}",
            entry["num_values"],
            entry["classification"],
            ",".join(f"0x{v:02X}" for v in sorted(entry["values"])),
            ",".join(f"0x{v:02X}:{cnt}" for v, cnt in entry["most_common"]),
        ]

    with open(out_path, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle, delimiter=';')
        out.writerow(header)
        out.writerows(as_row(entry) for entry in results)
|
|
|
|
|
|
def main():
    """Parse the command line, analyse the saves and write both reports.

    Command-line arguments:
        directory: Directory containing the saves.
        --min-changes: Minimum number of distinct values at an offset for it
            to be reported (default 2).
        --prefix: Prefix of the output files; ``<prefix>.txt`` and
            ``<prefix>.csv`` are written inside ``directory``.

    Exits with status 1 when ``directory`` is not a directory, and with
    status 0 (writing no reports) when no offset has enough distinct values.
    """
    parser = argparse.ArgumentParser(
        description="Analiza múltiples savegames binarios y detecta offsets interesantes."
    )
    parser.add_argument(
        "directory",
        help="Directorio que contiene los saves (todos del mismo tamaño)."
    )
    parser.add_argument(
        "--min-changes",
        type=int,
        default=2,
        help="Mínimo de valores distintos en un offset para incluirlo en el informe (por defecto: 2)."
    )
    parser.add_argument(
        "--prefix",
        default="analysis_report",
        help="Prefijo para los ficheros de salida (txt y csv)."
    )

    args = parser.parse_args()

    if not os.path.isdir(args.directory):
        print(f"No es un directorio válido: {args.directory}")
        sys.exit(1)

    txt_path = os.path.join(args.directory, args.prefix + ".txt")
    csv_path = os.path.join(args.directory, args.prefix + ".csv")

    # The reports live in the saves directory, and load_saves reads every
    # regular file found there. Reports left by a previous run would be
    # loaded as saves and abort the run with a size mismatch, so they are
    # removed before loading; they would be overwritten below anyway.
    # Only files located directly in the saves directory are touched.
    for report_path in (txt_path, csv_path):
        if os.path.isfile(report_path) and os.path.samefile(
            os.path.dirname(report_path), args.directory
        ):
            os.remove(report_path)

    buffers, size = load_saves(args.directory)
    results = analyze_saves(buffers, size, min_changes=args.min_changes)

    if not results:
        print("No se han encontrado offsets con cambios suficientes.")
        sys.exit(0)

    write_text_report(results, txt_path)
    write_csv_report(results, csv_path)

    print(f"Informe de texto: {txt_path}")
    print(f"Informe CSV: {csv_path}")
|
|
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|