Enhance dataset documentation and refactor CSV backup functionality to create a single combined file instead
All checks were successful
Deploy to Server / deploy (push) Successful in 11s
All checks were successful
Deploy to Server / deploy (push) Successful in 11s
This commit is contained in:
parent
9fd22ba8a8
commit
76b1cd70a5
3 changed files with 82 additions and 62 deletions
|
|
@ -87,7 +87,7 @@ class Command(BaseCommand):
|
|||
json_path: Path = output_dir / f"{prefix}-{timestamp}.json.zst"
|
||||
_write_json_dump(json_path, allowed_tables)
|
||||
|
||||
csv_paths: list[Path] = _write_csv_dumps(
|
||||
csv_path: Path = _write_csv_dump(
|
||||
output_dir,
|
||||
prefix,
|
||||
timestamp,
|
||||
|
|
@ -104,9 +104,7 @@ class Command(BaseCommand):
|
|||
),
|
||||
)
|
||||
self.stdout.write(self.style.SUCCESS(f"JSON backup created: {json_path}"))
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f"CSV backups created: {len(csv_paths)} files"),
|
||||
)
|
||||
self.stdout.write(self.style.SUCCESS(f"CSV backup created: {csv_path}"))
|
||||
self.stdout.write(self.style.SUCCESS(f"Included tables: {len(allowed_tables)}"))
|
||||
|
||||
|
||||
|
|
@ -353,13 +351,13 @@ def _write_json_dump(output_path: Path, tables: list[str]) -> None:
|
|||
json.dump(data, handle, default=_json_default)
|
||||
|
||||
|
||||
def _write_csv_dumps(
|
||||
def _write_csv_dump(
    output_dir: Path,
    prefix: str,
    timestamp: str,
    tables: list[str],
) -> Path:
    """Write a combined CSV file containing rows from all tables.

    Produces a single zstd-compressed CSV artifact where every record is a
    ``(table, row_json)`` pair: the source table name plus the row
    serialized as a JSON object keyed by column name. Packing all tables
    into one file keeps the backup to a single artifact instead of one
    file per table.

    Args:
        output_dir: Directory where the CSV file will be written.
        prefix: Filename prefix for the backup artifact.
        timestamp: Timestamp string embedded in the filename.
        tables: Table names to include.

    Returns:
        Created file path.
    """
    output_path: Path = output_dir / f"{prefix}-{timestamp}.csv.zst"

    with (
        output_path.open("wb") as raw_handle,
        zstd.open(raw_handle, "w") as compressed,
        io.TextIOWrapper(compressed, encoding="utf-8") as handle,
    ):
        # FIX: the previous annotation ``csv.Writer`` names a type that does
        # not exist in the csv module (only the ``csv.writer`` factory is
        # public); it avoided a runtime AttributeError solely because local
        # annotations are never evaluated (PEP 526), but it breaks type
        # checking. Leave the value unannotated.
        writer = csv.writer(handle)
        writer.writerow(["table", "row_json"])

        with django_connection.cursor() as cursor:
            for table in tables:
                # Table names come from the caller's allow-list, not user
                # input, hence the suppressed SQL-injection lint (S608).
                cursor.execute(f'SELECT * FROM "{table}"')  # noqa: S608
                columns: list[str] = [col[0] for col in cursor.description]
                rows: list[tuple] = cursor.fetchall()

                for row in rows:
                    # DB-API guarantees len(row) == len(columns) for rows of
                    # this result set, so strict=False never truncates here.
                    row_dict = dict(zip(columns, row, strict=False))
                    writer.writerow([
                        table,
                        json.dumps(row_dict, default=_json_default),
                    ])

    return output_path
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue