Enhance dataset documentation and refactor CSV backup functionality to create a single combined file instead of one file per table

2026-03-17 01:38:49 +01:00 · 2026-03-17 01:38:49 +01:00 · 76b1cd70a5
commit 76b1cd70a5
parent 9fd22ba8a8
3 changed files with 82 additions and 62 deletions
--- a/twitch/management/commands/backup_db.py
+++ b/twitch/management/commands/backup_db.py
@ -87,7 +87,7 @@ class Command(BaseCommand):
        json_path: Path = output_dir / f"{prefix}-{timestamp}.json.zst"
        _write_json_dump(json_path, allowed_tables)

-        csv_paths: list[Path] = _write_csv_dumps(
+        csv_path: Path = _write_csv_dump(
            output_dir,
            prefix,
            timestamp,
@ -104,9 +104,7 @@ class Command(BaseCommand):
            ),
        )
        self.stdout.write(self.style.SUCCESS(f"JSON backup created: {json_path}"))
-        self.stdout.write(
-            self.style.SUCCESS(f"CSV backups created: {len(csv_paths)} files"),
-        )
+        self.stdout.write(self.style.SUCCESS(f"CSV backup created: {csv_path}"))
        self.stdout.write(self.style.SUCCESS(f"Included tables: {len(allowed_tables)}"))


@ -353,13 +351,13 @@ def _write_json_dump(output_path: Path, tables: list[str]) -> None:
        json.dump(data, handle, default=_json_default)


-def _write_csv_dumps(
+def _write_csv_dump(
    output_dir: Path,
    prefix: str,
    timestamp: str,
    tables: list[str],
-) -> list[Path]:
-    """Write per-table CSV files into zstd-compressed files.
+) -> Path:
+    """Write a combined CSV file containing rows from all tables.

    Args:
        output_dir: Directory where CSV files will be written.
@ -368,23 +366,29 @@ def _write_csv_dumps(
        tables: Table names to include.

    Returns:
-        List of created file paths.
+        Created file path.
    """
-    paths: list[Path] = []
-    with django_connection.cursor() as cursor:
-        for table in tables:
-            cursor.execute(f'SELECT * FROM "{table}"')  # noqa: S608
-            columns: list[str] = [col[0] for col in cursor.description]
-            rows: list[tuple] = cursor.fetchall()
+    output_path: Path = output_dir / f"{prefix}-{timestamp}.csv.zst"

-            output_path: Path = output_dir / f"{prefix}-{timestamp}-{table}.csv.zst"
-            with (
-                output_path.open("wb") as raw_handle,
-                zstd.open(raw_handle, "w") as compressed,
-                io.TextIOWrapper(compressed, encoding="utf-8") as handle,
-            ):
-                writer: csv.Writer = csv.writer(handle)
-                writer.writerow(columns)
-                writer.writerows(rows)
-            paths.append(output_path)
-    return paths
+    with (
+        output_path.open("wb") as raw_handle,
+        zstd.open(raw_handle, "w") as compressed,
+        io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+    ):
+        writer: csv.Writer = csv.writer(handle)
+        writer.writerow(["table", "row_json"])
+
+        with django_connection.cursor() as cursor:
+            for table in tables:
+                cursor.execute(f'SELECT * FROM "{table}"')  # noqa: S608
+                columns: list[str] = [col[0] for col in cursor.description]
+                rows: list[tuple] = cursor.fetchall()
+
+                for row in rows:
+                    row_dict = dict(zip(columns, row, strict=False))
+                    writer.writerow([
+                        table,
+                        json.dumps(row_dict, default=_json_default),
+                    ])
+
+    return output_path