Implement parallel processing for JSON file imports
This commit is contained in:
parent
345f92bc7f
commit
b58849d11e
1 changed file with 28 additions and 23 deletions
|
|
@@ -1,5 +1,6 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import concurrent.futures
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
import traceback
|
import traceback
|
||||||
|
|
@@ -160,18 +161,17 @@ class Command(BaseCommand):
|
||||||
"""Process all JSON files in a directory using parallel processing.
|
"""Process all JSON files in a directory using parallel processing.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
directory: Path to the directory.
|
directory: Path to the directory containing JSON files.
|
||||||
processed_path: Name of subdirectory to move processed files to.
|
processed_path: Path to the subdirectory where processed files will be moved.
|
||||||
continue_on_error: Continue processing if an error occurs.
|
continue_on_error: Whether to continue processing remaining files if an error occurs.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
CommandError: If the file/directory doesn't exist, isn't a JSON file,
|
CommandError: If the path is invalid or moving files fails.
|
||||||
or has an invalid JSON structure.
|
ValueError: If a JSON file has an invalid structure.
|
||||||
ValueError: If the JSON file has an invalid structure.
|
TypeError: If a JSON file has an invalid structure.
|
||||||
TypeError: If the JSON file has an invalid structure.
|
AttributeError: If a JSON file has an invalid structure.
|
||||||
AttributeError: If the JSON file has an invalid structure.
|
KeyError: If a JSON file has an invalid structure.
|
||||||
KeyError: If the JSON file has an invalid structure.
|
IndexError: If a JSON file has an invalid structure.
|
||||||
IndexError: If the JSON file has an invalid structure.
|
|
||||||
"""
|
"""
|
||||||
json_files: list[Path] = list(directory.glob("*.json"))
|
json_files: list[Path] = list(directory.glob("*.json"))
|
||||||
if not json_files:
|
if not json_files:
|
||||||
|
|
@@ -181,19 +181,24 @@ class Command(BaseCommand):
|
||||||
total_files: int = len(json_files)
|
total_files: int = len(json_files)
|
||||||
self.stdout.write(f"Found {total_files} JSON files to process")
|
self.stdout.write(f"Found {total_files} JSON files to process")
|
||||||
|
|
||||||
for json_file in json_files:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
self.stdout.write(f"Processing file {json_file.name}...")
|
future_to_file: dict[concurrent.futures.Future[None], Path] = {
|
||||||
try:
|
executor.submit(self._process_file, json_file, processed_path): json_file for json_file in json_files
|
||||||
self._process_file(json_file, processed_path)
|
}
|
||||||
except CommandError as e:
|
for future in concurrent.futures.as_completed(future_to_file):
|
||||||
if not continue_on_error:
|
json_file: Path = future_to_file[future]
|
||||||
raise
|
self.stdout.write(f"Processing file {json_file.name}...")
|
||||||
self.stdout.write(self.style.ERROR(f"Error processing {json_file}: {e}"))
|
try:
|
||||||
except (ValueError, TypeError, AttributeError, KeyError, IndexError):
|
future.result()
|
||||||
if not continue_on_error:
|
except CommandError as e:
|
||||||
raise
|
if not continue_on_error:
|
||||||
self.stdout.write(self.style.ERROR(f"Data error processing {json_file}"))
|
raise
|
||||||
self.stdout.write(self.style.ERROR(traceback.format_exc()))
|
self.stdout.write(self.style.ERROR(f"Error processing {json_file}: {e}"))
|
||||||
|
except (ValueError, TypeError, AttributeError, KeyError, IndexError):
|
||||||
|
if not continue_on_error:
|
||||||
|
raise
|
||||||
|
self.stdout.write(self.style.ERROR(f"Data error processing {json_file}"))
|
||||||
|
self.stdout.write(self.style.ERROR(traceback.format_exc()))
|
||||||
|
|
||||||
msg: str = f"Processed {total_files} JSON files in {directory}. Moved processed files to {processed_path}."
|
msg: str = f"Processed {total_files} JSON files in {directory}. Moved processed files to {processed_path}."
|
||||||
self.stdout.write(self.style.SUCCESS(msg))
|
self.stdout.write(self.style.SUCCESS(msg))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue