diff --git a/src/apps/parsers/admin.py b/src/apps/parsers/admin.py index 0ec7c1d..5f9d6f9 100644 --- a/src/apps/parsers/admin.py +++ b/src/apps/parsers/admin.py @@ -2,13 +2,7 @@ Admin configuration for parsers app. """ -import hashlib -import time -import uuid -from pathlib import Path - -from apps.core.models import BackgroundJob -from apps.core.services import BackgroundJobService +from apps.parsers.fns_upload import FNSUploadService from apps.parsers.models import ( FinancialReport, FinancialReportLine, @@ -20,10 +14,7 @@ from apps.parsers.models import ( ProcurementRecord, Proxy, ) -from apps.parsers.serializers import FNSFileUploadSerializer -from apps.parsers.services import FNSReportService -from apps.parsers.tasks import process_fns_file -from django.conf import settings +from apps.parsers.serializers import FNSFileUploadSerializer, FNSZipUploadSerializer from django.contrib import admin, messages from django.shortcuts import redirect from django.template.response import TemplateResponse @@ -742,6 +733,11 @@ class FinancialReportAdmin(admin.ModelAdmin): self.admin_site.admin_view(self.upload_excel_view), name="parsers_financialreport_upload_excel", ), + path( + "upload-zip/", + self.admin_site.admin_view(self.upload_zip_view), + name="parsers_financialreport_upload_zip", + ), ] return custom_urls + urls @@ -750,6 +746,9 @@ class FinancialReportAdmin(admin.ModelAdmin): extra_context["upload_excel_url"] = reverse( "admin:parsers_financialreport_upload_excel" ) + extra_context["upload_zip_url"] = reverse( + "admin:parsers_financialreport_upload_zip" + ) return super().changelist_view(request, extra_context=extra_context) def upload_excel_view(self, request): @@ -768,9 +767,9 @@ class FinancialReportAdmin(admin.ModelAdmin): return redirect(changelist_url) try: - queued, skipped, task_ids = self._enqueue_fns_files( - request, - serializer.validated_data["files"], + result = FNSUploadService.queue_uploaded_files( + files=serializer.validated_data["files"], + requested_by_id=request.user.id, ) except Exception as exc: # noqa: BLE001 self.message_user( @@ -780,19 +779,21 @@ class FinancialReportAdmin(admin.ModelAdmin): ) return redirect(changelist_url) - if queued: + if result.queued: self.message_user( request, - f"Файлов поставлено в очередь: {queued}. Task IDs: {', '.join(task_ids[:5])}", + "Файлов поставлено в очередь: " + f"{result.queued}. Task IDs: {', '.join(result.task_ids[:5])}", level=messages.SUCCESS, ) - if skipped: + if result.skipped: self.message_user( request, - f"Пропущено файлов: {skipped} (дубликаты или уже обрабатываются).", + "Пропущено файлов: " + f"{result.skipped} (дубликаты или уже обрабатываются).", level=messages.WARNING, ) - if not queued and not skipped: + if not result.queued and not result.skipped: self.message_user( request, "Файлы не были обработаны.", @@ -813,86 +814,74 @@ class FinancialReportAdmin(admin.ModelAdmin): context, ) - @staticmethod - def _try_create_fns_lock(file_path: Path) -> bool: - lock_path = Path(f"{file_path}.lock") - if lock_path.exists(): - try: - age_seconds = time.time() - lock_path.stat().st_mtime - ttl_seconds = getattr(settings, "FNS_LOCK_TTL_SECONDS", 3600) - if age_seconds > ttl_seconds: - lock_path.unlink() - else: - return False - except FileNotFoundError: - pass - try: - lock_path.touch(exist_ok=False) - except FileExistsError: - return False - return True + def upload_zip_view(self, request): + changelist_url = reverse("admin:parsers_financialreport_changelist") - def _enqueue_fns_files(self, request, files): - upload_dir = Path(settings.FNS_WATCH_DIRECTORY) - upload_dir.mkdir(parents=True, exist_ok=True) + if request.method == "POST": + serializer = FNSZipUploadSerializer( + data={"file": request.FILES.get("file")} + ) - task_ids = [] - queued = 0 - skipped = 0 - - for file in files: - file_content = file.read() - file_hash = hashlib.sha256(file_content).hexdigest() - file.seek(0) - - if FNSReportService.exists_by_hash(file_hash): - skipped += 1 - continue - - file_path = upload_dir / file.name - if not self._try_create_fns_lock(file_path): - skipped += 1 - continue - - lock_path = Path(f"{file_path}.lock") - if file_path.exists(): - lock_path.unlink(missing_ok=True) - skipped += 1 - continue - - try: - with open(file_path, "wb") as f: - for chunk in file.chunks(): - f.write(chunk) - except Exception: - lock_path.unlink(missing_ok=True) - raise - - task_id = str(uuid.uuid4()) - try: - BackgroundJobService.create_job( - task_id=task_id, - task_name="apps.parsers.tasks.process_fns_file", - user_id=request.user.id, - meta={ - "source": "fns_reports", - "file": file.name, - }, + if not serializer.is_valid(): + self.message_user( + request, + f"Ошибка валидации архива: {serializer.errors}", + level=messages.ERROR, ) - task = process_fns_file.apply_async( - args=[str(file_path)], - kwargs={"requested_by_id": request.user.id}, - task_id=task_id, + return redirect(changelist_url) + + try: + result = FNSUploadService.queue_zip_archive( + archive_file=serializer.validated_data["file"], + requested_by_id=request.user.id, ) - except Exception: - lock_path.unlink(missing_ok=True) - BackgroundJob.objects.filter(task_id=task_id).delete() - raise + except Exception as exc: # noqa: BLE001 + self.message_user( + request, + f"Ошибка обработки ZIP архива: {exc}", + level=messages.ERROR, + ) + return redirect(changelist_url) - task_ids.append(task.id) - queued += 1 + if result.queued: + self.message_user( + request, + "Файлов из архива поставлено в очередь: " + f"{result.queued}. Task IDs: {', '.join(result.task_ids[:5])}", + level=messages.SUCCESS, + ) + if result.skipped: + self.message_user( + request, + f"Пропущено файлов из архива: {result.skipped}.", + level=messages.WARNING, + ) + if result.invalid: + self.message_user( + request, + f"Невалидных элементов в архиве: {result.invalid}.", + level=messages.WARNING, + ) + if not result.queued and not result.skipped and not result.invalid: + self.message_user( + request, + "Архив не содержит подходящих файлов.", + level=messages.WARNING, + ) - return queued, skipped, task_ids + return redirect(changelist_url) + + context = { + **self.admin_site.each_context(request), + "opts": self.model._meta, + "title": "Загрузка ZIP отчетности ФНС", + "changelist_url": changelist_url, + } + return TemplateResponse( + request, + "admin/parsers/financialreport/upload_zip.html", + context, + ) def has_add_permission(self, request): """Запретить создание записей вручную.""" diff --git a/src/apps/parsers/fns_upload.py b/src/apps/parsers/fns_upload.py new file mode 100644 index 0000000..57475f6 --- /dev/null +++ b/src/apps/parsers/fns_upload.py @@ -0,0 +1,178 @@ +"""Reusable upload helpers for FNS financial report files.""" + +from __future__ import annotations + +import hashlib +import re +import time +import uuid +import zipfile +from dataclasses import dataclass, field +from pathlib import Path, PurePosixPath + +from apps.core.models import BackgroundJob +from apps.core.services import BackgroundJobService +from apps.parsers.models import ParserLoadLog +from apps.parsers.services import FNSReportService +from apps.parsers.tasks import process_fns_file +from django.conf import settings + +FNS_XLSX_FILENAME_RE = re.compile(r"^fin_\d+_\d{13,15}\.xlsx$") + + +@dataclass +class FNSUploadResult: + """Result of queuing FNS files for processing.""" + + queued: int = 0 + skipped: int = 0 + invalid: int = 0 + task_ids: list[str] = field(default_factory=list) + + +class FNSUploadService: + """Queue uploaded FNS Excel files and ZIP archives for processing.""" + + @classmethod + def queue_uploaded_files(cls, *, files, requested_by_id: int | None) -> FNSUploadResult: + result = FNSUploadResult() + seen_hashes: set[str] = set() + + for uploaded_file in files: + status, task_id = cls._queue_file_bytes( + file_name=uploaded_file.name, + file_content=uploaded_file.read(), + requested_by_id=requested_by_id, + seen_hashes=seen_hashes, + ) + cls._accumulate(result=result, status=status, task_id=task_id) + + return result + + @classmethod + def queue_zip_archive( + cls, + *, + archive_file, + requested_by_id: int | None, + ) -> FNSUploadResult: + result = FNSUploadResult() + seen_hashes: set[str] = set() + + archive_file.seek(0) + try: + with zipfile.ZipFile(archive_file) as archive: + for member in archive.infolist(): + if member.is_dir(): + continue + + file_name = cls._extract_member_name(member.filename) + if not file_name or not FNS_XLSX_FILENAME_RE.match(file_name): + result.invalid += 1 + continue + + status, task_id = cls._queue_file_bytes( + file_name=file_name, + file_content=archive.read(member), + requested_by_id=requested_by_id, + seen_hashes=seen_hashes, + ) + cls._accumulate(result=result, status=status, task_id=task_id) + except zipfile.BadZipFile as exc: + raise ValueError("Загруженный файл не является корректным ZIP архивом") from exc + + return result + + @staticmethod + def _extract_member_name(member_name: str) -> str | None: + path = PurePosixPath(member_name) + if path.is_absolute() or ".." in path.parts: + return None + if len(path.parts) != 1: + return None + file_name = path.name + return file_name or None + + @classmethod + def _queue_file_bytes( + cls, + *, + file_name: str, + file_content: bytes, + requested_by_id: int | None, + seen_hashes: set[str], + ) -> tuple[str, str | None]: + file_hash = hashlib.sha256(file_content).hexdigest() + if file_hash in seen_hashes or FNSReportService.exists_by_hash(file_hash): + return "skipped", None + + upload_dir = Path(settings.FNS_WATCH_DIRECTORY) + upload_dir.mkdir(parents=True, exist_ok=True) + + file_path = upload_dir / file_name + if not cls._try_create_fns_lock(file_path): + return "skipped", None + + lock_path = Path(f"{file_path}.lock") + if file_path.exists(): + lock_path.unlink(missing_ok=True) + return "skipped", None + + try: + file_path.write_bytes(file_content) + except Exception: + lock_path.unlink(missing_ok=True) + raise + + task_id = str(uuid.uuid4()) + try: + BackgroundJobService.create_job( + task_id=task_id, + task_name="apps.parsers.tasks.process_fns_file", + user_id=requested_by_id, + meta={ + "source": ParserLoadLog.Source.FNS_REPORTS, + "file": file_name, + }, + ) + task = process_fns_file.apply_async( + args=[str(file_path)], + kwargs={"requested_by_id": requested_by_id}, + task_id=task_id, + ) + except Exception: + lock_path.unlink(missing_ok=True) + BackgroundJob.objects.filter(task_id=task_id).delete() + raise + + seen_hashes.add(file_hash) + return "queued", task.id + + @staticmethod + def _try_create_fns_lock(file_path: Path) -> bool: + lock_path = Path(f"{file_path}.lock") + if lock_path.exists(): + try: + age_seconds = time.time() - lock_path.stat().st_mtime + ttl_seconds = getattr(settings, "FNS_LOCK_TTL_SECONDS", 3600) + if age_seconds > ttl_seconds: + lock_path.unlink() + else: + return False + except FileNotFoundError: + pass + try: + lock_path.touch(exist_ok=False) + except FileExistsError: + return False + return True + + @staticmethod + def _accumulate(*, result: FNSUploadResult, status: str, task_id: str | None) -> None: + if status == "queued": + result.queued += 1 + if task_id: + result.task_ids.append(task_id) + return + if status == "skipped": + result.skipped += 1 diff --git a/src/apps/parsers/serializers.py b/src/apps/parsers/serializers.py index 7209d06..458d32f 100644 --- a/src/apps/parsers/serializers.py +++ b/src/apps/parsers/serializers.py @@ -4,6 +4,7 @@ Все сериализаторы read-only, так как данные загружаются только через парсеры. """ +from apps.parsers.fns_upload import FNS_XLSX_FILENAME_RE from apps.parsers.models import ( FinancialReport, FinancialReportLine, @@ -269,12 +270,8 @@ class FNSFileUploadSerializer(serializers.Serializer): def validate_files(self, value): """Валидация файлов.""" - import re - - pattern = re.compile(r"^fin_\d+_\d{13,15}\.xlsx$") - for file in value: - if not pattern.match(file.name): + if not FNS_XLSX_FILENAME_RE.match(file.name): raise serializers.ValidationError( f"Неверный формат имени файла: {file.name}. " "Ожидается: fin_{{id}}_{{ogrn}}.xlsx" @@ -283,6 +280,17 @@ class FNSFileUploadSerializer(serializers.Serializer): return value +class FNSZipUploadSerializer(serializers.Serializer): + """Сериализатор для загрузки ZIP архива с FNS Excel файлами.""" + + file = serializers.FileField(help_text="ZIP архив с файлами fin_*.xlsx") + + def validate_file(self, value): + if not value.name.lower().endswith(".zip"): + raise serializers.ValidationError("Поддерживаются только ZIP архивы") + return value + + # ============================================================================= # Служебные модели # ============================================================================= diff --git a/src/apps/parsers/views.py b/src/apps/parsers/views.py index a04e325..3b2fd77 100644 --- a/src/apps/parsers/views.py +++ b/src/apps/parsers/views.py @@ -6,14 +6,10 @@ Views для приложения парсеров. """ import csv -import hashlib -import time -import uuid -from pathlib import Path from apps.core.openapi import CommonResponses, ErrorResponses, swagger_tag from apps.core.response import api_response -from apps.core.services import BackgroundJobService +from apps.parsers.fns_upload import FNSUploadService from apps.parsers.models import ( FinancialReport, IndustrialCertificateRecord, @@ -42,8 +38,6 @@ from apps.parsers.serializers import ( SourceTaskStatusSerializer, ) from apps.parsers.source_cards import SourceCardService -from apps.parsers.tasks import process_fns_file -from django.conf import settings from django.db.models import CharField, Count, Q from django.db.models.functions import Cast from django.http import HttpResponse @@ -532,97 +526,16 @@ class FNSReportUploadView(APIView): serializer = FNSFileUploadSerializer(data=request.data) serializer.is_valid(raise_exception=True) - files = serializer.validated_data["files"] - task_ids = [] - queued = 0 - skipped = 0 - - # Создаём директорию для загрузки - upload_dir = Path(settings.FNS_WATCH_DIRECTORY) - upload_dir.mkdir(parents=True, exist_ok=True) - - from apps.parsers.services import FNSReportService - - def _try_create_fns_lock(file_path: Path) -> bool: - lock_path = Path(f"{file_path}.lock") - if lock_path.exists(): - try: - age_seconds = time.time() - lock_path.stat().st_mtime - ttl_seconds = getattr(settings, "FNS_LOCK_TTL_SECONDS", 3600) - if age_seconds > ttl_seconds: - lock_path.unlink() - else: - return False - except FileNotFoundError: - pass - try: - lock_path.touch(exist_ok=False) - except FileExistsError: - return False - return True - - for file in files: - # Вычисляем хеш файла - file_content = file.read() - file_hash = hashlib.sha256(file_content).hexdigest() - file.seek(0) - - # Проверяем дубликат - if FNSReportService.exists_by_hash(file_hash): - skipped += 1 - continue - - # Сохраняем файл - file_path = upload_dir / file.name - if not _try_create_fns_lock(file_path): - skipped += 1 - continue - lock_path = Path(f"{file_path}.lock") - - if file_path.exists(): - lock_path.unlink(missing_ok=True) - skipped += 1 - continue - - try: - with open(file_path, "wb") as f: - for chunk in file.chunks(): - f.write(chunk) - except Exception: - lock_path.unlink(missing_ok=True) - raise - - # Ставим в очередь - try: - task_id = str(uuid.uuid4()) - BackgroundJobService.create_job( - task_id=task_id, - task_name="apps.parsers.tasks.process_fns_file", - user_id=request.user.id, - meta={ - "source": ParserLoadLog.Source.FNS_REPORTS, - "file": file.name, - }, - ) - task = process_fns_file.apply_async( - args=[str(file_path)], - kwargs={"requested_by_id": request.user.id}, - task_id=task_id, - ) - except Exception: - lock_path.unlink(missing_ok=True) - from apps.core.models import BackgroundJob - - BackgroundJob.objects.filter(task_id=task_id).delete() - raise - task_ids.append(task.id) - queued += 1 + result = FNSUploadService.queue_uploaded_files( + files=serializer.validated_data["files"], + requested_by_id=request.user.id, + ) return Response( { - "queued": queued, - "skipped": skipped, - "task_ids": task_ids, + "queued": result.queued, + "skipped": result.skipped, + "task_ids": result.task_ids, }, status=status.HTTP_202_ACCEPTED, ) diff --git a/src/templates/admin/parsers/financialreport/change_list.html b/src/templates/admin/parsers/financialreport/change_list.html index 56dae08..6884e20 100644 --- a/src/templates/admin/parsers/financialreport/change_list.html +++ b/src/templates/admin/parsers/financialreport/change_list.html @@ -4,5 +4,8 @@