diff --git a/src/apps/backups/admin.py b/src/apps/backups/admin.py index 777b119..0b9b28e 100644 --- a/src/apps/backups/admin.py +++ b/src/apps/backups/admin.py @@ -1,17 +1,17 @@ """Admin для приложения backups.""" +from urllib.parse import urlencode + from apps.backups.models import BackupExportJob from apps.backups.serializers import BackupExportRequestSerializer from apps.backups.services import BackupExportError, BackupExportJobService from apps.core.services import BackgroundJobService -from django.contrib import admin -from django.contrib import messages +from django.contrib import admin, messages from django.http import HttpResponse from django.shortcuts import redirect from django.urls import path, reverse -from django.utils.html import format_html from django.utils import timezone -from urllib.parse import urlencode +from django.utils.html import format_html @admin.register(BackupExportJob) @@ -190,23 +190,29 @@ class BackupExportJobAdmin(admin.ModelAdmin): ) return redirect(changelist_url) - actual_date = serializer.validated_data.get("actual_date") or timezone.localdate() + actual_date = ( + serializer.validated_data.get("actual_date") or timezone.localdate() + ) try: result = BackupExportJobService.check_or_start_job( actual_date=actual_date, - requested_by_id=request.user.id if request.user.is_authenticated else None, + requested_by_id=request.user.id + if request.user.is_authenticated + else None, ) except BackupExportError as exc: - self.message_user(request, f"Ошибка запуска резервного экспорта: {exc}", level=messages.ERROR) + self.message_user( + request, + f"Ошибка запуска резервного экспорта: {exc}", + level=messages.ERROR, + ) return redirect(changelist_url) if result.action in {"started", "wait"}: self.message_user( request, result.message, - level=messages.INFO - if result.action == "started" - else messages.WARNING, + level=messages.INFO if result.action == "started" else messages.WARNING, ) return redirect(changelist_url) @@ -216,10 +222,14 
@@ class BackupExportJobAdmin(admin.ModelAdmin): actual_date=result.actual_date ) except BackupExportError as exc: - self.message_user(request, f"Ошибка загрузки backup: {exc}", level=messages.ERROR) + self.message_user( + request, f"Ошибка загрузки backup: {exc}", level=messages.ERROR + ) return redirect(changelist_url) - response = HttpResponse(artifact.archive_bytes, content_type="application/zip") + response = HttpResponse( + artifact.archive_bytes, content_type="application/zip" + ) response.status_code = 200 response[ "Content-Disposition" diff --git a/src/apps/backups/services.py b/src/apps/backups/services.py index 34c6a51..ae2cdef 100644 --- a/src/apps/backups/services.py +++ b/src/apps/backups/services.py @@ -28,17 +28,17 @@ from apps.parsers.models import ( ManufacturerRecord, ProcurementRecord, ) +from cryptography.hazmat.primitives.ciphers.aead import AESGCM +from django.conf import settings +from django.db import IntegrityError, transaction +from django.db.models import Model, Q +from django.utils import timezone from registers.models import ( Organization, Register, RegisterUpload, RegistryMembershipPeriod, ) -from cryptography.hazmat.primitives.ciphers.aead import AESGCM -from django.conf import settings -from django.db import IntegrityError, transaction -from django.db.models import Model -from django.utils import timezone class BackupExportError(ValueError): @@ -127,7 +127,10 @@ class BackupExportService: @classmethod def _get_active_organization_ids(cls, actual_date: date) -> list[int]: return list( - RegistryMembershipPeriod.objects.all() + RegistryMembershipPeriod.objects.filter( + started_at__lte=actual_date, + ) + .filter(Q(ended_at__isnull=True) | Q(ended_at__gt=actual_date)) .values_list("organization_id", flat=True) .distinct() ) @@ -141,7 +144,8 @@ class BackupExportService: ) -> dict: active_memberships = RegistryMembershipPeriod.objects.filter( organization_id__in=active_org_ids, - ) + started_at__lte=actual_date, + 
).filter(Q(ended_at__isnull=True) | Q(ended_at__gt=actual_date)) register_ids = list( active_memberships.values_list("registry_id", flat=True).distinct() diff --git a/src/apps/backups/tasks.py b/src/apps/backups/tasks.py index 1a85273..c8e057e 100644 --- a/src/apps/backups/tasks.py +++ b/src/apps/backups/tasks.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging -import traceback import uuid from pathlib import Path @@ -110,10 +109,9 @@ def generate_backup_for_date(self, job_id: int) -> dict: job.actual_date, job.id, ) - error_traceback = traceback.format_exc() job.status = BackupExportJob.Status.FAILURE job.error = str(exc) job.completed_at = timezone.now() job.save(update_fields=["status", "error", "completed_at", "updated_at"]) - background_job.fail(error=str(exc), traceback_str=error_traceback) + background_job.fail(error=str(exc)) raise diff --git a/src/apps/core/admin_dashboard.py b/src/apps/core/admin_dashboard.py index 03cc9d6..0f4b3ff 100644 --- a/src/apps/core/admin_dashboard.py +++ b/src/apps/core/admin_dashboard.py @@ -16,14 +16,14 @@ from apps.parsers.models import ( Proxy, ) from apps.parsers.source_cards import SourceCardService +from django.db.models import Count, Max +from django.urls import NoReverseMatch, reverse from registers.models import ( Organization, Register, RegisterUpload, RegistryMembershipPeriod, ) -from django.db.models import Count, Max -from django.urls import NoReverseMatch, reverse SOURCE_COLORS = ( "#49d0c8", @@ -138,10 +138,7 @@ def build_admin_dashboard() -> dict[str, Any]: source_cards = _build_source_cards() source_mix = _build_source_mix(source_cards) active_registry_orgs = ( - RegistryMembershipPeriod.objects - .values("organization_id") - .distinct() - .count() + RegistryMembershipPeriod.objects.values("organization_id").distinct().count() ) total_organizations = Organization.objects.count() healthy_sources = sum(1 for card in source_cards if card["status"] == "success") diff --git 
a/src/apps/core/services.py b/src/apps/core/services.py index 1eb45b0..533c8c4 100644 --- a/src/apps/core/services.py +++ b/src/apps/core/services.py @@ -8,6 +8,7 @@ They are easily testable and can manage transactions. import logging from typing import Any, Generic, TypeVar +import django from apps.core.exceptions import NotFoundError from django.db import models, transaction from django.db.models import QuerySet @@ -272,8 +273,13 @@ class BulkOperationsMixin: "ignore_conflicts": ignore_conflicts, } - # Django 4.1+ поддерживает update_conflicts - if update_conflicts and update_fields and unique_fields: + # Django 4.1+ поддерживает update_conflicts; проект закреплён на Django 3.x. + if ( + django.VERSION >= (4, 1) + and update_conflicts + and update_fields + and unique_fields + ): kwargs["update_conflicts"] = True kwargs["update_fields"] = update_fields kwargs["unique_fields"] = unique_fields diff --git a/src/apps/core/views.py b/src/apps/core/views.py index c568ee3..1d0cda9 100644 --- a/src/apps/core/views.py +++ b/src/apps/core/views.py @@ -21,6 +21,7 @@ from apps.core.serializers import ( from django.conf import settings from django.db import connection from django.http import StreamingHttpResponse +from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status from rest_framework.exceptions import ValidationError @@ -354,6 +355,59 @@ class BackgroundJobStreamView(BackgroundJobStatusView): return response +class BackgroundJobControlView(BackgroundJobStatusView): + """Управление фоновой задачей: сейчас поддерживается revoke.""" + + @swagger_auto_schema( + tags=[JOBS_TAG], + operation_summary="Управление задачей", + operation_description="Отзывает Celery задачу по action=revoke.", + request_body=openapi.Schema( + type=openapi.TYPE_OBJECT, + properties={ + "action": openapi.Schema( + type=openapi.TYPE_STRING, + enum=["revoke"], + ), + "terminate": openapi.Schema(type=openapi.TYPE_BOOLEAN), + }, + ), + responses={ + 
200: BackgroundJobSerializer, + 400: CommonResponses.BAD_REQUEST, + 403: CommonResponses.FORBIDDEN, + 404: CommonResponses.NOT_FOUND, + **ErrorResponses.AUTHENTICATED, + }, + ) + def post(self, request: Request, task_id: str) -> Response: + from apps.core.models import JobStatus + from apps.core.services import BackgroundJobService + from celery import current_app + + job = BackgroundJobService.get_by_task_id(task_id) + access_error = self._check_access(request, job) + if access_error is not None: + return access_error + + action = request.data.get("action", "revoke") + if action != "revoke": + return Response( + {"detail": "Поддерживается только action=revoke"}, + status=status.HTTP_400_BAD_REQUEST, + ) + + current_app.control.revoke( + task_id, + terminate=bool(request.data.get("terminate", False)), + ) + if not job.is_finished: + job.status = JobStatus.REVOKED + job.progress_message = "Задача отозвана пользователем" + job.save(update_fields=["status", "progress_message", "updated_at"]) + return Response(BackgroundJobSerializer(job).data) + + class BackgroundJobListView(APIView): """ Список фоновых задач пользователя. 
diff --git a/src/apps/exchange/state_corp_services.py b/src/apps/exchange/state_corp_services.py index 47af715..9df89c1 100644 --- a/src/apps/exchange/state_corp_services.py +++ b/src/apps/exchange/state_corp_services.py @@ -23,10 +23,10 @@ from apps.parsers.models import ( InspectionRecord, ProcurementRecord, ) -from registers.models import Organization from cryptography.hazmat.primitives.ciphers.aead import AESGCM from django.conf import settings from django.utils import timezone +from registers.models import Organization class StateCorpExchangeError(ValueError): diff --git a/src/apps/exchange/urls.py b/src/apps/exchange/urls.py index 9bd6958..e7aa414 100644 --- a/src/apps/exchange/urls.py +++ b/src/apps/exchange/urls.py @@ -6,6 +6,7 @@ from apps.exchange.views import ( ExchangeCopyDataView, ExchangePeriodicTaskDetailView, ExchangePeriodicTaskListCreateView, + ExchangeTableListView, ) from django.urls import path @@ -20,6 +21,7 @@ exchange_urlpatterns = [ ExchangeConnectionTestView.as_view(), name="connections-test", ), + path("tables/", ExchangeTableListView.as_view(), name="tables"), path("copy/", ExchangeCopyDataView.as_view(), name="copy"), path( "periodic-tasks/", diff --git a/src/apps/exchange/views.py b/src/apps/exchange/views.py index f4d284b..53c0ece 100644 --- a/src/apps/exchange/views.py +++ b/src/apps/exchange/views.py @@ -213,6 +213,25 @@ class ExchangeCopyDataView(APIView): ) +class ExchangeTableListView(APIView): + """Список таблиц, доступных для выгрузки во внешнюю БД.""" + + permission_classes = [IsAdminUser] + + @swagger_auto_schema( + tags=[EXCHANGE_TAG], + operation_summary="Список таблиц exchange", + operation_description="Возвращает таблицы parser-моделей для режима selected.", + responses={200: "Exchange table choices", **ErrorResponses.ADMIN}, + ) + def get(self, request): + rows = [ + {"table": table, "value": table, "title": title, "label": title} + for table, title in ExchangeConnectionService.get_copy_table_choices() + ] + return 
api_response(rows) + + class ExchangePeriodicTaskListCreateView(APIView): """API списка и создания периодических задач обмена.""" @@ -311,6 +330,12 @@ class ExchangePeriodicTaskDetailView(APIView): ExchangePeriodicTaskService.get_queryset(), id=task_id, ) + if set(request.data) <= {"enabled"}: + task.enabled = bool(request.data.get("enabled")) + task.save(update_fields=["enabled", "date_changed"]) + output = ExchangePeriodicTaskSerializer(task) + return Response(output.data, status=status.HTTP_200_OK) + serializer = ExchangePeriodicTaskUpsertSerializer( task, data=request.data, @@ -329,3 +354,11 @@ class ExchangePeriodicTaskDetailView(APIView): output = ExchangePeriodicTaskSerializer(task) return Response(output.data, status=status.HTTP_200_OK) + + def delete(self, request, task_id: int): + task = get_object_or_404( + ExchangePeriodicTaskService.get_queryset(), + id=task_id, + ) + task.delete() + return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/src/apps/parsers/api_result_urls.py b/src/apps/parsers/api_result_urls.py new file mode 100644 index 0000000..e5d55f8 --- /dev/null +++ b/src/apps/parsers/api_result_urls.py @@ -0,0 +1,161 @@ +"""Frontend-oriented parser result API v1 routes.""" + +from apps.parsers.serializers import ( + ParserResultRecordSerializer, + ParserRunResponseSerializer, +) +from apps.parsers.source_registry import PARSER_SOURCES, ParserSourceDescriptor +from apps.parsers.views import ( + RESULT_DETAIL_PARAMS, + RESULT_LIST_PARAMS, + UPLOAD_FILE_PARAM, + ParserUploadView, + SourceResultDetailView, + SourceResultListView, + source_result_swagger_tag, +) +from django.urls import path +from drf_yasg.utils import swagger_auto_schema + +app_name = "parser_results" +ROUTE_TITLES = { + "zakupki": "ЕИС Закупки", + "proverki": "Проверки Генпрокуратуры", +} + + +def _result_source_descriptors(): + """Вернуть уникальные источники с явным API route.""" + seen_routes = set() + for descriptor in PARSER_SOURCES.values(): + if not 
descriptor.api_route or descriptor.api_route in seen_routes: + continue + seen_routes.add(descriptor.api_route) + yield descriptor + + +def _upload_source_descriptors(): + """Вернуть уникальные источники с route ручной загрузки.""" + seen_routes = set() + for descriptor in PARSER_SOURCES.values(): + if not descriptor.supports_file_upload: + continue + upload_route = descriptor.upload_api_route + if not upload_route or upload_route in seen_routes: + continue + seen_routes.add(upload_route) + yield descriptor + + +def _route_title(descriptor: ParserSourceDescriptor) -> str: + """Вернуть человекочитаемое название публичного route.""" + return ROUTE_TITLES.get(descriptor.api_route, descriptor.title) + + +def _route_description(descriptor: ParserSourceDescriptor) -> str: + """Вернуть описание данных, опубликованных на одном route.""" + scopes = [ + item.data_scope + for item in PARSER_SOURCES.values() + if item.api_route == descriptor.api_route + ] + return "; ".join(dict.fromkeys(scopes)) + + +def _list_view(descriptor: ParserSourceDescriptor): + tag = source_result_swagger_tag(descriptor.key) + title = _route_title(descriptor) + description = _route_description(descriptor) + + class SourceListView(SourceResultListView): + source_key = descriptor.key + + @swagger_auto_schema( + operation_summary=f"{title}: get_list", + operation_description=( + f"Список записей источника: {description}. " + "Фильтры передаются query-параметрами." 
+ ), + manual_parameters=RESULT_LIST_PARAMS, + tags=[tag], + responses={200: ParserResultRecordSerializer(many=True)}, + ) + def get(self, request): + return super().get(request) + + return SourceListView.as_view() + + +def _detail_view(descriptor: ParserSourceDescriptor): + tag = source_result_swagger_tag(descriptor.key) + title = _route_title(descriptor) + description = _route_description(descriptor) + + class SourceDetailView(SourceResultDetailView): + source_key = descriptor.key + + @swagger_auto_schema( + operation_summary=f"{title}: get", + operation_description=( + f"Одна запись источника: {description}. " + "Query-параметры дополнительно сужают выборку." + ), + manual_parameters=RESULT_DETAIL_PARAMS, + tags=[tag], + responses={200: ParserResultRecordSerializer, 404: "Запись не найдена"}, + ) + def get(self, request, pk: int): + return super().get(request, pk=pk) + + return SourceDetailView.as_view() + + +def _upload_view(descriptor: ParserSourceDescriptor): + tag = source_result_swagger_tag(descriptor.key) + + class SourceUploadView(ParserUploadView): + @swagger_auto_schema( + operation_summary=f"{descriptor.title}: загрузить файл", + operation_description=( + f"Ручная загрузка файла для источника: {descriptor.data_scope}. " + "Файл обрабатывается через Celery." 
+ ), + manual_parameters=[UPLOAD_FILE_PARAM], + consumes=["multipart/form-data"], + tags=[tag], + responses={202: ParserRunResponseSerializer, 400: "Ошибка валидации"}, + ) + def post(self, request): + return super().post(request, source_key=descriptor.key) + + return SourceUploadView.as_view() + + +urlpatterns = [] +for source_descriptor in _result_source_descriptors(): + route_name = source_descriptor.api_route.replace("/", "-") + urlpatterns.extend( + [ + path( + f"{source_descriptor.api_route}/", + _list_view(source_descriptor), + name=f"{route_name}-get-list", + ), + path( + f"{source_descriptor.api_route}//", + _detail_view(source_descriptor), + name=f"{route_name}-get", + ), + ] + ) + +for source_descriptor in _upload_source_descriptors(): + upload_route = source_descriptor.upload_api_route + route_name = upload_route.replace("/", "-") + urlpatterns.append( + path( + f"{upload_route}/", + _upload_view(source_descriptor), + name=f"{route_name}-upload", + ) + ) diff --git a/src/apps/parsers/clients/base.py b/src/apps/parsers/clients/base.py index a9e2cc3..5f47f55 100644 --- a/src/apps/parsers/clients/base.py +++ b/src/apps/parsers/clients/base.py @@ -6,6 +6,7 @@ import logging import random +from contextlib import suppress from dataclasses import dataclass, field from typing import Any @@ -70,6 +71,7 @@ class BaseHTTPClient: timeout: int = 30 headers: dict[str, str] = field(default_factory=dict) adapter: BaseAdapter | None = None + verify_ssl: bool = True def __post_init__(self) -> None: """Инициализация после создания dataclass.""" @@ -180,7 +182,12 @@ class BaseHTTPClient: logger.info("GET %s (proxy: %s)", url, self._current_proxy) try: - response = self.session.get(url, params=params, timeout=self.timeout) + response = self.session.get( + url, + params=params, + timeout=self.timeout, + verify=self.verify_ssl, + ) except requests.exceptions.ConnectionError as e: logger.error("Connection error: %s - %s", url, e) raise ConnectionError(f"Failed to connect to 
{url}", url=url) from e @@ -202,6 +209,46 @@ class BaseHTTPClient: logger.debug("Response %d from %s", response.status_code, url) return response + def post_json( + self, + endpoint: str, + *, + payload: dict[str, Any] | None = None, + params: dict[str, Any] | None = None, + ) -> dict: + """Выполнить POST с JSON body и вернуть JSON.""" + url = self._build_url(endpoint) + logger.info("POST %s (proxy: %s)", url, self._current_proxy) + + try: + response = self.session.post( + url, + params=params, + json=payload, + timeout=self.timeout, + verify=self.verify_ssl, + ) + except requests.exceptions.ConnectionError as e: + logger.error("Connection error: %s - %s", url, e) + raise ConnectionError(f"Failed to connect to {url}", url=url) from e + except requests.exceptions.Timeout as e: + logger.error("Timeout: %s", url) + raise ConnectionError(f"Request timeout for {url}", url=url) from e + except requests.exceptions.RequestException as e: + logger.error("Request error: %s - %s", url, e) + raise HTTPClientError(f"Request failed: {e}", url=url) from e + + if not response.ok: + logger.error("HTTP error %d: %s", response.status_code, url) + raise HTTPError( + f"HTTP {response.status_code} for {url}", + status_code=response.status_code, + url=url, + ) + + logger.debug("POST Response %d from %s", response.status_code, url) + return response.json() + def post( self, endpoint: str, @@ -239,6 +286,7 @@ class BaseHTTPClient: json=json, headers=request_headers, timeout=self.timeout, + verify=self.verify_ssl, ) except requests.exceptions.ConnectionError as e: logger.error("Connection error: %s - %s", url, e) @@ -276,7 +324,12 @@ class BaseHTTPClient: return response.json() def download_file( - self, endpoint: str, headers: dict[str, str] | None = None + self, + endpoint: str, + headers: dict[str, str] | None = None, + *, + max_size_bytes: int | None = None, + chunk_size: int = 1024 * 1024, ) -> bytes: """ Скачать файл. 
@@ -284,6 +337,8 @@ class BaseHTTPClient: Args: endpoint: Путь или полный URL файла headers: Дополнительные заголовки + max_size_bytes: Максимальный допустимый размер файла + chunk_size: Размер чанка потокового чтения Returns: Содержимое файла как bytes @@ -298,7 +353,11 @@ class BaseHTTPClient: try: response = self.session.get( - url, headers=request_headers, timeout=self.timeout + url, + headers=request_headers, + timeout=self.timeout, + stream=True, + verify=self.verify_ssl, ) except requests.exceptions.ConnectionError as e: logger.error("Connection error: %s - %s", url, e) @@ -310,17 +369,82 @@ class BaseHTTPClient: logger.error("Request error: %s - %s", url, e) raise HTTPClientError(f"Request failed: {e}", url=url) from e - if not response.ok: - logger.error("HTTP error %d: %s", response.status_code, url) - raise HTTPError( - f"HTTP {response.status_code} for {url}", - status_code=response.status_code, + try: + self._raise_for_response(response, url) + self._validate_content_length(response, max_size_bytes, url) + content = self._read_stream(response, max_size_bytes, chunk_size, url) + finally: + self._close_response(response) + + logger.info("Downloaded %d bytes from %s", len(content), url) + return content + + def _raise_for_response(self, response: requests.Response, url: str) -> None: + """Проверить HTTP статус ответа.""" + if response.ok: + return + logger.error("HTTP error %d: %s", response.status_code, url) + raise HTTPError( + f"HTTP {response.status_code} for {url}", + status_code=response.status_code, + url=url, + ) + + def _validate_content_length( + self, + response: requests.Response, + max_size_bytes: int | None, + url: str, + ) -> None: + """Проверить Content-Length до чтения body.""" + content_length = response.headers.get("Content-Length") + if not content_length or max_size_bytes is None: + return + try: + declared_size = int(content_length) + except ValueError: + return + if declared_size > max_size_bytes: + raise HTTPClientError( + f"File 
is too large: {declared_size} bytes", url=url, ) - content = response.content - logger.info("Downloaded %d bytes from %s", len(content), url) - return content + def _read_stream( + self, + response: requests.Response, + max_size_bytes: int | None, + chunk_size: int, + url: str, + ) -> bytes: + """Прочитать response stream с лимитом размера.""" + if getattr(response, "raw", None) is None: + content = response.content + if max_size_bytes is not None and len(content) > max_size_bytes: + raise HTTPClientError( + f"File is too large: {len(content)} bytes", + url=url, + ) + return content + + chunks = [] + total_size = 0 + for chunk in response.iter_content(chunk_size=chunk_size): + if not chunk: + continue + total_size += len(chunk) + if max_size_bytes is not None and total_size > max_size_bytes: + raise HTTPClientError( + f"File is too large: {total_size} bytes", + url=url, + ) + chunks.append(chunk) + return b"".join(chunks) + + def _close_response(self, response: requests.Response) -> None: + """Закрыть response, учитывая lightweight test adapters без raw stream.""" + with suppress(AttributeError): + response.close() def close(self) -> None: """Закрыть сессию.""" diff --git a/src/apps/parsers/clients/common/__init__.py b/src/apps/parsers/clients/common/__init__.py new file mode 100644 index 0000000..f64c3a6 --- /dev/null +++ b/src/apps/parsers/clients/common/__init__.py @@ -0,0 +1,13 @@ +"""Общие клиенты и DTO для новых разнородных источников.""" + +from apps.parsers.clients.common.schemas import GenericParserItem +from apps.parsers.clients.common.structured import ( + StructuredDataClient, + StructuredDataClientError, +) + +__all__ = [ + "GenericParserItem", + "StructuredDataClient", + "StructuredDataClientError", +] diff --git a/src/apps/parsers/clients/common/schemas.py b/src/apps/parsers/clients/common/schemas.py new file mode 100644 index 0000000..933ea73 --- /dev/null +++ b/src/apps/parsers/clients/common/schemas.py @@ -0,0 +1,22 @@ +"""DTO для универсальных 
записей новых источников.""" + +from dataclasses import dataclass, field +from decimal import Decimal +from typing import Any + + +@dataclass(frozen=True) +class GenericParserItem: + """Нормализованная запись из внешнего источника.""" + + source: str + external_id: str + inn: str = "" + ogrn: str = "" + organisation_name: str = "" + title: str = "" + record_date: str = "" + amount: Decimal | None = None + status: str = "" + url: str = "" + payload: dict[str, Any] = field(default_factory=dict) diff --git a/src/apps/parsers/clients/common/structured.py b/src/apps/parsers/clients/common/structured.py new file mode 100644 index 0000000..5f597b4 --- /dev/null +++ b/src/apps/parsers/clients/common/structured.py @@ -0,0 +1,1018 @@ +"""Клиент для загрузки структурированных файлов новых источников.""" + +import csv +import hashlib +import io +import json +import logging +import re +import zipfile +from collections import Counter +from dataclasses import dataclass, field +from decimal import Decimal, InvalidOperation +from typing import Any +from urllib.parse import urljoin + +from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError +from apps.parsers.clients.common.schemas import GenericParserItem +from bs4 import BeautifulSoup +from defusedxml import ElementTree as ET +from openpyxl import load_workbook + +logger = logging.getLogger(__name__) + +LIST_KEYS = ( + "data", + "content", + "results", + "items", + "records", + "rows", + "documents", + "list", +) +SUPPORTED_EXCEL_EXTENSIONS = (".xlsx", ".xlsm") +SUPPORTED_ZIP_EXTENSIONS = ( + ".json", + ".csv", + ".xml", + ".html", + ".htm", + ".xlsx", + ".xlsm", +) +MAX_FILE_SIZE_BYTES = 200 * 1024 * 1024 +MAX_ZIP_ENTRIES = 100 +MAX_ZIP_UNCOMPRESSED_BYTES = 500 * 1024 * 1024 +MAX_ZIP_COMPRESSION_RATIO = 200 +MAX_RECORDS = 200_000 +MPT_PRODUCTS_SOURCE = "mpt_products" +FSTEC_SOURCE = "fstec" +FAS_GOZ_SOURCE = "fas_goz" +EIS_CARD_SOURCES = { + "procurements_44fz", + "procurements_223fz", + "contracts", + 
"unfair_suppliers", +} +ZAKUPKI_BASE_URL = "https://zakupki.gov.ru" +GISP_PRODUCTS_DOWNLOAD_LABEL = "Скачать только действующие" +GISP_PRODUCTS_API_PATH = "/pp719v2/pub/prod/b/" +GISP_PRODUCTS_PAGE_SIZE = 100 + + +class StructuredDataClientError(HTTPClientError): + """Ошибка клиента структурированных данных.""" + + pass + + +@dataclass +class StructuredDataClient: + """ + Универсальный клиент для файловых выгрузок. + + Поддерживает JSON, CSV, XLSX, XML, HTML-таблицы и ZIP-архивы с этими файлами. + Используется как первый слайс для источников, где формат выгрузки уточняется + отдельно и может отличаться между ведомствами. + """ + + source: str + proxies: list[str] | None = None + timeout: int = 120 + max_file_size_bytes: int = MAX_FILE_SIZE_BYTES + max_zip_entries: int = MAX_ZIP_ENTRIES + max_zip_uncompressed_bytes: int = MAX_ZIP_UNCOMPRESSED_BYTES + max_records: int = MAX_RECORDS + verify_ssl: bool = True + _http_client: BaseHTTPClient | None = field(default=None, repr=False) + + @property + def http_client(self) -> BaseHTTPClient: + """Ленивая инициализация HTTP клиента.""" + if self._http_client is None: + self._http_client = BaseHTTPClient( + base_url="", + proxies=self.proxies, + timeout=self.timeout, + verify_ssl=self.verify_ssl, + ) + return self._http_client + + def fetch_records( + self, + *, + file_url: str | None = None, + content: bytes | None = None, + file_name: str = "", + ) -> list[GenericParserItem]: + """ + Загрузить и нормализовать записи из структурированного файла. + + Args: + file_url: URL выгрузки. + content: Содержимое файла, используется в тестах или при внешней загрузке. + file_name: Имя файла для определения формата. 
+ """ + if content is None: + if not file_url: + raise StructuredDataClientError("file_url is required") + if self._is_gisp_products_page(file_url): + rows = self._fetch_gisp_products_page(file_url) + records = [ + self._normalize_row(row, index) for index, row in enumerate(rows) + ] + logger.info( + "Parsed %d records for source=%s", + len(records), + self.source, + ) + return records + + content = self.http_client.download_file( + file_url, + max_size_bytes=self.max_file_size_bytes, + ) + file_name = file_name or file_url.rsplit("/", 1)[-1] + download_url = self._extract_preferred_html_download_url( + content, + base_url=file_url, + ) + if download_url: + content = self.http_client.download_file( + download_url, + max_size_bytes=self.max_file_size_bytes, + ) + file_name = download_url.rsplit("/", 1)[-1] or file_name + + self._validate_file_size(len(content), file_name or "content") + rows = self._parse_content(content, file_name=file_name) + records = [self._normalize_row(row, index) for index, row in enumerate(rows)] + logger.info("Parsed %d records for source=%s", len(records), self.source) + return records + + def _is_gisp_products_page(self, file_url: str) -> bool: + """Проверить, что это web API страницы продукции ГИСП.""" + return self.source == MPT_PRODUCTS_SOURCE and "/pp719v2/pub/prod" in file_url + + def _fetch_gisp_products_page(self, file_url: str) -> list[dict]: + """Загрузить первую страницу реестра продукции ГИСП через официальный UI API.""" + api_url = urljoin(file_url, GISP_PRODUCTS_API_PATH) + data = self.http_client.post_json( + api_url, + payload={ + "opt": { + "skip": 0, + "take": GISP_PRODUCTS_PAGE_SIZE, + "requireTotalCount": True, + "sort": [{"selector": "res_date", "desc": True}], + } + }, + ) + items = data.get("items") + if not isinstance(items, list): + raise StructuredDataClientError("GISP products API returned no items list") + self._validate_record_count(len(items)) + return [self._as_dict(item) for item in items] + + def 
_parse_content( # noqa: C901 + self, content: bytes, *, file_name: str = "" + ) -> list[dict]: + """Определить формат и распарсить файл.""" + name = file_name.lower() + + if name.endswith(SUPPORTED_EXCEL_EXTENSIONS): + return self._parse_xlsx(content) + if name.endswith(".zip"): + return self._parse_zip(content) + if name.endswith(".json"): + return self._parse_json(content) + if name.endswith(".csv"): + return self._parse_csv(content) + if name.endswith(".xml"): + return self._parse_xml(content) + if name.endswith((".html", ".htm")): + return self._parse_html(content) + + if content.startswith(b"PK\x03\x04"): + try: + return self._parse_xlsx(content) + except Exception: + return self._parse_zip(content) + + text = self._decode(content).lstrip() + if text.startswith(("{", "[")): + return self._parse_json(content) + if text.startswith("<"): + if self._looks_like_html(text): + return self._parse_html(content) + return self._parse_xml(content) + return self._parse_csv(content) + + def _looks_like_html(self, text: str) -> bool: + """Определить HTML-страницу до попытки XML-разбора.""" + prefix = text[:8192].lower() + return ( + prefix.startswith(" list[dict]: + """Распарсить поддерживаемые файлы внутри ZIP.""" + rows: list[dict] = [] + with zipfile.ZipFile(io.BytesIO(content)) as archive: + supported_files = [ + info + for info in archive.infolist() + if not info.is_dir() + and info.filename.lower().endswith(SUPPORTED_ZIP_EXTENSIONS) + ] + if len(supported_files) > self.max_zip_entries: + raise StructuredDataClientError( + f"ZIP contains too many supported files: {len(supported_files)}" + ) + + total_uncompressed = sum(info.file_size for info in supported_files) + if total_uncompressed > self.max_zip_uncompressed_bytes: + raise StructuredDataClientError( + "ZIP uncompressed size exceeds limit: " + f"{total_uncompressed} bytes" + ) + + for info in supported_files: + if info.is_dir(): + continue + name = info.filename.lower() + self._validate_zip_member(info) + 
rows.extend( + self._parse_content(archive.read(info.filename), file_name=name) + ) + self._validate_record_count(len(rows)) + return rows + + def _parse_json(self, content: bytes) -> list[dict]: + """Распарсить JSON и найти список записей.""" + data = json.loads(self._decode(content)) + node = self._extract_list_node(data) + if isinstance(node, list): + self._validate_record_count(len(node)) + return [self._as_dict(item) for item in node] + return [self._as_dict(node)] + + def _extract_list_node(self, data: Any) -> Any: + """Найти вероятный список записей в JSON-ответе.""" + if isinstance(data, list): + return data + if not isinstance(data, dict): + return data + + for key in LIST_KEYS: + value = data.get(key) + if isinstance(value, list): + return value + if isinstance(value, dict): + nested = self._extract_list_node(value) + if isinstance(nested, list): + return nested + return data + + def _parse_csv(self, content: bytes) -> list[dict]: + """Распарсить CSV с автоопределением разделителя.""" + text = self._decode(content) + sample = text[:4096] + try: + dialect = csv.Sniffer().sniff(sample, delimiters=",;\t|") + reader = csv.DictReader(io.StringIO(text), dialect=dialect) + except csv.Error: + reader = csv.DictReader(io.StringIO(text), delimiter=";") + result = [] + for row in reader: + result.append(dict(row)) + self._validate_record_count(len(result)) + return result + + def _parse_xlsx(self, content: bytes) -> list[dict]: + """Распарсить первый лист XLSX.""" + workbook = load_workbook( + filename=io.BytesIO(content), + read_only=True, + data_only=True, + ) + try: + sheet = workbook.active + row_iterator = sheet.iter_rows(values_only=True) + headers_row = next(row_iterator, None) + if not headers_row: + return [] + + headers = [str(value or "").strip() for value in headers_row] + result = [] + for row in row_iterator: + if not row or not any(row): + continue + result.append( + { + headers[index] + if index < len(headers) + else f"field_{index}": value + for 
index, value in enumerate(row) + } + ) + self._validate_record_count(len(result)) + return result + finally: + workbook.close() + + def _parse_xml(self, content: bytes) -> list[dict]: + """Распарсить XML с поиском повторяющихся элементов-записей.""" + text = self._clean_xml(content) + selector = self._select_xml_records(text) + if selector is None: + return [] + return self._collect_xml_records(text, selector) + + def _select_xml_records(self, text: str) -> tuple[str, int, str | None] | None: + """Выбрать XML-элементы, которые вероятнее всего являются записями.""" + ( + candidates, + direct_children_count, + first_candidate, + ) = self._count_xml_record_candidates(text) + repeated = [ + (depth, tag, count) + for (depth, tag), count in candidates.items() + if count > 1 + ] + if repeated: + depth, tag, _ = min(repeated, key=lambda item: (item[0], -item[2], item[1])) + return "tag", depth, tag + if direct_children_count: + return "depth", 2, None + if first_candidate: + depth, tag = first_candidate + return "tag", depth, tag + return None + + def _count_xml_record_candidates( + self, + text: str, + ) -> tuple[Counter[tuple[int, str]], int, tuple[int, str] | None]: + """Посчитать XML-элементы, похожие на записи.""" + candidates: Counter[tuple[int, str]] = Counter() + direct_children_count = 0 + first_candidate: tuple[int, str] | None = None + stack: list[Any] = [] + root_tag = "" + + for event, element in ET.iterparse( + io.StringIO(text), + events=("start", "end"), + ): + if event == "start": + stack.append(element) + if not root_tag: + root_tag = element.tag + continue + + depth = len(stack) + is_root = element.tag == root_tag and depth == 1 + has_record_shape = bool(element.attrib) or len(list(element)) > 0 + if not is_root and has_record_shape: + key = (depth, self._strip_namespace(element.tag)) + candidates[key] += 1 + if depth == 2: + direct_children_count += 1 + if first_candidate is None: + first_candidate = key + + element.clear() + if stack: + stack.pop() + + 
return candidates, direct_children_count, first_candidate + + def _collect_xml_records( + self, + text: str, + selector: tuple[str, int, str | None], + ) -> list[dict]: + """Потоково собрать XML-записи по выбранному selector.""" + mode, target_depth, target_tag = selector + result: list[dict] = [] + stack: list[Any] = [] + root_tag = "" + + for event, element in ET.iterparse( + io.StringIO(text), + events=("start", "end"), + ): + if event == "start": + stack.append(element) + if not root_tag: + root_tag = element.tag + continue + + depth = len(stack) + is_root = element.tag == root_tag and depth == 1 + tag = self._strip_namespace(element.tag) + is_candidate = not is_root and depth == target_depth + if mode == "tag": + is_candidate = is_candidate and tag == target_tag + + if is_candidate: + result.append(self._xml_to_dict(element)) + self._validate_record_count(len(result)) + element.clear() + elif is_root: + element.clear() + + if stack: + stack.pop() + + return result + + def _parse_html(self, content: bytes) -> list[dict]: + """Распарсить HTML-страницу источника.""" + soup = BeautifulSoup(self._decode(content), "html.parser") + card_rows = self._parse_source_cards(soup) + if card_rows: + return card_rows + + for table in soup.find_all("table"): + result = self._parse_html_table(table) + if result: + return result + return [] + + def _parse_source_cards(self, soup: BeautifulSoup) -> list[dict]: + """Распарсить карточки поисковой выдачи источников без таблиц.""" + if self.source not in EIS_CARD_SOURCES: + return [] + return self._parse_zakupki_cards(soup) + + def _parse_zakupki_cards(self, soup: BeautifulSoup) -> list[dict]: + """Распарсить карточки официального поиска ЕИС.""" + result = [] + labels = { + "Объект закупки", + "Объекты закупки", + "Заказчик", + "Наименование заказчика", + "Наименование документа", + "Наименование (ФИО) недобросовестного поставщика", + "ИНН (аналог ИНН)", + "Номер реестровой записи в ЕРУЗ", + "Начальная цена", + "Цена контракта", + 
"Размещено", + "Обновлено", + "Окончание подачи заявок", + "Включено", + "Исключено", + "Утверждение", + "Вступление в силу", + "Способы закупки", + } + for card in soup.select(".search-registry-entry-block"): + lines = self._extract_text_lines(card) + if not lines: + continue + + row: dict[str, Any] = {"raw_text": "\n".join(lines)} + number_index = self._fill_zakupki_number(row, lines) + self._fill_label_pairs(row, lines, labels) + self._fill_zakupki_status(row, lines, number_index, labels) + + link = card.find("a", href=True) + if link: + row["url"] = urljoin(ZAKUPKI_BASE_URL, link["href"]) + if lines[0].endswith("-ФЗ"): + row["law"] = lines[0] + + result.append(row) + self._validate_record_count(len(result)) + return result + + def _parse_html_table(self, table: Any) -> list[dict]: + """Распарсить HTML-таблицу с th или строкой-заголовком в td.""" + rows = table.find_all("tr") + if not rows: + return [] + if self.source == FAS_GOZ_SOURCE: + return self._parse_fas_goz_table_rows(rows) + + headers, data_rows = self._extract_table_headers(rows) + if not headers: + return [] + + result = [] + for row in data_rows: + cells = row.find_all(["td", "th"]) + if not cells: + continue + values = [cell.get_text(" ", strip=True) for cell in cells] + if not any(values): + continue + result.append( + { + headers[index] if index < len(headers) else f"field_{index}": value + for index, value in enumerate(values) + } + ) + self._validate_record_count(len(result)) + return result + + def _parse_fas_goz_table_rows(self, rows: list[Any]) -> list[dict]: + """Распарсить таблицу ФАС ГОЗ с многострочным заголовком.""" + headers = [ + "Номер реестровой записи", + "Уполномоченный орган", + "Постановление", + "Дата вступления постановления", + "Исполнение постановления", + "Полное наименование лица", + "Фирменное наименование лица", + "Адрес лица", + "ИНН", + ] + result = [] + for row in rows: + values = [ + cell.get_text(" ", strip=True) for cell in row.find_all(["td", "th"]) + ] + if 
len(values) < 8 or self._is_fas_goz_header_number_row(values): + continue + result.append( + { + headers[index] if index < len(headers) else f"field_{index}": value + for index, value in enumerate(values) + } + ) + self._validate_record_count(len(result)) + return result + + def _is_fas_goz_header_number_row(self, values: list[str]) -> bool: + """Определить служебную строку ФАС с номерами колонок 1..8.""" + return all(value.isdigit() for value in values) and values[:3] == [ + "1", + "2", + "3", + ] + + def _extract_table_headers(self, rows: list[Any]) -> tuple[list[str], list[Any]]: + """Выделить заголовки таблицы, включая госстраницы без th.""" + first_header = rows[0].find_all("th") + if first_header: + headers = [cell.get_text(" ", strip=True) for cell in first_header] + return headers, rows[1:] + + first_cells = rows[0].find_all(["th", "td"]) + headers = [cell.get_text(" ", strip=True) for cell in first_cells] + if self._looks_like_table_header(headers): + return headers, rows[1:] + return [], [] + + def _looks_like_table_header(self, headers: list[str]) -> bool: + """Отсеять layout-таблицы без th, но принять реестровые таблицы ФАС.""" + normalized = " ".join(headers).lower() + if len(headers) < 2 or not any(headers): + return False + markers = ( + "номер", + "реестр", + "наименование", + "инн", + "огрн", + "дата", + "информация", + "лиценз", + "постановлен", + ) + return sum(marker in normalized for marker in markers) >= 2 + + def _extract_text_lines(self, node: Any) -> list[str]: + """Получить непустые строки видимого текста.""" + return [ + line.strip() + for line in node.get_text("\n", strip=True).splitlines() + if line.strip() + ] + + def _fill_zakupki_number(self, row: dict, lines: list[str]) -> int | None: + """Найти номер карточки ЕИС.""" + for index, line in enumerate(lines): + match = re.search(r"№\s*([0-9A-Za-zА-Яа-яЁё/_-]+)", line) + if match: + row["number"] = match.group(1) + row["registry_number"] = match.group(1) + return index + return None + + 
def _fill_label_pairs( + self, + row: dict, + lines: list[str], + labels: set[str], + ) -> None: + """Заполнить пары label -> следующая строка из карточки.""" + for index, line in enumerate(lines[:-1]): + if line in labels: + row[line] = lines[index + 1] + + def _fill_zakupki_status( + self, + row: dict, + lines: list[str], + number_index: int | None, + labels: set[str], + ) -> None: + """Выделить статус карточки ЕИС, если он расположен после номера.""" + if number_index is None: + return + status_index = number_index + 1 + if status_index < len(lines) and lines[status_index] not in labels: + row["status"] = lines[status_index] + + def _normalize_row(self, row: dict, index: int) -> GenericParserItem: + """Привести строку источника к общей DTO.""" + payload = self._json_safe(row) + external_id = self._lookup( + payload, + [ + "external_id", + "id", + "uid", + "guid", + "number", + "registry_number", + "registration_number", + "purchase_number", + "contract_number", + "case_number", + "vacancy_id", + "product_reg_number_2023", + "product_reg_number_2022", + "res_number", + "product_gisp_url", + "регистрационный номер лицензии", + "№ сертификата", + "номер реестровой записи", + "номер реестровой записи в еруз", + "номер", + "реестровый номер", + "номер дела", + "идентификатор", + ], + ) + if not external_id: + external_id = self._make_external_id(payload) + + return GenericParserItem( + source=self.source, + external_id=external_id, + inn=self._lookup( + payload, + [ + "inn", + "supplier_inn", + "employer_inn", + "org_inn", + "инн", + "инн лицензиата", + "инн аналог инн", + ], + ), + ogrn=self._lookup( + payload, + [ + "ogrn", + "supplier_ogrn", + "ogrn_id", + "org_ogrn", + "огрн", + "огрн или огрип лицензиата", + ], + ), + organisation_name=self._lookup( + payload, + [ + "organisation_name", + "organization_name", + "company_name", + "supplier_name", + "employer_name", + "short_name", + "shortName", + "org_name", + "name", + "наименование", + "организация", + 
"работодатель", + "заказчик", + "заявитель", + "наименование заказчика", + "наименование фио недобросовестного поставщика", + "полное сокращенное наименование лицензиата", + "информация о лице", + "полное наименование лица", + "фирменное наименование лица", + ], + ), + title=self._lookup( + payload, + [ + "title", + "subject", + "description", + "purchase_object", + "vacancy_name", + "product_name", + "product_spec", + "наименование средства шифр", + "объект закупки", + "объекты закупки", + "наименование документа", + "наименование закупки", + "предмет", + "описание", + "должность", + "продукция", + ], + ), + record_date=self._lookup( + payload, + [ + "date", + "publish_date", + "publication_date", + "created_at", + "actualBfoDate", + "period", + "res_date", + "дата", + "дата предоставления лицензии", + "дата внесения в реестр", + "дата вступления постановления", + "размещено", + "обновлено", + "включено", + "утверждение", + ], + ), + amount=self._to_decimal( + self._lookup( + payload, + [ + "amount", + "price", + "sum", + "max_price", + "salary", + "gainSum", + "product_score_value", + "сумма", + "цена", + "начальная цена", + "цена контракта", + ], + ) + ), + status=self._lookup( + payload, + [ + "status", + "state", + "statusCode", + "res_valid_till", + "срок действия сертификата", + "исполнение постановления", + "статус", + "состояние", + ], + ), + url=self._lookup( + payload, + [ + "url", + "href", + "link", + "source_url", + "product_gisp_url", + "gisp_url", + "ссылка", + ], + ), + payload=payload, + ) + + def _lookup(self, row: dict, candidates: list[str]) -> str: + """Найти значение по списку возможных ключей.""" + normalized: dict[str, Any] = {} + for key, value in self._iter_lookup_values(row): + normalized.setdefault(self._normalize_key(key), value) + + for candidate in candidates: + value = normalized.get(self._normalize_key(candidate)) + if value not in (None, ""): + return self._clean_lookup_value(value) + return "" + + def _iter_lookup_values(self, 
row: dict, prefix: str = ""): + """Итерировать значения, включая вложенные JSON-объекты.""" + for key, value in row.items(): + key_str = str(key) + full_key = f"{prefix}.{key_str}" if prefix else key_str + yield key_str, value + yield full_key, value + if isinstance(value, dict): + yield from self._iter_lookup_values(value, full_key) + + def _clean_lookup_value(self, value: Any) -> str: + """Привести значение lookup к чистому тексту.""" + if isinstance(value, dict | list | tuple): + return "" + text = str(value).strip() + if "<" in text and ">" in text: + text = BeautifulSoup(text, "html.parser").get_text(" ", strip=True) + return re.sub(r"\s+", " ", text).strip() + + def _make_external_id(self, payload: dict) -> str: + """Сформировать стабильный внешний ID, если источник его не дал.""" + raw = json.dumps(payload, ensure_ascii=False, sort_keys=True, default=str) + digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:24] + return f"{self.source}:{digest}" + + def _validate_file_size(self, size: int, file_name: str) -> None: + """Проверить размер одиночного файла до разбора.""" + if size > self.max_file_size_bytes: + raise StructuredDataClientError( + f"File {file_name} exceeds size limit: {size} bytes" + ) + + def _validate_record_count(self, count: int) -> None: + """Ограничить количество строк, удерживаемых в памяти.""" + if count > self.max_records: + raise StructuredDataClientError(f"Record count exceeds limit: {count}") + + def _validate_zip_member(self, info: zipfile.ZipInfo) -> None: + """Проверить ZIP-элемент перед чтением в память.""" + self._validate_file_size(info.file_size, info.filename) + if info.compress_size <= 0: + return + compression_ratio = info.file_size / info.compress_size + if compression_ratio > MAX_ZIP_COMPRESSION_RATIO: + raise StructuredDataClientError( + f"ZIP member compression ratio is too high: {info.filename}" + ) + + def _decode(self, content: bytes) -> str: + """Декодировать выгрузку с учётом частой cp1251 в госданных.""" + 
for encoding in ("utf-8-sig", "utf-8", "cp1251"): + try: + return content.decode(encoding) + except UnicodeDecodeError: + continue + return content.decode("utf-8", errors="replace") + + def _extract_preferred_html_download_url( + self, + content: bytes, + *, + base_url: str, + ) -> str: + """Найти официальный XLSX download на HTML-странице источника.""" + if self.source == FSTEC_SOURCE: + return self._extract_fstec_download_url(content, base_url=base_url) + if self.source != MPT_PRODUCTS_SOURCE: + return "" + text = self._decode(content).lstrip() + if not self._looks_like_html(text): + return "" + + soup = BeautifulSoup(text, "html.parser") + candidates = [] + for link in soup.find_all("a", href=True): + label = link.get_text(" ", strip=True) + href = link["href"] + if "xlsx" not in label.lower() and "xls" not in href.lower(): + continue + candidates.append((label, urljoin(base_url, href))) + + for label, url in candidates: + if GISP_PRODUCTS_DOWNLOAD_LABEL.lower() in label.lower(): + return url + return candidates[0][1] if candidates else "" + + def _extract_fstec_download_url(self, content: bytes, *, base_url: str) -> str: + """Найти CSV-выгрузку реестра ФСТЭК на HTML-странице.""" + text = self._decode(content).lstrip() + if not self._looks_like_html(text): + return "" + + soup = BeautifulSoup(text, "html.parser") + candidates = [ + urljoin(base_url, link["href"]) + for link in soup.find_all("a", href=True) + if "module=rfiles" in link["href"] or "/uploads/reg" in link["href"] + ] + for url in candidates: + if "file=1" in url or url.lower().endswith(".csv"): + return url + return candidates[0] if candidates else "" + + def _clean_xml(self, content: bytes) -> str: + """Удалить управляющие символы, которые часто ломают XML выгрузки.""" + text = self._decode(content) + return re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text) + + def _xml_to_dict(self, element: Any) -> dict: + """Преобразовать XML-элемент в плоский словарь.""" + data = dict(element.attrib) + children 
= list(element) + if children: + for child in children: + key = self._strip_namespace(child.tag) + if len(list(child)) > 0: + value = self._xml_to_dict(child) + else: + value = child.text or "" + self._set_xml_value(data, key, value) + elif element.text: + data[self._strip_namespace(element.tag)] = element.text + return data + + def _set_xml_value(self, data: dict, key: str, value: Any) -> None: + """Сохранить повторяющиеся XML-теги списком, не перетирая значения.""" + if key not in data: + data[key] = value + return + current = data[key] + if isinstance(current, list): + current.append(value) + else: + data[key] = [current, value] + + def _strip_namespace(self, tag: str) -> str: + """Убрать XML namespace из имени тега.""" + return tag.rsplit("}", 1)[-1] + + def _as_dict(self, item: Any) -> dict: + """Привести JSON-элемент к словарю.""" + if isinstance(item, dict): + return item + return {"value": item} + + def _json_safe(self, row: dict) -> dict: + """Сделать payload сериализуемым в JSON.""" + result = {} + for key, value in row.items(): + result[str(key)] = self._json_safe_value(value) + return result + + def _json_safe_value(self, value: Any) -> Any: + """Рекурсивно привести значение к JSON-совместимому виду.""" + if isinstance(value, str | int | float | bool) or value is None: + return value + if isinstance(value, Decimal): + return str(value) + if isinstance(value, dict): + return { + str(key): self._json_safe_value(item) for key, item in value.items() + } + if isinstance(value, list | tuple): + return [self._json_safe_value(item) for item in value] + return str(value) + + def _normalize_key(self, key: str) -> str: + """Нормализовать ключ для сопоставления русских и английских колонок.""" + return re.sub(r"[^0-9a-zа-яё]+", "", str(key).lower()) + + def _to_decimal(self, value: str) -> Decimal | None: + """Преобразовать строковое число в Decimal.""" + if not value: + return None + cleaned = value.replace(" ", "").replace("\xa0", "").replace(",", ".") + cleaned 
= re.sub(r"[^0-9.\-]", "", cleaned) + if not cleaned: + return None + try: + return Decimal(cleaned) + except InvalidOperation: + return None + + def close(self) -> None: + """Закрыть HTTP клиент.""" + if self._http_client is not None: + self._http_client.close() + self._http_client = None + + def __enter__(self) -> "StructuredDataClient": + """Поддержка context manager.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Закрытие при выходе из context manager.""" + self.close() diff --git a/src/apps/parsers/clients/trudvsem/__init__.py b/src/apps/parsers/clients/trudvsem/__init__.py new file mode 100644 index 0000000..2671a95 --- /dev/null +++ b/src/apps/parsers/clients/trudvsem/__init__.py @@ -0,0 +1,5 @@ +"""Клиент портала Работа России.""" + +from apps.parsers.clients.trudvsem.client import TrudvsemClient, TrudvsemClientError + +__all__ = ["TrudvsemClient", "TrudvsemClientError"] diff --git a/src/apps/parsers/clients/trudvsem/client.py b/src/apps/parsers/clients/trudvsem/client.py new file mode 100644 index 0000000..3987547 --- /dev/null +++ b/src/apps/parsers/clients/trudvsem/client.py @@ -0,0 +1,199 @@ +"""Клиент API вакансий портала Работа России.""" + +import hashlib +import json +import logging +from dataclasses import dataclass, field +from decimal import Decimal, InvalidOperation +from typing import Any + +from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError +from apps.parsers.clients.common.schemas import GenericParserItem +from apps.parsers.models import ParserLoadLog + +logger = logging.getLogger(__name__) + +DEFAULT_BASE_URL = "http://opendata.trudvsem.ru/api/v1" +VACANCIES_ENDPOINT = "/vacancies" + + +class TrudvsemClientError(HTTPClientError): + """Ошибка клиента Работа России.""" + + pass + + +@dataclass +class TrudvsemClient: + """Клиент открытого API вакансий Работа России.""" + + proxies: list[str] | None = None + base_url: str = DEFAULT_BASE_URL + timeout: int = 120 + company_search_max_pages: int 
= 20 + _http_client: BaseHTTPClient | None = field(default=None, repr=False) + + @property + def http_client(self) -> BaseHTTPClient: + """Ленивая инициализация HTTP клиента.""" + if self._http_client is None: + self._http_client = BaseHTTPClient( + base_url=self.base_url, + proxies=self.proxies, + timeout=self.timeout, + ) + return self._http_client + + def fetch_vacancies( + self, + *, + limit: int = 100, + offset: int = 0, + region_code: str | None = None, + company_inn: str | None = None, + text: str | None = None, + ) -> list[GenericParserItem]: + """ + Получить вакансии из открытого API. + + Args: + limit: Размер страницы. + offset: Смещение. + region_code: Код региона в API Работа России. + company_inn: ИНН работодателя для локальной фильтрации результата. + text: Текстовый фильтр API. + """ + if company_inn: + return self._fetch_vacancies_by_company_inn( + limit=limit, + offset=offset, + region_code=region_code, + company_inn=company_inn, + text=text, + ) + + params: dict[str, Any] = {"limit": limit, "offset": offset} + if region_code: + params["region"] = region_code + if text: + params["text"] = text + + try: + response = self.http_client.get_json(VACANCIES_ENDPOINT, params=params) + except HTTPClientError: + raise + except Exception as e: + raise TrudvsemClientError(f"Failed to fetch vacancies: {e}") from e + + vacancies = self._extract_vacancies(response) + records = [self._map_vacancy(vacancy) for vacancy in vacancies] + logger.info("Fetched %d Trudvsem vacancies", len(records)) + return records + + def _fetch_vacancies_by_company_inn( + self, + *, + limit: int, + offset: int, + region_code: str | None, + company_inn: str, + text: str | None, + ) -> list[GenericParserItem]: + """Искать вакансии работодателя по страницам, чтобы не дать ложный ноль.""" + records: list[GenericParserItem] = [] + current_offset = offset + page_size = max(limit, 1) + + for _ in range(self.company_search_max_pages): + params: dict[str, Any] = {"limit": page_size, "offset": 
current_offset} + if region_code: + params["region"] = region_code + if text: + params["text"] = text + + try: + response = self.http_client.get_json(VACANCIES_ENDPOINT, params=params) + except HTTPClientError: + raise + except Exception as e: + raise TrudvsemClientError(f"Failed to fetch vacancies: {e}") from e + + vacancies = self._extract_vacancies(response) + page_records = [self._map_vacancy(vacancy) for vacancy in vacancies] + records.extend( + record for record in page_records if record.inn == company_inn + ) + if len(records) >= limit: + result = records[:limit] + logger.info("Fetched %d Trudvsem vacancies by INN", len(result)) + return result + if len(vacancies) < page_size: + logger.info("Fetched %d Trudvsem vacancies by INN", len(records)) + return records + current_offset += page_size + + raise TrudvsemClientError( + "Company INN search reached page limit before exhausting vacancies" + ) + + def _extract_vacancies(self, response: dict) -> list[dict]: + """Достать список вакансий из ответа API.""" + results = response.get("results", response) + vacancies = results.get("vacancies", []) if isinstance(results, dict) else [] + + normalized = [] + for item in vacancies: + if isinstance(item, dict) and isinstance(item.get("vacancy"), dict): + normalized.append(item["vacancy"]) + elif isinstance(item, dict): + normalized.append(item) + return normalized + + def _map_vacancy(self, vacancy: dict) -> GenericParserItem: + """Преобразовать вакансию API в общую DTO.""" + company = vacancy.get("company") or {} + salary = vacancy.get("salary") or "" + external_id = str(vacancy.get("id") or vacancy.get("source_id") or "") + if not external_id: + raw = json.dumps(vacancy, ensure_ascii=False, sort_keys=True, default=str) + external_id = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:24] + return GenericParserItem( + source=ParserLoadLog.Source.TRUDVSEM, + external_id=external_id, + inn=str(company.get("inn") or vacancy.get("company_inn") or ""), + 
ogrn=str(company.get("ogrn") or ""), + organisation_name=str(company.get("name") or vacancy.get("company") or ""), + title=str(vacancy.get("job-name") or vacancy.get("name") or ""), + record_date=str(vacancy.get("creation-date") or vacancy.get("date") or ""), + amount=self._parse_salary(salary), + status=str(vacancy.get("state") or ""), + url=str(vacancy.get("vac_url") or vacancy.get("url") or ""), + payload=vacancy, + ) + + def _parse_salary(self, salary: Any) -> Decimal | None: + """Достать числовую зарплату из ответа API.""" + if isinstance(salary, dict): + value = salary.get("from") or salary.get("to") or salary.get("salary") + else: + value = salary + if value in (None, ""): + return None + try: + return Decimal(str(value).replace(" ", "").replace(",", ".")) + except InvalidOperation: + return None + + def close(self) -> None: + """Закрыть HTTP клиент.""" + if self._http_client is not None: + self._http_client.close() + self._http_client = None + + def __enter__(self) -> "TrudvsemClient": + """Поддержка context manager.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Закрытие при выходе из context manager.""" + self.close() diff --git a/src/apps/parsers/frontend_compat.py b/src/apps/parsers/frontend_compat.py new file mode 100644 index 0000000..ad078b4 --- /dev/null +++ b/src/apps/parsers/frontend_compat.py @@ -0,0 +1,790 @@ +"""Compatibility API for frontend contracts that existed on the dev branch.""" + +from __future__ import annotations + +import csv +from collections.abc import Iterable +from dataclasses import dataclass +from datetime import timedelta +from typing import Any + +from apps.core.response import api_error_response, api_response +from apps.core.services import BackgroundJobService +from apps.parsers.models import ( + GenericParserRecord, + ParserLoadLog, +) +from apps.parsers.serializers import ( + ParserLoadLogSerializer, + ParserRunRequestSerializer, +) +from apps.parsers.source_registry import 
PARSER_SOURCES +from apps.parsers.views import ( + NATIVE_RECORD_MODELS, + TASKS_BY_NAME, + build_task_kwargs, +) +from django.core.cache import cache +from django.core.paginator import Paginator +from django.db.models import CharField, Max, Q +from django.db.models.functions import Cast +from django.http import Http404, HttpResponse +from drf_yasg import openapi +from drf_yasg.utils import swagger_auto_schema +from rest_framework import status +from rest_framework.exceptions import ValidationError +from rest_framework.permissions import IsAuthenticated +from rest_framework.response import Response +from rest_framework.views import APIView + +FRONTEND_SOURCES_TAG = "Frontend Sources" +PARSING_SETTINGS_TAG = "Parsing Settings" +SYSTEM_LOGS_TAG = "System Logs" + +ACTIVE_JOB_STATUSES = {"pending", "started", "retry"} +SUCCESS_LOAD_STATUSES = {"success", "skipped"} +ERROR_LOAD_STATUSES = {"failed", "failure", "error"} + +PARSING_SETTINGS_CACHE_KEY = "parsers:frontend_compat:parsing_settings" +PARSING_SETTINGS_FIELDS = { + "manufacturers_and_products", + "public_procurements", + "defense_unreliable_suppliers", + "planned_inspections", + "arbitration_cases", + "bankruptcy_procedures", + "information_security_registries", +} +PARSING_SETTINGS_DEFAULTS = { + "manufacturers_and_products": "daily", + "public_procurements": "daily", + "defense_unreliable_suppliers": "weekly", + "planned_inspections": "monthly", + "arbitration_cases": "daily", + "bankruptcy_procedures": "daily", + "information_security_registries": "yearly", +} +PARSING_SETTINGS_FREQUENCIES = {"daily", "weekly", "monthly", "yearly"} +LOG_STATUS_LABELS = { + "success": "Успешно", + "failed": "Ошибка", + "failure": "Ошибка", + "error": "Ошибка", + "in_progress": "В процессе", + "pending": "В очереди", + "started": "В процессе", + "retry": "Повтор", + "skipped": "Пропущено", +} + + +@dataclass(frozen=True) +class FrontendSourceCardDefinition: + """Aggregated source card expected by the old frontend API.""" + + 
slug: str + title: str + description: str + order: int + source_keys: tuple[str, ...] + refresh_interval: timedelta | None = None + is_available: bool = True + refresh_params: tuple[dict[str, Any], ...] = () + + +SOURCE_CARD_DEFINITIONS = ( + FrontendSourceCardDefinition( + slug="financial-indicators", + title="Финансово-экономические показатели", + description="Финансовая отчетность и показатели ФНС.", + order=10, + source_keys=("fns_financial",), + refresh_interval=timedelta(days=1), + ), + FrontendSourceCardDefinition( + slug="public-procurements", + title="Государственные закупки по 44-ФЗ и 223-ФЗ", + description="Извещения, закупочные процедуры и контракты ЕИС.", + order=20, + source_keys=("procurements_44fz", "procurements_223fz", "contracts"), + refresh_interval=timedelta(hours=6), + ), + FrontendSourceCardDefinition( + slug="manufacturers-and-products", + title="Производители и продукция России", + description="Данные Минпромторга о производителях и промышленной продукции.", + order=30, + source_keys=("industrial", "manufactures", "mpt_products"), + refresh_interval=timedelta(days=1), + ), + FrontendSourceCardDefinition( + slug="planned-inspections", + title="Плановые проверки Генпрокуратуры России", + description="Плановые и внеплановые проверки из открытых данных.", + order=40, + source_keys=("inspections", "sync_inspections"), + refresh_params=( + { + "name": "max_months_per_law", + "label": "Месяцев на закон", + "description": "Ограничение объема синхронизации проверок.", + "required": False, + "type": "integer", + "default": 1, + }, + ), + ), + FrontendSourceCardDefinition( + slug="defense-unreliable-suppliers", + title="Недобросовестные поставщики и ГОЗ", + description="Реестры ФАС и ЕИС по поставщикам и уклонению от ГОЗ.", + order=50, + source_keys=("unfair_suppliers", "fas_goz"), + refresh_interval=timedelta(days=1), + ), + FrontendSourceCardDefinition( + slug="arbitration-cases", + title="Арбитражные дела", + description="Арбитражные дела по 
организациям.", + order=60, + source_keys=("arbitration",), + refresh_interval=timedelta(days=1), + ), + FrontendSourceCardDefinition( + slug="bankruptcy-procedures", + title="Банкротства Федресурс", + description="Сведения о процедурах банкротства.", + order=70, + source_keys=("fedresurs_bankruptcy",), + refresh_interval=timedelta(days=1), + ), + FrontendSourceCardDefinition( + slug="information-security-registries", + title="Реестры ФСТЭК", + description="Реестры по информационной безопасности.", + order=80, + source_keys=("fstec",), + refresh_interval=timedelta(days=30), + ), + FrontendSourceCardDefinition( + slug="labor-vacancies", + title="Вакансии Работа России", + description="Вакансии работодателей из ЕЦП Работа в России.", + order=90, + source_keys=("trudvsem",), + refresh_params=( + { + "name": "company_inn", + "label": "ИНН работодателя", + "description": "Фильтр вакансий по ИНН работодателя.", + "required": False, + "type": "string", + "default": None, + }, + { + "name": "text", + "label": "Текст", + "description": "Поисковая строка вакансии.", + "required": False, + "type": "string", + "default": None, + }, + { + "name": "limit", + "label": "Лимит", + "description": "Размер страницы API Работа России.", + "required": False, + "type": "integer", + "default": 100, + }, + ), + ), +) +SOURCE_CARD_BY_SLUG = {item.slug: item for item in SOURCE_CARD_DEFINITIONS} + + +def _source_keys_to_model_sources(source_keys: Iterable[str]) -> list[str]: + """Map source registry keys to ParserLoadLog source values without duplicates.""" + sources = [] + for source_key in source_keys: + descriptor = PARSER_SOURCES.get(source_key) + if descriptor and descriptor.source not in sources: + sources.append(descriptor.source) + return sources + + +def _card_slug_for_parser_source(parser_source: str) -> str | None: + for definition in SOURCE_CARD_DEFINITIONS: + if parser_source in _source_keys_to_model_sources(definition.source_keys): + return definition.slug + return None + + +def 
_card_title_for_parser_source(parser_source: str) -> str | None: + slug = _card_slug_for_parser_source(parser_source) + if not slug: + return None + return SOURCE_CARD_BY_SLUG[slug].title + + +def _get_card_definition(slug: str) -> FrontendSourceCardDefinition: + definition = SOURCE_CARD_BY_SLUG.get(slug) + if definition is None: + raise Http404("Карточка источника не найдена") + return definition + + +def _record_queryset_for_source(source: str): + if source in NATIVE_RECORD_MODELS: + return NATIVE_RECORD_MODELS[source].objects.all() + return GenericParserRecord.objects.filter(source=source) + + +def _records_count_for_source(source: str) -> int: + return _record_queryset_for_source(source).count() + + +def _organizations_count_for_source(source: str) -> int: + queryset = _record_queryset_for_source(source) + field = "inn" + return queryset.exclude(**{field: ""}).values(field).distinct().count() + + +def _data_timestamp_for_source(source: str): + return _record_queryset_for_source(source).aggregate( + last_updated=Max("updated_at") + )["last_updated"] + + +def _latest_load_for_sources( + sources: list[str], + *, + statuses: set[str] | None = None, +) -> ParserLoadLog | None: + queryset = ParserLoadLog.objects.filter(source__in=sources) + if statuses: + queryset = queryset.filter(status__in=statuses) + return queryset.order_by("-updated_at", "-created_at").first() + + +def _serialize_load(load_log: ParserLoadLog | None) -> dict[str, Any] | None: + if load_log is None: + return None + return { + "batch_id": load_log.batch_id, + "source": load_log.source, + "source_display": load_log.get_source_display(), + "records_count": load_log.records_count, + "status": load_log.status, + "error_message": load_log.error_message, + "created_at": load_log.created_at, + "updated_at": load_log.updated_at, + } + + +def _serialize_active_job(job) -> dict[str, Any]: + return { + "task_id": job.task_id, + "task_name": job.task_name, + "status": job.status, + "progress": job.progress, + 
"progress_message": job.progress_message, + "started_at": job.started_at, + "created_at": job.created_at, + "meta": job.meta, + } + + +def _active_tasks_for_definition( + definition: FrontendSourceCardDefinition, +) -> list[dict]: + task_names = [ + PARSER_SOURCES[source_key].task_name + for source_key in definition.source_keys + if source_key in PARSER_SOURCES + ] + queryset = BackgroundJobService.get_queryset().filter( + task_name__in=task_names, + status__in=ACTIVE_JOB_STATUSES, + ) + return [_serialize_active_job(job) for job in queryset.order_by("-created_at")[:10]] + + +def _status_label(status_value: str) -> str: + labels = { + "success": "Обновлено", + "in_progress": "В процессе", + "error": "Ошибка", + "idle": "Нет данных", + "unavailable": "Не подключено", + } + return labels.get(status_value, status_value) + + +def _status_for_card( + definition: FrontendSourceCardDefinition, + *, + active_tasks: list[dict], + latest_load: ParserLoadLog | None, + last_updated_at, +) -> str: + if not definition.is_available: + return "unavailable" + if active_tasks or (latest_load and latest_load.status == "in_progress"): + return "in_progress" + if latest_load and latest_load.status in ERROR_LOAD_STATUSES: + return "error" + if last_updated_at: + return "success" + return "idle" + + +def _build_source_item(source_key: str) -> dict[str, Any]: + descriptor = PARSER_SOURCES[source_key] + source = descriptor.source + latest_load = _latest_load_for_sources([source]) + latest_success_load = _latest_load_for_sources( + [source], statuses=SUCCESS_LOAD_STATUSES + ) + last_updated_at = ( + latest_success_load.updated_at + if latest_success_load + else _data_timestamp_for_source(source) + ) + return { + "code": descriptor.key, + "title": descriptor.title, + "description": descriptor.data_scope, + "parser_source": source, + "parser_source_display": descriptor.title, + "records_count": _records_count_for_source(source), + "organizations_count": 
_organizations_count_for_source(source), + "last_updated_at": last_updated_at, + "latest_load": _serialize_load(latest_load), + "latest_success_load": _serialize_load(latest_success_load), + } + + +def _build_source_card(definition: FrontendSourceCardDefinition) -> dict[str, Any]: + source_items = [ + _build_source_item(source_key) + for source_key in definition.source_keys + if source_key in PARSER_SOURCES + ] + sources = _source_keys_to_model_sources(definition.source_keys) + latest_load = _latest_load_for_sources(sources) + latest_success_load = _latest_load_for_sources( + sources, statuses=SUCCESS_LOAD_STATUSES + ) + timestamps = [ + item["last_updated_at"] for item in source_items if item["last_updated_at"] + ] + last_updated_at = latest_success_load.updated_at if latest_success_load else None + if last_updated_at is None and timestamps: + last_updated_at = max(timestamps) + + active_tasks = _active_tasks_for_definition(definition) + progress = ( + round(sum(int(task["progress"]) for task in active_tasks) / len(active_tasks)) + if active_tasks + else 0 + ) + status_value = _status_for_card( + definition, + active_tasks=active_tasks, + latest_load=latest_load, + last_updated_at=last_updated_at, + ) + next_update_at = ( + last_updated_at + definition.refresh_interval + if last_updated_at and definition.refresh_interval + else None + ) + + return { + "slug": definition.slug, + "title": definition.title, + "description": definition.description, + "order": definition.order, + "is_available": definition.is_available, + "status": status_value, + "status_label": _status_label(status_value), + "progress": progress, + "records_count": sum(item["records_count"] for item in source_items), + "organizations_count": sum( + item["organizations_count"] for item in source_items + ), + "last_updated_at": last_updated_at, + "next_update_at": next_update_at, + "error_message": latest_load.error_message if latest_load else "", + "task_names": [ + 
PARSER_SOURCES[source_key].task_name + for source_key in definition.source_keys + if source_key in PARSER_SOURCES + ], + "refresh_requires_params": any( + item.get("required") for item in definition.refresh_params + ), + "refresh_params": list(definition.refresh_params), + "active_tasks": active_tasks, + "source_items": source_items, + "latest_load": _serialize_load(latest_load), + "latest_success_load": _serialize_load(latest_success_load), + } + + +def _source_status_rows() -> list[dict[str, Any]]: + cards = sorted( + (_build_source_card(definition) for definition in SOURCE_CARD_DEFINITIONS), + key=lambda item: ( + item["last_updated_at"] is None, + -(item["last_updated_at"].timestamp()) if item["last_updated_at"] else 0, + item["title"], + ), + ) + return [ + { + "row_number": index, + "slug": card["slug"], + "source": card["title"], + "status": card["status"], + "status_label": card["status_label"], + "actualized_at": card["last_updated_at"], + "next_update_at": card["next_update_at"], + "records_count": card["records_count"], + "organizations_count": card["organizations_count"], + "progress": card["progress"], + "error_message": card["error_message"], + "active_tasks": card["active_tasks"], + } + for index, card in enumerate(cards, start=1) + ] + + +def _refresh_params_from_request(request) -> dict[str, Any]: + raw_params = request.data.get("params", request.data) + return dict(raw_params) if isinstance(raw_params, dict) else {} + + +def _start_source_refresh( + source_key: str, params: dict[str, Any], user_id: int +) -> dict[str, str]: + descriptor = PARSER_SOURCES[source_key] + serializer = ParserRunRequestSerializer(data=params) + serializer.is_valid(raise_exception=True) + validated = { + key: value + for key, value in serializer.validated_data.items() + if value not in ("", None) + } + task = TASKS_BY_NAME[descriptor.task_name] + task_kwargs = build_task_kwargs(source_key, validated, user_id) + async_result = task.delay(**task_kwargs) + return {"task_id": 
class SourceCardListCompatView(APIView):
    """Old frontend alias: GET /api/v1/sources/."""

    # Only authenticated users may list the source cards.
    permission_classes = [IsAuthenticated]

    @swagger_auto_schema(
        operation_summary="Frontend source cards",
        tags=[FRONTEND_SOURCES_TAG],
        responses={200: "Source card list"},
    )
    def get(self, request):
        # Build one card per definition and return them in ascending
        # ``order``; the sort is stable, so ties keep definition order.
        ordered_cards = sorted(
            map(_build_source_card, SOURCE_CARD_DEFINITIONS),
            key=lambda card: card["order"],
        )
        return api_response(ordered_cards)
class ParsingSettingsCompatView(APIView):
    """Old frontend alias: GET/PATCH /api/v1/parsing/settings/."""

    # Documentation is kept in ``#`` comments on purpose: view and handler
    # docstrings can be surfaced in the generated API description.
    #
    # Settings are stored in the cache under PARSING_SETTINGS_CACHE_KEY and
    # are always read merged over PARSING_SETTINGS_DEFAULTS.
    permission_classes = [IsAuthenticated]

    @staticmethod
    def _get_settings() -> dict[str, str]:
        # Defaults overlaid with the cached overrides; a missing or empty
        # cache entry yields the defaults alone.
        cached = cache.get(PARSING_SETTINGS_CACHE_KEY) or {}
        return {**PARSING_SETTINGS_DEFAULTS, **cached}

    @swagger_auto_schema(
        operation_summary="Get parsing settings",
        tags=[PARSING_SETTINGS_TAG],
        responses={200: "Parsing settings"},
    )
    def get(self, request):
        return Response(self._get_settings(), status=status.HTTP_200_OK)

    @swagger_auto_schema(
        operation_summary="Update parsing settings",
        request_body=openapi.Schema(type=openapi.TYPE_OBJECT),
        tags=[PARSING_SETTINGS_TAG],
        responses={200: "Parsing settings"},
    )
    def patch(self, request):
        payload = request.data
        # The body must be an object: a JSON array or scalar would otherwise
        # fail below with TypeError/AttributeError and return HTTP 500.
        if not isinstance(payload, dict):
            raise ValidationError(
                {"detail": "Ожидается объект с настройками парсинга"}
            )

        unknown_fields = set(payload) - PARSING_SETTINGS_FIELDS
        if unknown_fields:
            raise ValidationError(
                {
                    "detail": "Неизвестные настройки: "
                    + ", ".join(sorted(unknown_fields))
                }
            )

        updates = dict(payload.items())
        # Non-string values (lists, dicts, numbers) are rejected up front:
        # an unhashable value would make the membership test itself raise
        # if the allowed frequencies are held in a set.
        invalid_keys = [
            key
            for key, value in updates.items()
            if not isinstance(value, str)
            or value not in PARSING_SETTINGS_FREQUENCIES
        ]
        if invalid_keys:
            raise ValidationError(
                {
                    key: "Значение должно быть daily, weekly, monthly или yearly"
                    for key in invalid_keys
                }
            )

        # Persist the merged result without expiry and echo it back.
        settings_payload = {**self._get_settings(), **updates}
        cache.set(PARSING_SETTINGS_CACHE_KEY, settings_payload, timeout=None)
        return Response(settings_payload, status=status.HTTP_200_OK)
_get_queryset(self, request): + queryset = ParserLoadLog.objects.all().order_by("-created_at") + source_value = request.query_params.get("source", "").strip() + status_value = request.query_params.get("status", "").strip() + batch_id = request.query_params.get("batch_id", "").strip() + search = request.query_params.get("search", "").strip() + + if source_value: + card_definition = SOURCE_CARD_BY_SLUG.get(source_value) + if card_definition: + queryset = queryset.filter( + source__in=_source_keys_to_model_sources( + card_definition.source_keys + ) + ) + else: + queryset = queryset.filter(source=source_value) + if status_value: + queryset = queryset.filter(status=status_value) + if batch_id: + try: + queryset = queryset.filter(batch_id=int(batch_id)) + except (TypeError, ValueError) as exc: + raise ValidationError( + {"batch_id": "Параметр batch_id должен быть целым числом"} + ) from exc + if search: + queryset = queryset.annotate( + batch_id_text=Cast("batch_id", output_field=CharField()) + ).filter( + Q(source__icontains=search) + | Q(status__icontains=search) + | Q(error_message__icontains=search) + | Q(batch_id_text__icontains=search) + ) + return queryset + + @swagger_auto_schema( + operation_summary="Parser load logs", + manual_parameters=[ + openapi.Parameter("source", openapi.IN_QUERY, type=openapi.TYPE_STRING), + openapi.Parameter("status", openapi.IN_QUERY, type=openapi.TYPE_STRING), + openapi.Parameter("batch_id", openapi.IN_QUERY, type=openapi.TYPE_INTEGER), + openapi.Parameter("search", openapi.IN_QUERY, type=openapi.TYPE_STRING), + openapi.Parameter("page", openapi.IN_QUERY, type=openapi.TYPE_INTEGER), + openapi.Parameter("page_size", openapi.IN_QUERY, type=openapi.TYPE_INTEGER), + ], + tags=[SYSTEM_LOGS_TAG], + responses={200: "Paginated parser logs"}, + ) + def get(self, request): + try: + page_size = max(1, min(int(request.query_params.get("page_size", 20)), 100)) + page = max(1, int(request.query_params.get("page", 1))) + except (TypeError, 
ValueError) as exc: + raise ValidationError( + {"detail": "Параметры page и page_size должны быть целыми числами"} + ) from exc + paginator = Paginator( + self._serialize_rows(self._get_queryset(request)), page_size + ) + page_obj = paginator.get_page(page) + return Response( + { + "count": paginator.count, + "next": None, + "previous": None, + "results": list(page_obj.object_list), + }, + status=status.HTTP_200_OK, + ) + + def _serialize_rows(self, queryset) -> list[dict[str, Any]]: + return [_serialize_log_row(log) for log in queryset] + + +class ParserLoadLogDetailCompatView(APIView): + """Old frontend alias: GET /api/v1/system/logs/{id}/.""" + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema( + operation_summary="Parser load log detail", + tags=[SYSTEM_LOGS_TAG], + responses={200: ParserLoadLogSerializer, 404: "Not found"}, + ) + def get(self, request, pk: int): + log = ParserLoadLog.objects.filter(pk=pk).first() + if log is None: + return api_error_response( + [{"code": "not_found", "message": "Лог загрузки не найден"}], + status_code=status.HTTP_404_NOT_FOUND, + ) + return Response(_serialize_log_row(log), status=status.HTTP_200_OK) + + +class ParserLoadLogExportCompatView(ParserLoadLogListCompatView): + """Old frontend alias: GET /api/v1/system/logs/export/.""" + + @swagger_auto_schema( + operation_summary="Export parser load logs", + tags=[SYSTEM_LOGS_TAG], + responses={200: "CSV"}, + ) + def get(self, request): + response = HttpResponse(content_type="text/csv; charset=utf-8") + response["Content-Disposition"] = 'attachment; filename="parser-load-logs.csv"' + writer = csv.writer(response) + writer.writerow( + [ + "id", + "batch_id", + "source", + "source_label", + "records_count", + "organizations_count", + "status", + "status_label", + "error_message", + "created_at", + "updated_at", + ] + ) + for row in self._serialize_rows(self._get_queryset(request)): + writer.writerow( + [ + row["id"], + row["batch_id"], + row["source"], + 
def _serialize_log_row(log: ParserLoadLog) -> dict[str, Any]:
    """Build the flat row used by the system-log list, detail and CSV export.

    Args:
        log: The parser load log entry to serialise.

    Returns:
        A dict with the log id, batch id, source (card slug when the parser
        source maps to a card, raw source otherwise), source label, counts,
        status with its label, error message and timestamps.
    """
    parser_source = log.source
    # Prefer the frontend card slug/title; fall back to the raw source value
    # and its choice label when no card is associated with this source.
    slug = _card_slug_for_parser_source(parser_source) or parser_source
    label = _card_title_for_parser_source(parser_source) or log.get_source_display()
    status_code = log.status

    row: dict[str, Any] = {
        "id": log.id,
        "batch_id": log.batch_id,
        "source": slug,
        "source_label": label,
        "records_count": log.records_count,
        # Counted over the whole source via the shared helper.
        "organizations_count": _organizations_count_for_source(parser_source),
        "status": status_code,
        "status_label": LOG_STATUS_LABELS.get(status_code, status_code),
        "error_message": log.error_message,
        "created_at": log.created_at,
        "updated_at": log.updated_at,
    }
    return row
("fedresurs_bankruptcy", "Банкротства Федресурс"), + ("fstec", "Реестры ФСТЭК"), + ("trudvsem", "Вакансии Работа России"), +] + + +class Migration(migrations.Migration): + dependencies = [ + ("parsers", "0016_auto_20260324_1120"), + ] + + operations = [ + migrations.AlterField( + model_name="parserloadlog", + name="source", + field=models.CharField( + choices=PARSER_SOURCE_CHOICES, + db_index=True, + help_text="Источник данных", + max_length=50, + verbose_name="источник", + ), + ), + migrations.CreateModel( + name="ParserBatchSequence", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created_at", + models.DateTimeField( + auto_now_add=True, + db_index=True, + help_text="Дата и время создания записи", + verbose_name="создано", + ), + ), + ( + "updated_at", + models.DateTimeField( + auto_now=True, + help_text="Дата и время последнего обновления", + verbose_name="обновлено", + ), + ), + ( + "source", + models.CharField( + choices=PARSER_SOURCE_CHOICES, + help_text="Источник данных", + max_length=50, + unique=True, + verbose_name="источник", + ), + ), + ( + "next_batch_id", + models.PositiveIntegerField( + default=1, + help_text="Следующий batch_id для источника", + verbose_name="следующий ID пакета", + ), + ), + ], + options={ + "verbose_name": "счётчик пакетов парсера", + "verbose_name_plural": "счётчики пакетов парсеров", + "db_table": "parsers_batch_sequence", + "ordering": ["source"], + }, + ), + migrations.CreateModel( + name="GenericParserRecord", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created_at", + models.DateTimeField( + auto_now_add=True, + db_index=True, + help_text="Дата и время создания записи", + verbose_name="создано", + ), + ), + ( + "updated_at", + models.DateTimeField( + auto_now=True, + help_text="Дата и время последнего обновления", + 
verbose_name="обновлено", + ), + ), + ( + "load_batch", + models.PositiveIntegerField( + db_index=True, + help_text="Идентификатор пакета загрузки", + verbose_name="ID пакета загрузки", + ), + ), + ( + "source", + models.CharField( + choices=PARSER_SOURCE_CHOICES, + db_index=True, + help_text="Источник данных", + max_length=50, + verbose_name="источник", + ), + ), + ( + "external_id", + models.CharField( + db_index=True, + help_text="Стабильный идентификатор записи во внешнем источнике", + max_length=255, + verbose_name="внешний ID", + ), + ), + ( + "inn", + models.CharField( + blank=True, + db_index=True, + help_text="ИНН организации, если есть в источнике", + max_length=20, + verbose_name="ИНН", + ), + ), + ( + "ogrn", + models.CharField( + blank=True, + db_index=True, + help_text="ОГРН организации, если есть в источнике", + max_length=20, + verbose_name="ОГРН", + ), + ), + ( + "organisation_name", + models.TextField( + blank=True, + help_text="Наименование организации из источника", + verbose_name="наименование организации", + ), + ), + ( + "title", + models.TextField( + blank=True, + help_text="Краткое описание записи", + verbose_name="заголовок", + ), + ), + ( + "record_date", + models.CharField( + blank=True, + db_index=True, + help_text="Дата записи в формате источника", + max_length=30, + verbose_name="дата записи", + ), + ), + ( + "amount", + models.DecimalField( + blank=True, + decimal_places=2, + help_text="Сумма, если источник её содержит", + max_digits=20, + null=True, + verbose_name="сумма", + ), + ), + ( + "status", + models.CharField( + blank=True, + help_text="Статус записи во внешнем источнике", + max_length=255, + verbose_name="статус", + ), + ), + ( + "url", + models.TextField( + blank=True, + help_text="Ссылка на карточку/документ во внешнем источнике", + verbose_name="URL", + ), + ), + ( + "payload", + models.JSONField( + blank=True, + default=dict, + help_text="Нормализованный исходный документ", + verbose_name="исходные данные", + ), + ), + 
( + "registry_organization", + models.ForeignKey( + blank=True, + help_text="Связь с организацией из приложения реестров", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="generic_parser_records", + to="registers.organization", + verbose_name="организация из реестров", + ), + ), + ], + options={ + "verbose_name": "запись внешнего источника", + "verbose_name_plural": "записи внешних источников", + "db_table": "parsers_generic_record", + "ordering": ["-created_at"], + }, + ), + migrations.AddIndex( + model_name="genericparserrecord", + index=models.Index( + fields=["source", "load_batch"], + name="parsers_gen_source_a854d0_idx", + ), + ), + migrations.AddIndex( + model_name="genericparserrecord", + index=models.Index( + fields=["source", "inn"], + name="parsers_gen_source_b29dc4_idx", + ), + ), + migrations.AddIndex( + model_name="genericparserrecord", + index=models.Index( + fields=["source", "ogrn"], + name="parsers_gen_source_937e39_idx", + ), + ), + migrations.AddIndex( + model_name="genericparserrecord", + index=models.Index( + fields=["source", "record_date"], + name="parsers_gen_source_aaa60a_idx", + ), + ), + migrations.AddConstraint( + model_name="genericparserrecord", + constraint=models.UniqueConstraint( + fields=("source", "external_id"), + name="unique_generic_source_external_id", + ), + ), + ] diff --git a/src/apps/parsers/migrations/0018_seed_weekly_parser_schedules.py b/src/apps/parsers/migrations/0018_seed_weekly_parser_schedules.py new file mode 100644 index 0000000..b1bf2bd --- /dev/null +++ b/src/apps/parsers/migrations/0018_seed_weekly_parser_schedules.py @@ -0,0 +1,81 @@ +import json + +from django.db import migrations + + +WEEKLY_MSK_CRON = { + "minute": "0", + "hour": "0", + "day_of_week": "6", + "day_of_month": "*", + "month_of_year": "*", + "timezone": "Europe/Moscow", +} + +PARSER_WEEKLY_TASKS = [ + ("industrial", "apps.parsers.tasks.parse_industrial_production"), + ("manufactures", 
def seed_weekly_parser_schedules(apps, schema_editor):
    """Create or update the default weekly periodic task for every parser.

    One ``PeriodicTask`` named ``parser:<source_key>:weekly-saturday-msk`` is
    upserted per entry of ``PARSER_WEEKLY_TASKS``, all sharing a single
    crontab row described by ``WEEKLY_MSK_CRON``.

    Args:
        apps: Historical app registry supplied by the migration framework.
        schema_editor: Schema editor supplied by the migration framework
            (unused).
    """
    CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule")
    PeriodicTask = apps.get_model("django_celery_beat", "PeriodicTask")

    # Reuse an existing identical crontab if there is one. get_or_create()
    # raises MultipleObjectsReturned when several identical rows exist,
    # which would abort the whole migration.
    # NOTE(review): assumes crontab rows are not unique-constrained in
    # django_celery_beat — confirm against the installed version.
    crontab = (
        CrontabSchedule.objects.filter(**WEEKLY_MSK_CRON).order_by("pk").first()
    )
    if crontab is None:
        crontab = CrontabSchedule.objects.create(**WEEKLY_MSK_CRON)

    # Clear the other schedule kinds the model has, so the crontab is the
    # only schedule set on each task. Which fields exist is checked at
    # runtime rather than assumed.
    field_names = {field.name for field in PeriodicTask._meta.fields}
    schedule_fields = {"crontab": crontab}
    for field_name in ("interval", "solar", "clocked"):
        if field_name in field_names:
            schedule_fields[field_name] = None

    for source_key, task_name in PARSER_WEEKLY_TASKS:
        PeriodicTask.objects.update_or_create(
            name=f"parser:{source_key}:weekly-saturday-msk",
            defaults={
                "task": task_name,
                "args": json.dumps([]),
                "kwargs": json.dumps({}),
                "enabled": True,
                "description": (
                    "Default parser schedule: weekly on Saturday 00:00 MSK."
                ),
                **schedule_fields,
            },
        )
choices=ParserLoadLog.Source.choices, + unique=True, + help_text=_("Источник данных"), + ) + next_batch_id = models.PositiveIntegerField( + _("следующий ID пакета"), + default=1, + help_text=_("Следующий batch_id для источника"), + ) + + class Meta: + db_table = "parsers_batch_sequence" + verbose_name = _("счётчик пакетов парсера") + verbose_name_plural = _("счётчики пакетов парсеров") + ordering = ["source"] + + def __str__(self) -> str: + return f"{self.source}: next batch {self.next_batch_id}" + + class IndustrialCertificateRecord(TimestampMixin, models.Model): """ Сертификат промышленного производства РФ. @@ -323,6 +358,116 @@ class IndustrialProductRecord(TimestampMixin, models.Model): return f"{self.registry_number} - {self.product_name[:50]}" +class GenericParserRecord(TimestampMixin, models.Model): + """Универсальная запись для разнородных официальных источников.""" + + load_batch = models.PositiveIntegerField( + _("ID пакета загрузки"), + db_index=True, + help_text=_("Идентификатор пакета загрузки"), + ) + source = models.CharField( + _("источник"), + max_length=50, + choices=ParserLoadLog.Source.choices, + db_index=True, + help_text=_("Источник данных"), + ) + external_id = models.CharField( + _("внешний ID"), + max_length=255, + db_index=True, + help_text=_("Стабильный идентификатор записи во внешнем источнике"), + ) + inn = models.CharField( + _("ИНН"), + max_length=20, + blank=True, + db_index=True, + help_text=_("ИНН организации, если есть в источнике"), + ) + ogrn = models.CharField( + _("ОГРН"), + max_length=20, + blank=True, + db_index=True, + help_text=_("ОГРН организации, если есть в источнике"), + ) + organisation_name = models.TextField( + _("наименование организации"), + blank=True, + help_text=_("Наименование организации из источника"), + ) + title = models.TextField( + _("заголовок"), + blank=True, + help_text=_("Краткое описание записи"), + ) + record_date = models.CharField( + _("дата записи"), + max_length=30, + blank=True, + 
db_index=True, + help_text=_("Дата записи в формате источника"), + ) + amount = models.DecimalField( + _("сумма"), + max_digits=20, + decimal_places=2, + null=True, + blank=True, + help_text=_("Сумма, если источник её содержит"), + ) + status = models.CharField( + _("статус"), + max_length=255, + blank=True, + help_text=_("Статус записи во внешнем источнике"), + ) + url = models.TextField( + _("URL"), + blank=True, + help_text=_("Ссылка на карточку/документ во внешнем источнике"), + ) + payload = models.JSONField( + _("исходные данные"), + default=dict, + blank=True, + help_text=_("Нормализованный исходный документ"), + ) + registry_organization = models.ForeignKey( + "registers.Organization", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="generic_parser_records", + verbose_name=_("организация из реестров"), + help_text=_("Связь с организацией из приложения реестров"), + ) + + class Meta: + db_table = "parsers_generic_record" + verbose_name = _("запись внешнего источника") + verbose_name_plural = _("записи внешних источников") + ordering = ["-created_at"] + indexes = [ + models.Index(fields=["source", "load_batch"]), + models.Index(fields=["source", "inn"]), + models.Index(fields=["source", "ogrn"]), + models.Index(fields=["source", "record_date"]), + ] + constraints = [ + models.UniqueConstraint( + fields=["source", "external_id"], + name="unique_generic_source_external_id", + ), + ] + + def __str__(self) -> str: + label = self.organisation_name or self.title or self.external_id + return f"{self.source}: {label[:80]}" + + class Proxy(TimestampMixin, models.Model): """ Прокси-сервер для парсеров. diff --git a/src/apps/parsers/serializers.py b/src/apps/parsers/serializers.py index a18da16..81a2a1b 100644 --- a/src/apps/parsers/serializers.py +++ b/src/apps/parsers/serializers.py @@ -1,13 +1,19 @@ -""" -Сериализаторы для приложения парсеров. 
+"""Сериализаторы для приложения парсеров.""" -Все сериализаторы read-only, так как данные загружаются только через парсеры. -""" +import ipaddress +import socket +from urllib.parse import urlsplit +from apps.parsers.clients.common.structured import ( + MAX_FILE_SIZE_BYTES, + SUPPORTED_EXCEL_EXTENSIONS, + SUPPORTED_ZIP_EXTENSIONS, +) from apps.parsers.fns_upload import FNS_XLSX_FILENAME_RE from apps.parsers.models import ( FinancialReport, FinancialReportLine, + GenericParserRecord, IndustrialCertificateRecord, IndustrialProductRecord, InspectionRecord, @@ -18,6 +24,53 @@ from apps.parsers.models import ( ) from rest_framework import serializers +BLOCKED_FILE_HOSTS = {"localhost", "localhost.localdomain"} +BLOCKED_FILE_HOST_SUFFIXES = (".localhost", ".local", ".internal") +SUPPORTED_UPLOAD_EXTENSIONS = tuple( + sorted({*SUPPORTED_EXCEL_EXTENSIONS, *SUPPORTED_ZIP_EXTENSIONS, ".zip"}) +) + + +def _is_blocked_ip(address: str) -> bool: + ip = ipaddress.ip_address(address) + return ( + ip.is_loopback + or ip.is_private + or ip.is_link_local + or ip.is_multicast + or ip.is_reserved + or ip.is_unspecified + ) + + +def _validate_public_file_host(host: str) -> None: + normalized_host = host.strip("[]").rstrip(".").lower() + if normalized_host in BLOCKED_FILE_HOSTS or normalized_host.endswith( + BLOCKED_FILE_HOST_SUFFIXES + ): + raise serializers.ValidationError("file_url host is not allowed") + + try: + if _is_blocked_ip(normalized_host): + raise serializers.ValidationError("file_url IP address is not allowed") + return + except ValueError: + pass + + try: + address_info = socket.getaddrinfo( + normalized_host, + 443, + type=socket.SOCK_STREAM, + ) + except socket.gaierror as exc: + raise serializers.ValidationError("file_url host cannot be resolved") from exc + + for item in address_info: + if _is_blocked_ip(item[4][0]): + raise serializers.ValidationError("file_url host resolves to private IP") + + # 
class ParserRunRequestSerializer(serializers.Serializer):
    """Параметры запуска Celery-задачи парсера."""

    # Run parameters for a parser Celery task. Every field is optional;
    # the class docstring is left untouched because schema tooling may
    # expose it.

    file_url = serializers.URLField(required=False, allow_blank=True)
    file_path = serializers.CharField(required=False, allow_blank=True)
    proxies = serializers.ListField(
        child=serializers.CharField(),
        required=False,
        allow_empty=True,
    )
    year = serializers.IntegerField(required=False, min_value=2000, max_value=2100)
    month = serializers.IntegerField(required=False, min_value=1, max_value=12)
    limit = serializers.IntegerField(required=False, min_value=1, max_value=1000)
    offset = serializers.IntegerField(required=False, min_value=0)
    max_months_per_law = serializers.IntegerField(
        required=False,
        min_value=1,
        max_value=36,
    )
    start_year = serializers.IntegerField(
        required=False,
        min_value=2000,
        max_value=2100,
    )
    start_month = serializers.IntegerField(required=False, min_value=1, max_value=12)
    include_fz294 = serializers.BooleanField(required=False)
    include_fz248 = serializers.BooleanField(required=False)
    current_year = serializers.IntegerField(
        required=False,
        min_value=2000,
        max_value=2100,
    )
    current_month = serializers.IntegerField(required=False, min_value=1, max_value=12)
    region_code = serializers.CharField(required=False, allow_blank=True)
    law_type = serializers.CharField(required=False, allow_blank=True)
    company_inn = serializers.CharField(required=False, allow_blank=True)
    text = serializers.CharField(required=False, allow_blank=True)

    def validate_file_url(self, value: str) -> str:
        """Allow worker downloads only from public HTTPS URLs.

        A blank value is accepted as-is. Otherwise the URL must use the
        ``https`` scheme, carry no embedded credentials, have a host, and
        that host must pass ``_validate_public_file_host``.
        """
        if value:
            parts = urlsplit(value)
            if parts.scheme != "https":
                raise serializers.ValidationError("file_url must use https")
            has_credentials = bool(parts.username or parts.password)
            if has_credentials:
                raise serializers.ValidationError(
                    "file_url credentials are not allowed"
                )
            host = parts.hostname
            if not host:
                raise serializers.ValidationError("file_url host is required")
            _validate_public_file_host(host)
        return value

    def validate(self, attrs):
        """Require ``start_year`` and ``start_month`` to be given together."""
        attrs = super().validate(attrs)
        has_year = "start_year" in attrs
        has_month = "start_month" in attrs
        # Exactly one of the pair being present is the only invalid case.
        if has_year ^ has_month:
            message = "start_year and start_month must be provided together"
            raise serializers.ValidationError({"start_month": message})
        return attrs
file extension. Allowed: {extensions}" + ) + if value.size > MAX_FILE_SIZE_BYTES: + raise serializers.ValidationError( + f"File exceeds size limit: {MAX_FILE_SIZE_BYTES} bytes" + ) + return value + + +class ParserScheduleRequestSerializer(ParserRunRequestSerializer): + """Параметры создания/обновления периодической Celery-задачи парсера.""" + + SCHEDULE_TYPES = ("interval", "crontab") + PERIODS = ("seconds", "minutes", "hours", "days") + + source_key = serializers.CharField(required=False) + name = serializers.CharField(required=False, allow_blank=True, max_length=200) + enabled = serializers.BooleanField(required=False, default=True) + schedule_type = serializers.ChoiceField( + choices=SCHEDULE_TYPES, + required=False, + default="interval", + ) + every = serializers.IntegerField(required=False, min_value=1) + period = serializers.ChoiceField(choices=PERIODS, required=False, default="hours") + minute = serializers.CharField(required=False, allow_blank=True, default="0") + hour = serializers.CharField(required=False, allow_blank=True, default="*") + day_of_week = serializers.CharField(required=False, allow_blank=True, default="*") + day_of_month = serializers.CharField(required=False, allow_blank=True, default="*") + month_of_year = serializers.CharField(required=False, allow_blank=True, default="*") + + def validate(self, attrs): + attrs = super().validate(attrs) + if attrs.get("schedule_type", "interval") == "interval" and not attrs.get( + "every" + ): + raise serializers.ValidationError( + {"every": "Required for interval schedule"} + ) + return attrs + + +class ParserScheduleSerializer(serializers.Serializer): + """Описание периодической задачи парсера.""" + + id = serializers.IntegerField() + name = serializers.CharField() + source_key = serializers.CharField() + source = serializers.CharField() + title = serializers.CharField() + task_name = serializers.CharField() + enabled = serializers.BooleanField() + schedule_type = serializers.CharField() + schedule = 
serializers.DictField() + params = serializers.DictField() + last_run_at = serializers.DateTimeField(allow_null=True) + total_run_count = serializers.IntegerField() + date_changed = serializers.DateTimeField() + + +class ParserListQuerySerializer(serializers.Serializer): + """Общие query-параметры списков парсеров.""" + + limit = serializers.IntegerField( + required=False, + default=50, + min_value=1, + max_value=200, + ) + + +class ParserResultQuerySerializer(serializers.Serializer): + """Query-параметры per-source result endpoints.""" + + page = serializers.IntegerField(required=False, default=1, min_value=1) + page_size = serializers.IntegerField( + required=False, + default=20, + min_value=1, + max_value=100, + ) + limit = serializers.IntegerField(required=False, min_value=1, max_value=100) + id = serializers.IntegerField(required=False, min_value=1) + source = serializers.CharField(required=False, allow_blank=True) + external_id = serializers.CharField(required=False, allow_blank=True) + inn = serializers.CharField(required=False, allow_blank=True) + ogrn = serializers.CharField(required=False, allow_blank=True) + load_batch = serializers.IntegerField(required=False, min_value=1) + batch_id = serializers.IntegerField(required=False, min_value=1) + status = serializers.CharField(required=False, allow_blank=True) + record_date = serializers.CharField(required=False, allow_blank=True) + search = serializers.CharField(required=False, allow_blank=True) + ordering = serializers.CharField(required=False, allow_blank=True) + include_payload = serializers.BooleanField(required=False, default=True) + + def validate(self, attrs): + attrs = super().validate(attrs) + if attrs.get("limit"): + attrs["page"] = 1 + attrs["page_size"] = attrs["limit"] + if attrs.get("batch_id") and not attrs.get("load_batch"): + attrs["load_batch"] = attrs["batch_id"] + return attrs + + +class ParserRunResponseSerializer(serializers.Serializer): + """Ответ API на запуск задачи.""" + + task_id = 
serializers.CharField() + source = serializers.CharField() + task_name = serializers.CharField() + + # ============================================================================= # Служебные модели # ============================================================================= @@ -420,6 +687,56 @@ class ParserLoadLogSerializer(serializers.ModelSerializer): return 0 +class GenericParserRecordSerializer(serializers.ModelSerializer): + """Сериализатор универсальных записей новых источников.""" + + class Meta: + model = GenericParserRecord + fields = [ + "id", + "load_batch", + "source", + "external_id", + "inn", + "ogrn", + "organisation_name", + "title", + "record_date", + "amount", + "status", + "url", + "payload", + "registry_organization", + "created_at", + "updated_at", + ] + read_only_fields = fields + + +class ParserResultRecordSerializer(serializers.Serializer): + """Унифицированная запись результата конкретного источника.""" + + id = serializers.IntegerField() + load_batch = serializers.IntegerField() + source = serializers.CharField() + external_id = serializers.CharField(allow_blank=True) + inn = serializers.CharField(allow_blank=True) + ogrn = serializers.CharField(allow_blank=True) + organisation_name = serializers.CharField(allow_blank=True) + title = serializers.CharField(allow_blank=True) + record_date = serializers.CharField(allow_blank=True) + amount = serializers.DecimalField( + max_digits=20, + decimal_places=2, + allow_null=True, + ) + status = serializers.CharField(allow_blank=True) + url = serializers.CharField(allow_blank=True) + payload = serializers.DictField() + created_at = serializers.DateTimeField() + updated_at = serializers.DateTimeField() + + class ParserLoadLogListSerializer(serializers.Serializer): """Строка списка логов в frontend-friendly формате.""" diff --git a/src/apps/parsers/services.py b/src/apps/parsers/services.py index 4b16c1f..43804ad 100644 --- a/src/apps/parsers/services.py +++ b/src/apps/parsers/services.py @@ -15,6 
+15,7 @@ from typing import Any from urllib.parse import urlparse from apps.core.services import BaseService, BulkOperationsMixin +from apps.parsers.clients.common import GenericParserItem from apps.parsers.clients.minpromtorg.schemas import ( IndustrialCertificate, IndustrialProduct, @@ -26,19 +27,21 @@ from apps.parsers.clients.zakupki.schemas import Procurement from apps.parsers.models import ( FinancialReport, FinancialReportLine, + GenericParserRecord, IndustrialCertificateRecord, IndustrialProductRecord, InspectionRecord, ManufacturerRecord, + ParserBatchSequence, ParserLoadLog, ProcurementRecord, Proxy, ) -from registers.models import Organization from django.conf import settings from django.db import IntegrityError, transaction from django.db.models import Q from django.utils import timezone +from registers.models import Organization logger = logging.getLogger(__name__) @@ -52,6 +55,17 @@ _DATE_FORMATS = ( ) +def _model_defaults(instance, lookup_fields: list[str]) -> dict: + """Собрать defaults для get_or_create из Django model instance.""" + lookup = set(lookup_fields) + defaults = {} + for field in instance._meta.concrete_fields: + if field.primary_key or field.name in lookup: + continue + defaults[field.name] = getattr(instance, field.name) + return defaults + + def normalize_to_date(value: str | None) -> date | None: """Нормализовать строку с датой в date.""" if value is None: @@ -238,10 +252,15 @@ class ParserLoadLogService(BaseService[ParserLoadLog]): model = ParserLoadLog @classmethod - @transaction.atomic + def _get_next_batch_id_from_logs(cls, source: str) -> int: + """Рассчитать следующий batch_id по фактическим логам.""" + last_log = cls.model.objects.filter(source=source).order_by("-batch_id").first() + return (last_log.batch_id + 1) if last_log else 1 + + @classmethod def get_next_batch_id(cls, source: str) -> int: """ - Получить следующий batch_id для источника. + Получить следующий batch_id для источника без резервирования. 
Args: source: Код источника (industrial, manufactures) @@ -249,13 +268,57 @@ class ParserLoadLogService(BaseService[ParserLoadLog]): Returns: Следующий batch_id """ - last_log = ( - cls.model.objects.select_for_update() - .filter(source=source) - .order_by("-batch_id") - .first() - ) - return (last_log.batch_id + 1) if last_log else 1 + sequence = ParserBatchSequence.objects.filter(source=source).first() + if sequence is None: + return cls._get_next_batch_id_from_logs(source) + return max(sequence.next_batch_id, cls._get_next_batch_id_from_logs(source)) + + @classmethod + def create_next_load_log( + cls, + *, + source: str, + records_count: int = 0, + status: str = "success", + error_message: str = "", + ) -> ParserLoadLog: + """Атомарно зарезервировать batch_id и создать ParserLoadLog.""" + for _ in range(3): + try: + with transaction.atomic(): + sequence = ( + ParserBatchSequence.objects.select_for_update() + .filter(source=source) + .first() + ) + if sequence is None: + sequence = ParserBatchSequence.objects.create( + source=source, + next_batch_id=cls._get_next_batch_id_from_logs(source), + ) + + next_from_logs = cls._get_next_batch_id_from_logs(source) + if sequence.next_batch_id < next_from_logs: + sequence.next_batch_id = next_from_logs + + batch_id = sequence.next_batch_id + sequence.next_batch_id = batch_id + 1 + sequence.save(update_fields=["next_batch_id", "updated_at"]) + + return cls.model.objects.create( + source=source, + batch_id=batch_id, + records_count=records_count, + status=status, + error_message=error_message, + ) + except IntegrityError: + logger.warning( + "Retrying parser batch allocation after sequence conflict " + "(source=%s)", + source, + ) + raise IntegrityError(f"Cannot allocate parser batch_id for source={source}") @classmethod @transaction.atomic @@ -290,7 +353,6 @@ class ParserLoadLogService(BaseService[ParserLoadLog]): ) @classmethod - @transaction.atomic def create_load_log_with_next_batch_id( cls, *, @@ -317,7 +379,13 @@ class 
ParserLoadLogService(BaseService[ParserLoadLog]): ) return log, batch_id except IntegrityError: - continue + logger.warning( + "Retrying parser load log creation after batch conflict " + "(source=%s, batch_id=%s)", + source, + batch_id, + ) + raise RuntimeError("Failed to allocate unique batch_id") @classmethod @@ -632,6 +700,108 @@ class IndustrialProductService( return cls.filter(registry_number=registry_number) +class GenericParserRecordService(BulkOperationsMixin, BaseService[GenericParserRecord]): + """Сервис для универсальных записей новых источников.""" + + model = GenericParserRecord + + @classmethod + def _create_with_exact_count( + cls, + instances: list[GenericParserRecord], + *, + unique_fields: list[str], + chunk_size: int, + ) -> int: + """Создать записи и точно вернуть количество новых строк.""" + try: + with transaction.atomic(): + cls.bulk_create_chunked(instances, chunk_size=chunk_size) + return len(instances) + except IntegrityError: + logger.info("Falling back to get_or_create after generic record conflict") + + created_count = 0 + for instance in instances: + lookup = {field: getattr(instance, field) for field in unique_fields} + _, created = cls.model.objects.get_or_create( + defaults=_model_defaults(instance, unique_fields), + **lookup, + ) + if created: + created_count += 1 + return created_count + + @classmethod + @transaction.atomic + def save_records( + cls, + records: list[GenericParserItem], + batch_id: int, + *, + source: str, + chunk_size: int = 500, + ) -> int: + """Сохранить нормализованные записи нового источника.""" + if not records: + logger.warning("No generic parser records to save (source=%s)", source) + return 0 + + unique_records = {} + for record in records: + unique_records.setdefault(record.external_id, record) + + existing_external_ids = set( + cls.model.objects.filter( + source=source, + external_id__in=unique_records.keys(), + ).values_list("external_id", flat=True) + ) + instances = [ + cls.model( + 
load_batch=batch_id, + source=source, + external_id=record.external_id, + inn=record.inn, + ogrn=record.ogrn, + organisation_name=record.organisation_name, + title=record.title, + record_date=record.record_date, + amount=record.amount, + status=record.status, + url=record.url, + payload=record.payload, + ) + for external_id, record in unique_records.items() + if external_id not in existing_external_ids + ] + if not instances: + logger.info("No new generic records to save (source=%s)", source) + return 0 + + return cls._create_with_exact_count( + instances, + unique_fields=["source", "external_id"], + chunk_size=chunk_size, + ) + + @classmethod + def find_by_inn(cls, inn: str, source: str | None = None): + """Найти generic records по ИНН.""" + qs = cls.filter(inn=inn) + if source: + qs = qs.filter(source=source) + return qs + + @classmethod + def find_by_ogrn(cls, ogrn: str, source: str | None = None): + """Найти generic records по ОГРН.""" + qs = cls.filter(ogrn=ogrn) + if source: + qs = qs.filter(source=source) + return qs + + class ProxyService(BaseService[Proxy]): """ Сервис для управления прокси-серверами. 
diff --git a/src/apps/parsers/source_registry.py b/src/apps/parsers/source_registry.py new file mode 100644 index 0000000..1ac923e --- /dev/null +++ b/src/apps/parsers/source_registry.py @@ -0,0 +1,294 @@ +"""Каталог парсеров и источников данных.""" + +from dataclasses import dataclass + +from apps.parsers.models import ParserLoadLog + + +@dataclass(frozen=True) +class ParserSourceDescriptor: + """Описание источника для API и запуска задач.""" + + key: str + source: str + title: str + agency: str + data_scope: str + task_name: str + is_existing: bool = False + requires_file_url: bool = False + mode: str = "native_api" + status: str = "implemented" + owner: str = "" + upstream_url: str = "" + access_method: str = "api" + parser_strategy: str = "native" + source_notes: str = "" + supports_file_upload: bool = False + api_route: str = "" + upload_route: str = "" + + @property + def result_list_url(self) -> str: + """Frontend/API URL для списка результата источника.""" + return f"/api/v1/{self.api_route}/" if self.api_route else "" + + @property + def result_detail_url(self) -> str: + """Frontend/API URL для карточки результата источника.""" + return f"/api/v1/{self.api_route}/{{id}}/" if self.api_route else "" + + @property + def upload_url(self) -> str: + """Frontend/API URL ручной загрузки файла источника.""" + if not self.supports_file_upload or not self.api_route: + return "" + return f"/api/v1/{self.upload_api_route}/" + + @property + def upload_api_route(self) -> str: + """API route ручной загрузки без prefix /api/v1.""" + if self.upload_route: + return self.upload_route + return f"{self.api_route}/upload" + + +PARSER_SOURCES: dict[str, ParserSourceDescriptor] = { + "industrial": ParserSourceDescriptor( + key="industrial", + source=ParserLoadLog.Source.INDUSTRIAL, + title="Сертификаты промышленного производства", + agency="Минпромторг России", + data_scope="Заключения о подтверждении производства промышленной продукции", + 
task_name="apps.parsers.tasks.parse_industrial_production", + is_existing=True, + upstream_url="https://minpromtorg.gov.ru/api/kss-document-preview", + parser_strategy="minpromtorg_excel_discovery", + source_notes=( + "Параметры discovery: types[]=668d4f2a-966a-4b65-9fb9-2f1ad19a3d1f, " + "fragment=Заключения о подтверждении производства промышленной продукции." + ), + api_route="minpromtorg/certificates", + ), + "manufactures": ParserSourceDescriptor( + key="manufactures", + source=ParserLoadLog.Source.MANUFACTURES, + title="Реестр производителей", + agency="Минпромторг России", + data_scope="Производители промышленной продукции", + task_name="apps.parsers.tasks.parse_manufactures", + is_existing=True, + upstream_url="https://minpromtorg.gov.ru/api/kss-document-preview", + parser_strategy="minpromtorg_excel_discovery", + source_notes=( + "Параметры discovery: types[]=668d4f2a-966a-4b65-9fb9-2f1ad19a3d1f, " + "fragment=Производители промышленной продукции." + ), + api_route="minpromtorg/manufacturers", + ), + "inspections": ParserSourceDescriptor( + key="inspections", + source=ParserLoadLog.Source.INSPECTIONS, + title="Проверки Генпрокуратуры", + agency="Генпрокуратура РФ", + data_scope="Плановые и внеплановые проверки", + task_name="apps.parsers.tasks.parse_inspections", + is_existing=True, + upstream_url="https://proverki.gov.ru/portal/public-open-data", + access_method="open_data_portal", + parser_strategy="proverki_open_data", + api_route="proverki", + ), + "sync_inspections": ParserSourceDescriptor( + key="sync_inspections", + source=ParserLoadLog.Source.INSPECTIONS, + title="Синхронизация проверок", + agency="Генпрокуратура РФ", + data_scope="Помесячная синхронизация проверок", + task_name="apps.parsers.tasks.sync_inspections", + is_existing=True, + upstream_url="https://proverki.gov.ru/portal/public-open-data/check", + access_method="open_data_portal", + parser_strategy="proverki_open_data_sync", + api_route="proverki", + ), + "mpt_products": 
ParserSourceDescriptor( + key="mpt_products", + source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS, + title="Продукция Минпромторга", + agency="Минпромторг России", + data_scope="Продукция российских производителей", + task_name="apps.parsers.tasks.parse_industrial_products", + mode="official_api", + status="implemented", + upstream_url="https://gisp.gov.ru/pp719v2/pub/prod/", + access_method="official_registry_api", + parser_strategy="gisp_product_registry", + source_notes="Реестр промышленной продукции РФ опубликован в ГИСП.", + api_route="minpromtorg/products", + ), + "procurements_44fz": ParserSourceDescriptor( + key="procurements_44fz", + source=ParserLoadLog.Source.PROCUREMENTS_44FZ, + title="Закупки 44-ФЗ", + agency="Федеральное казначейство", + data_scope="Извещения и закупочные процедуры 44-ФЗ", + task_name="apps.parsers.tasks.parse_procurements_44fz", + mode="official_api", + status="implemented", + upstream_url="https://zakupki.gov.ru/epz/order/extendedsearch/results.html", + access_method="eis_official_api", + parser_strategy="eis_44fz_search", + source_notes="Официальный поиск ЕИС; XML-выгрузки ЕИС требуют отдельного discovery по реестрам.", + api_route="zakupki", + ), + "procurements_223fz": ParserSourceDescriptor( + key="procurements_223fz", + source=ParserLoadLog.Source.PROCUREMENTS_223FZ, + title="Закупки 223-ФЗ", + agency="Федеральное казначейство", + data_scope="Извещения и закупочные процедуры 223-ФЗ", + task_name="apps.parsers.tasks.parse_procurements_223fz", + mode="official_api", + status="implemented", + upstream_url="https://zakupki.gov.ru/epz/orderclause/search/results.html", + access_method="eis_official_api", + parser_strategy="eis_223fz_search", + source_notes="Официальный реестр положений о закупках 223-ФЗ в ЕИС.", + api_route="zakupki", + ), + "contracts": ParserSourceDescriptor( + key="contracts", + source=ParserLoadLog.Source.CONTRACTS, + title="Контракты ЕИС", + agency="Федеральное казначейство", + data_scope="Государственные и 
корпоративные контракты", + task_name="apps.parsers.tasks.parse_contracts", + mode="official_api", + status="implemented", + upstream_url="https://zakupki.gov.ru/epz/contract/search/results.html", + access_method="eis_official_api", + parser_strategy="eis_contract_registry", + api_route="zakupki", + ), + "unfair_suppliers": ParserSourceDescriptor( + key="unfair_suppliers", + source=ParserLoadLog.Source.UNFAIR_SUPPLIERS, + title="Недобросовестные поставщики", + agency="ФАС России / ЕИС Закупки", + data_scope="Реестр недобросовестных поставщиков", + task_name="apps.parsers.tasks.parse_unfair_suppliers", + mode="official_api", + status="implemented", + upstream_url="https://zakupki.gov.ru/epz/dishonestsupplier/search/results.html", + access_method="eis_official_api", + parser_strategy="eis_unfair_supplier_registry", + api_route="fas/unfair-suppliers", + ), + "fas_goz": ParserSourceDescriptor( + key="fas_goz", + source=ParserLoadLog.Source.FAS_GOZ, + title="Уклонение от ГОЗ", + agency="ФАС России", + data_scope="Юрлица, привлеченные за отказ или уклонение от ГОЗ", + task_name="apps.parsers.tasks.parse_fas_goz_evasion", + mode="official_api", + status="implemented", + upstream_url="https://fas.gov.ru/pages/activity/reestr-uridicheskih-lic", + access_method="official_registry_api", + parser_strategy="fas_goz_registry", + api_route="fas/goz", + ), + "fns_financial": ParserSourceDescriptor( + key="fns_financial", + source=ParserLoadLog.Source.FNS_REPORTS, + title="Финансово-экономические показатели", + agency="ФНС России", + data_scope="Финансово-экономическая выгрузка", + task_name="apps.parsers.tasks.scan_fns_directory", + mode="official_api", + status="implemented", + owner="Сергей", + upstream_url=( + "https://bo.nalog.gov.ru/advanced-search/organizations/search" + "?query=%D0%9E%D0%9E%D0%9E&page=0&size=100" + ), + access_method="public_web_api", + parser_strategy="fns_bfo_search_and_download", + source_notes=( + "ГИР БО: поиск организаций и скачивание отчетности с ЭП 
ФНС. " + "Ручная загрузка разрешена для финансовых выгрузок от Сергея." + ), + supports_file_upload=True, + api_route="fns/reports", + upload_route="fns/upload", + ), + "arbitration": ParserSourceDescriptor( + key="arbitration", + source=ParserLoadLog.Source.ARBITRATION, + title="Арбитражные дела", + agency="Верховный суд РФ / КАД Арбитр", + data_scope="Арбитражные дела по организациям", + task_name="apps.parsers.tasks.parse_arbitration_cases", + mode="official_api", + status="implemented", + upstream_url="https://kad.arbitr.ru/", + access_method="official_search_api", + parser_strategy="kad_arbitr_search", + api_route="arbitration/cases", + ), + "fedresurs_bankruptcy": ParserSourceDescriptor( + key="fedresurs_bankruptcy", + source=ParserLoadLog.Source.FEDRESURS_BANKRUPTCY, + title="Банкротства Федресурс", + agency="Федресурс", + data_scope="Сведения о процедурах банкротства", + task_name="apps.parsers.tasks.parse_fedresurs_bankruptcy", + mode="official_api", + status="implemented", + owner="Сергей", + upstream_url="https://bankrot.fedresurs.ru/", + access_method="official_registry_api", + parser_strategy="fedresurs_bankruptcy_search", + source_notes=( + "Официальный ЕФРСБ; может отдавать anti-bot challenge worker'ам. " + "Ручная загрузка разрешена только для выгрузок, переданных Сергеем." 
+ ), + supports_file_upload=True, + api_route="fedresurs/bankruptcy", + ), + "fstec": ParserSourceDescriptor( + key="fstec", + source=ParserLoadLog.Source.FSTEC, + title="Реестры ФСТЭК", + agency="ФСТЭК России", + data_scope="Реестры по информационной безопасности", + task_name="apps.parsers.tasks.parse_fstec_registers", + mode="official_api", + status="implemented", + upstream_url="https://reestr.fstec.ru/reg3", + access_method="official_registry_api", + parser_strategy="fstec_registry_table", + api_route="fstec/registers", + ), + "trudvsem": ParserSourceDescriptor( + key="trudvsem", + source=ParserLoadLog.Source.TRUDVSEM, + title="Вакансии Работа России", + agency="ЕЦП Работа в России", + data_scope="Вакансии работодателей", + task_name="apps.parsers.tasks.parse_trudvsem_vacancies", + upstream_url="https://opendata.trudvsem.ru/api/v1/vacancies", + access_method="public_api", + parser_strategy="trudvsem_vacancies_api", + api_route="trudvsem/vacancies", + ), +} + + +def get_source_by_model_source(source: str) -> ParserSourceDescriptor | None: + """Найти описание парсера по значению ParserLoadLog.Source.""" + for descriptor in PARSER_SOURCES.values(): + if descriptor.source == source: + return descriptor + return None diff --git a/src/apps/parsers/tasks.py b/src/apps/parsers/tasks.py index d0ccf03..03a4769 100644 --- a/src/apps/parsers/tasks.py +++ b/src/apps/parsers/tasks.py @@ -14,16 +14,19 @@ from pathlib import Path from apps.core.services import BackgroundJobService from apps.core.tasks import PeriodicTask as CorePeriodicTask +from apps.parsers.clients.common import GenericParserItem, StructuredDataClient from apps.parsers.clients.minpromtorg import ( IndustrialProductionClient, IndustrialProductsClient, ManufacturesClient, ) from apps.parsers.clients.proverki import ProverkiClient +from apps.parsers.clients.trudvsem import TrudvsemClient from apps.parsers.clients.zakupki import ZakupkiClient from apps.parsers.models import ParserLoadLog from 
apps.parsers.services import ( FNSReportService, + GenericParserRecordService, IndustrialCertificateService, IndustrialProductService, InspectionService, @@ -33,6 +36,7 @@ from apps.parsers.services import ( ProxyService, ProxyToolsSyncService, ) +from apps.parsers.source_registry import PARSER_SOURCES from celery import shared_task from requests.adapters import BaseAdapter @@ -88,6 +92,78 @@ def _get_or_create_background_job( return job +def _run_generic_parser( + self, + *, + source_key: str, + source: str, + task_name: str, + fetch_records, + requested_by_id: int | None = None, +) -> dict: + """Единый runner для новых разнородных источников.""" + load_log, batch_id = ParserLoadLogService.create_load_log_with_next_batch_id( + source=source, + status="in_progress", + ) + task_id = self.request.id or str(uuid.uuid4()) + job = _get_or_create_background_job( + task_id=task_id, + task_name=task_name, + source=source, + batch_id=batch_id, + requested_by_id=requested_by_id, + meta={"source_key": source_key}, + ) + job.mark_started() + job.update_progress(0, "Инициализация парсера...") + + try: + job.update_progress(20, "Загрузка данных из официального источника...") + records = fetch_records() + job.update_progress(70, f"Сохранение {len(records)} записей...") + saved_count = GenericParserRecordService.save_records( + records, + batch_id=batch_id, + source=source, + ) + ParserLoadLogService.update( + load_log, + status="success", + records_count=saved_count, + ) + result = {"batch_id": batch_id, "saved": saved_count, "status": "success"} + job.complete(result=result) + return result + except Exception as e: + logger.error("%s failed: %s", task_name, e, exc_info=True) + ParserLoadLogService.mark_failed(load_log, str(e)) + job.fail(error=str(e)) + raise + + +def _fetch_structured_records( + *, + source_key: str, + file_url: str | None, + file_path: str | None, + proxies: list[str] | None, +) -> list[GenericParserItem]: + """Загрузить records через structured client из URL 
или локального storage.""" + descriptor = PARSER_SOURCES[source_key] + client = StructuredDataClient(source=source_key, proxies=proxies) + if file_path: + from django.core.files.storage import default_storage + + with default_storage.open(file_path, "rb") as handle: + content = handle.read() + return client.fetch_records( + content=content, + file_name=Path(file_path).name, + ) + return client.fetch_records(file_url=file_url or descriptor.upstream_url) + + @shared_task(bind=True, base=CorePeriodicTask) def sync_ru_proxies(self) -> dict[str, int | str]: # noqa: ARG001 """Периодически загружать RU-прокси из Proxy-Tools.""" @@ -757,6 +833,7 @@ def parse_all_sources( Task IDs всех запущенных парсеров """ logger.info("Starting all parsers from all sources") + from django.conf import settings if client_adapter is not None: industrial_result = parse_industrial_production.apply( @@ -775,6 +852,7 @@ def parse_all_sources( "use_playwright": inspections_use_playwright, } ) + generic_results = {} else: industrial_result = parse_industrial_production.delay( proxies=proxies, @@ -793,12 +871,30 @@ def parse_all_sources( client_adapter=client_adapter, use_playwright=inspections_use_playwright, ) + generic_results = {} + if not getattr(settings, "CELERY_TASK_ALWAYS_EAGER", False): + generic_results = { + "procurements_44fz": parse_procurements_44fz.delay(proxies=proxies).id, + "procurements_223fz": parse_procurements_223fz.delay( + proxies=proxies + ).id, + "contracts": parse_contracts.delay(proxies=proxies).id, + "unfair_suppliers": parse_unfair_suppliers.delay(proxies=proxies).id, + "fas_goz": parse_fas_goz_evasion.delay(proxies=proxies).id, + "arbitration": parse_arbitration_cases.delay(proxies=proxies).id, + "fedresurs_bankruptcy": parse_fedresurs_bankruptcy.delay( + proxies=proxies + ).id, + "fstec": parse_fstec_registers.delay(proxies=proxies).id, + "trudvsem": parse_trudvsem_vacancies.delay(proxies=proxies).id, + } results = { "industrial": industrial_result.id, 
"industrial_products": industrial_products_result.id, "manufactures": manufactures_result.id, "inspections": inspections_result.id, + **generic_results, } return results @@ -1356,6 +1452,244 @@ def sync_procurements( # noqa: C901 raise +@shared_task(bind=True) +def parse_procurements_44fz( + self, + *, + file_url: str | None = None, + file_path: str | None = None, + proxies: list[str] | None = None, + requested_by_id: int | None = None, +) -> dict: + """Парсинг официальной выдачи ЕИС 44-ФЗ в GenericParserRecord.""" + proxies = _resolve_proxies(proxies) + return _run_generic_parser( + self, + source_key="procurements_44fz", + source=ParserLoadLog.Source.PROCUREMENTS_44FZ, + task_name="apps.parsers.tasks.parse_procurements_44fz", + requested_by_id=requested_by_id, + fetch_records=lambda: _fetch_structured_records( + source_key="procurements_44fz", + file_url=file_url, + file_path=file_path, + proxies=proxies, + ), + ) + + +@shared_task(bind=True) +def parse_procurements_223fz( + self, + *, + file_url: str | None = None, + file_path: str | None = None, + proxies: list[str] | None = None, + requested_by_id: int | None = None, +) -> dict: + """Парсинг официальной выдачи ЕИС 223-ФЗ в GenericParserRecord.""" + proxies = _resolve_proxies(proxies) + return _run_generic_parser( + self, + source_key="procurements_223fz", + source=ParserLoadLog.Source.PROCUREMENTS_223FZ, + task_name="apps.parsers.tasks.parse_procurements_223fz", + requested_by_id=requested_by_id, + fetch_records=lambda: _fetch_structured_records( + source_key="procurements_223fz", + file_url=file_url, + file_path=file_path, + proxies=proxies, + ), + ) + + +@shared_task(bind=True) +def parse_contracts( + self, + *, + file_url: str | None = None, + file_path: str | None = None, + proxies: list[str] | None = None, + requested_by_id: int | None = None, +) -> dict: + """Парсинг официальной выдачи контрактов ЕИС.""" + proxies = _resolve_proxies(proxies) + return _run_generic_parser( + self, + source_key="contracts", 
@shared_task(bind=True)
def parse_unfair_suppliers(
    self,
    *,
    file_url: str | None = None,
    file_path: str | None = None,
    proxies: list[str] | None = None,
    requested_by_id: int | None = None,
) -> dict:
    """Parse the registry of unfair suppliers.

    The proxy list is normalised with ``_resolve_proxies`` and the actual run
    is delegated to ``_run_generic_parser``; records are obtained lazily via
    ``_fetch_structured_records`` from ``file_url`` / ``file_path``.
    """
    resolved_proxies = _resolve_proxies(proxies)

    def _fetch():
        # Called by the generic runner when it needs the records.
        return _fetch_structured_records(
            source_key="unfair_suppliers",
            file_url=file_url,
            file_path=file_path,
            proxies=resolved_proxies,
        )

    return _run_generic_parser(
        self,
        source_key="unfair_suppliers",
        source=ParserLoadLog.Source.UNFAIR_SUPPLIERS,
        task_name="apps.parsers.tasks.parse_unfair_suppliers",
        requested_by_id=requested_by_id,
        fetch_records=_fetch,
    )
@shared_task(bind=True)
def parse_fstec_registers(
    self,
    *,
    file_url: str | None = None,
    file_path: str | None = None,
    proxies: list[str] | None = None,
    requested_by_id: int | None = None,
) -> dict:
    """Parse the FSTEC registers.

    The proxy list is normalised with ``_resolve_proxies`` and the actual run
    is delegated to ``_run_generic_parser``; records are obtained lazily via
    ``_fetch_structured_records`` from ``file_url`` / ``file_path``.
    """
    resolved_proxies = _resolve_proxies(proxies)

    def _fetch():
        # Called by the generic runner when it needs the records.
        return _fetch_structured_records(
            source_key="fstec",
            file_url=file_url,
            file_path=file_path,
            proxies=resolved_proxies,
        )

    return _run_generic_parser(
        self,
        source_key="fstec",
        source=ParserLoadLog.Source.FSTEC,
        task_name="apps.parsers.tasks.parse_fstec_registers",
        requested_by_id=requested_by_id,
        fetch_records=_fetch,
    )
# Parser dashboard routes.
# The run/upload views are called with a ``source_key`` keyword argument and
# the schedule detail view with ``pk``, so those routes must capture the value
# with a path converter; without it the view call fails on the missing argument.
urlpatterns = [
    path("sources/", ParserSourceListView.as_view(), name="source-list"),
    path("dashboard/", ParserDashboardDataView.as_view(), name="dashboard-data"),
    path("run/<str:source_key>/", ParserRunView.as_view(), name="run-parser"),
    path(
        "upload/<str:source_key>/",
        ParserUploadView.as_view(),
        name="upload-parser-data",
    ),
    path(
        "schedules/",
        ParserScheduleListCreateView.as_view(),
        name="schedule-list",
    ),
    path(
        "schedules/<int:pk>/",
        ParserScheduleDetailView.as_view(),
        name="schedule-detail",
    ),
    path("load-logs/", ParserLoadLogListView.as_view(), name="load-log-list"),
    path("records/", GenericParserRecordListView.as_view(), name="generic-record-list"),
]
django.db.models.functions import Cast from django.http import HttpResponse +from django.utils.text import get_valid_filename +from django.views.generic import TemplateView +from django_celery_beat.models import CrontabSchedule, IntervalSchedule, PeriodicTask from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status from rest_framework.exceptions import ValidationError -from rest_framework.parsers import MultiPartParser +from rest_framework.parsers import FormParser, MultiPartParser from rest_framework.permissions import IsAdminUser, IsAuthenticated +from rest_framework.request import Request from rest_framework.response import Response from rest_framework.views import APIView from rest_framework.viewsets import ReadOnlyModelViewSet @@ -70,6 +91,196 @@ FNS_TAG = swagger_tag("ФНС - Бухгалтерская отчетность" SOURCES_TAG = swagger_tag("Источники для фронта", "frontend_sources") SYSTEM_TAG = swagger_tag("Системные", "system") PARSING_TAG = swagger_tag("Настройки парсинга", "parsing_settings") +PARSERS_TAG = "Parser Management" +MINPROMTORG_RESULT_TAG = "Minpromtorg" +PROVERKI_RESULT_TAG = "Prosecutor General Inspections" +ZAKUPKI_RESULT_TAG = "EIS Zakupki" +FAS_RESULT_TAG = "FAS" +FNS_RESULT_TAG = "FNS" +ARBITRATION_RESULT_TAG = "Arbitration" +FEDRESURS_RESULT_TAG = "Fedresurs" +FSTEC_RESULT_TAG = "FSTEC" +TRUDVSEM_RESULT_TAG = "Trudvsem" +SOURCE_RESULT_TAGS = { + "industrial": MINPROMTORG_RESULT_TAG, + "manufactures": MINPROMTORG_RESULT_TAG, + "mpt_products": MINPROMTORG_RESULT_TAG, + "inspections": PROVERKI_RESULT_TAG, + "sync_inspections": PROVERKI_RESULT_TAG, + "procurements_44fz": ZAKUPKI_RESULT_TAG, + "procurements_223fz": ZAKUPKI_RESULT_TAG, + "contracts": ZAKUPKI_RESULT_TAG, + "unfair_suppliers": FAS_RESULT_TAG, + "fas_goz": FAS_RESULT_TAG, + "fns_financial": FNS_RESULT_TAG, + "arbitration": ARBITRATION_RESULT_TAG, + "fedresurs_bankruptcy": FEDRESURS_RESULT_TAG, + "fstec": FSTEC_RESULT_TAG, + "trudvsem": 
TRUDVSEM_RESULT_TAG, +} + +TASKS_BY_NAME = { + "apps.parsers.tasks.parse_industrial_production": tasks.parse_industrial_production, + "apps.parsers.tasks.parse_industrial_products": tasks.parse_industrial_products, + "apps.parsers.tasks.parse_manufactures": tasks.parse_manufactures, + "apps.parsers.tasks.parse_inspections": tasks.parse_inspections, + "apps.parsers.tasks.sync_inspections": tasks.sync_inspections, + "apps.parsers.tasks.parse_procurements_44fz": tasks.parse_procurements_44fz, + "apps.parsers.tasks.parse_procurements_223fz": tasks.parse_procurements_223fz, + "apps.parsers.tasks.parse_contracts": tasks.parse_contracts, + "apps.parsers.tasks.parse_unfair_suppliers": tasks.parse_unfair_suppliers, + "apps.parsers.tasks.parse_fas_goz_evasion": tasks.parse_fas_goz_evasion, + "apps.parsers.tasks.scan_fns_directory": tasks.scan_fns_directory, + "apps.parsers.tasks.parse_arbitration_cases": tasks.parse_arbitration_cases, + "apps.parsers.tasks.parse_fedresurs_bankruptcy": tasks.parse_fedresurs_bankruptcy, + "apps.parsers.tasks.parse_fstec_registers": tasks.parse_fstec_registers, + "apps.parsers.tasks.parse_trudvsem_vacancies": tasks.parse_trudvsem_vacancies, +} +PARSER_TASK_NAMES = set(TASKS_BY_NAME) +NATIVE_RECORD_MODELS = { + ParserLoadLog.Source.INDUSTRIAL: IndustrialCertificateRecord, + ParserLoadLog.Source.INDUSTRIAL_PRODUCTS: IndustrialProductRecord, + ParserLoadLog.Source.MANUFACTURES: ManufacturerRecord, + ParserLoadLog.Source.INSPECTIONS: InspectionRecord, + ParserLoadLog.Source.PROCUREMENTS: ProcurementRecord, +} +EXISTING_TASK_PARAMS = { + "industrial": {"proxies", "requested_by_id"}, + "manufactures": {"proxies", "requested_by_id"}, + "mpt_products": {"proxies", "requested_by_id"}, + "inspections": {"year", "month", "file_url", "proxies", "requested_by_id"}, + "sync_inspections": { + "proxies", + "requested_by_id", + "max_months_per_law", + "start_year", + "start_month", + "include_fz294", + "include_fz248", + "current_year", + "current_month", + }, 
+ "fns_financial": {"requested_by_id"}, +} +TRUDVSEM_PARAMS = { + "limit", + "offset", + "region_code", + "company_inn", + "text", + "proxies", + "requested_by_id", +} +GENERIC_FILE_PARAMS = {"file_url", "file_path", "proxies", "requested_by_id"} +PAGE_PARAM = openapi.Parameter( + "page", + openapi.IN_QUERY, + description="Номер страницы", + type=openapi.TYPE_INTEGER, +) +PAGE_SIZE_PARAM = openapi.Parameter( + "page_size", + openapi.IN_QUERY, + description="Размер страницы, максимум 100", + type=openapi.TYPE_INTEGER, +) +LIMIT_PARAM = openapi.Parameter( + "limit", + openapi.IN_QUERY, + description="Максимальное количество записей", + type=openapi.TYPE_INTEGER, +) +RECORD_ID_PARAM = openapi.Parameter( + "id", + openapi.IN_QUERY, + description="ID записи", + type=openapi.TYPE_INTEGER, +) +SOURCE_PARAM = openapi.Parameter( + "source", + openapi.IN_QUERY, + description="Ключ или значение источника данных", + type=openapi.TYPE_STRING, +) +STATUS_PARAM = openapi.Parameter( + "status", + openapi.IN_QUERY, + description="Фильтр по статусу", + type=openapi.TYPE_STRING, +) +EXTERNAL_ID_PARAM = openapi.Parameter( + "external_id", + openapi.IN_QUERY, + description="Стабильный ID записи во внешнем источнике", + type=openapi.TYPE_STRING, +) +INN_PARAM = openapi.Parameter( + "inn", + openapi.IN_QUERY, + description="Фильтр по ИНН", + type=openapi.TYPE_STRING, +) +OGRN_PARAM = openapi.Parameter( + "ogrn", + openapi.IN_QUERY, + description="Фильтр по ОГРН", + type=openapi.TYPE_STRING, +) +LOAD_BATCH_PARAM = openapi.Parameter( + "load_batch", + openapi.IN_QUERY, + description="Batch загрузки", + type=openapi.TYPE_INTEGER, +) +SEARCH_PARAM = openapi.Parameter( + "search", + openapi.IN_QUERY, + description="Поиск по текстовым полям источника", + type=openapi.TYPE_STRING, +) +ORDERING_PARAM = openapi.Parameter( + "ordering", + openapi.IN_QUERY, + description="Сортировка", + type=openapi.TYPE_STRING, +) +INCLUDE_PAYLOAD_PARAM = openapi.Parameter( + "include_payload", + openapi.IN_QUERY, 
def _model_payload(record) -> dict:
    """Return a plain dict snapshot of the concrete fields of ``record``.

    Keys are the field names from ``record._meta.fields``. Values exposing an
    ``isoformat`` method (dates, datetimes, times) are converted to ISO
    strings; everything else is passed through unchanged.
    """
    payload = {}
    for field in record._meta.fields:
        # Read the raw column attribute: for a relation field this is the
        # stored key (``<name>_id``) rather than the related model instance,
        # which would not be serializable and would be loaded lazily per row.
        # For ordinary fields ``attname`` equals ``name``.
        value = getattr(record, getattr(field, "attname", field.name))
        if hasattr(value, "isoformat"):
            value = value.isoformat()
        payload[field.name] = value
    return payload
ParserLoadLog.Source.INDUSTRIAL: + external_id = record.certificate_number + organisation_name = record.organisation_name + title = record.certificate_number + record_date = record.issue_date + status_value = "" + url = record.certificate_file_url + inn = record.inn + ogrn = record.ogrn + elif source == ParserLoadLog.Source.INDUSTRIAL_PRODUCTS: + external_id = record.registry_number + organisation_name = record.full_organisation_name + title = record.product_name + record_date = "" + status_value = "" + url = "" + inn = record.inn + ogrn = record.ogrn + elif source == ParserLoadLog.Source.MANUFACTURES: + external_id = record.inn + organisation_name = record.full_legal_name + title = record.full_legal_name + record_date = "" + status_value = "" + url = "" + inn = record.inn + ogrn = record.ogrn + elif source == ParserLoadLog.Source.PROCUREMENTS: + external_id = record.purchase_number + organisation_name = record.customer_name + title = record.purchase_name + record_date = record.publish_date + status_value = record.status + url = record.href + inn = record.customer_inn + ogrn = record.customer_ogrn + else: + external_id = record.registration_number + organisation_name = record.organisation_name + title = record.control_authority + record_date = record.start_date + status_value = record.status + url = "" + inn = record.inn + ogrn = record.ogrn + + return { + "id": record.id, + "load_batch": record.load_batch, + "source": source, + "external_id": external_id, + "inn": inn, + "ogrn": ogrn, + "organisation_name": organisation_name, + "title": title, + "record_date": record_date, + "amount": getattr(record, "max_price_amount", None), + "status": status_value, + "url": url, + "payload": _model_payload(record) if include_payload else {}, + "created_at": record.created_at, + "updated_at": record.updated_at, + } + + +def _generic_record_to_result( + record: GenericParserRecord, + *, + include_payload: bool = True, +) -> dict: + return { + "id": record.id, + "load_batch": 
def _parse_periodic_task_kwargs(periodic_task: PeriodicTask) -> dict:
    """Decode the JSON ``kwargs`` of a periodic task into a dict.

    Returns an empty dict when ``kwargs`` is empty, is not valid JSON, or
    decodes to something other than a JSON object.
    """
    try:
        parsed = json.loads(periodic_task.kwargs or "{}")
    except (TypeError, ValueError):
        return {}
    # Callers use ``.get()`` on the result and unpack it with ``**``; a stored
    # list, string or number would otherwise break them.
    return parsed if isinstance(parsed, dict) else {}
def _native_field_map(source: str) -> dict[str, str]:
    """Map public API field names to model field names for a native source.

    The mapping is used both for equality filters and for validating the
    ``ordering`` query parameter, so every value must be a real field of the
    model registered for ``source``. Any source other than the four handled
    explicitly falls through to the inspection-record mapping.
    """
    common = {
        "id": "id",
        "load_batch": "load_batch",
        "inn": "inn",
        "ogrn": "ogrn",
        "created_at": "created_at",
        "updated_at": "updated_at",
    }
    if source == ParserLoadLog.Source.INDUSTRIAL:
        return {
            **common,
            "external_id": "certificate_number",
            "organisation_name": "organisation_name",
            "title": "certificate_number",
            "record_date": "issue_date",
        }
    if source == ParserLoadLog.Source.INDUSTRIAL_PRODUCTS:
        return {
            **common,
            "external_id": "registry_number",
            "organisation_name": "full_organisation_name",
            "title": "product_name",
        }
    if source == ParserLoadLog.Source.MANUFACTURES:
        return {
            **common,
            "external_id": "inn",
            "organisation_name": "full_legal_name",
            "title": "full_legal_name",
        }
    if source == ParserLoadLog.Source.PROCUREMENTS:
        return {
            **common,
            # Procurement records store the customer's identifiers as
            # ``customer_inn`` / ``customer_ogrn`` (the same attributes the
            # result conversion and the search filter read), so the generic
            # ``inn`` / ``ogrn`` names must be overridden here.
            "inn": "customer_inn",
            "ogrn": "customer_ogrn",
            "external_id": "purchase_number",
            "organisation_name": "customer_name",
            "title": "purchase_name",
            "record_date": "publish_date",
            "status": "status",
        }
    return {
        **common,
        "external_id": "registration_number",
        "organisation_name": "organisation_name",
        "title": "control_authority",
        "record_date": "start_date",
        "status": "status",
    }
def _result_sources_for_request(descriptor, params: dict) -> set[str]:
    """Resolve which source values a result request may read.

    Without a ``source`` parameter every source sharing the descriptor's API
    route is returned. A ``source`` parameter narrows the set to one value; it
    may be given either as a source value or as a registry key of the same
    route. Anything else yields an empty set.
    """
    allowed = _route_model_sources(descriptor)
    wanted = params.get("source")
    if not wanted:
        return allowed
    if wanted in allowed:
        return {wanted}
    # Fall back to interpreting the parameter as a registry key.
    candidate = PARSER_SOURCES.get(wanted)
    same_route = bool(candidate) and candidate.api_route == descriptor.api_route
    return {candidate.source} if same_route else set()
def _filter_result_queryset(source_key: str, params: dict):
    """Return ``(descriptor, queryset)`` for a source key.

    ``(None, None)`` is returned when the key is not in ``PARSER_SOURCES``.
    Sources with a dedicated model are filtered through the native path, all
    others through the generic record path.
    """
    descriptor = PARSER_SOURCES.get(source_key)
    if descriptor is None:
        return None, None
    allowed_sources = _result_sources_for_request(descriptor, params)
    if descriptor.source in NATIVE_RECORD_MODELS:
        queryset = _filter_native_result_queryset(
            descriptor.source,
            params,
            allowed_sources,
        )
    else:
        queryset = _filter_generic_result_queryset(allowed_sources, params)
    return descriptor, queryset
_filter_result_queryset(resolved_source_key, params) + if descriptor is None: + return _source_not_found_response(resolved_source_key) + + paginator = Paginator(queryset, params["page_size"]) + page_obj = paginator.get_page(params["page"]) + rows = [ + _result_record_to_dict( + descriptor.source, + record, + include_payload=params["include_payload"], + ) + for record in page_obj.object_list + ] + serializer = ParserResultRecordSerializer(rows, many=True) + return api_response( + serializer.data, + pagination={ + "count": paginator.count, + "page": page_obj.number, + "page_size": params["page_size"], + "pages": paginator.num_pages, + }, + ) + + +class SourceResultDetailView(APIView): + """GET одной записи результата источника.""" + + permission_classes = [IsAuthenticated] + source_key = "" + + def get(self, request: Request, pk: int, source_key: str | None = None): + resolved_source_key = source_key or self.source_key + query_serializer = ParserResultQuerySerializer(data=request.query_params) + query_serializer.is_valid(raise_exception=True) + params = {**query_serializer.validated_data, "id": pk} + + descriptor, queryset = _filter_result_queryset(resolved_source_key, params) + if descriptor is None: + return _source_not_found_response(resolved_source_key) + record = queryset.first() + if record is None: + return api_error_response( + [{"code": "not_found", "message": "Запись не найдена"}], + status_code=status.HTTP_404_NOT_FOUND, + ) + data = _result_record_to_dict( + descriptor.source, + record, + include_payload=params["include_payload"], + ) + return api_response(ParserResultRecordSerializer(data).data) + + +class ParserSourceListView(APIView): + """Каталог parser sources для dashboard.""" + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema(tags=[PARSERS_TAG], responses={200: ParserSourceSerializer}) + def get(self, request: Request): + serializer = ParserSourceSerializer(PARSER_SOURCES.values(), many=True) + return api_response(serializer.data) 
class ParserRunView(APIView):
    """Start the parser Celery task of one source immediately."""

    permission_classes = [IsAuthenticated]

    def post(self, request: Request, source_key: str):
        """Validate the request, register a background job and dispatch the task.

        Responds with 404 for an unknown source, with 501 when the source's
        task is not present in ``TASKS_BY_NAME``, and with 202 plus the task
        id once the task has been queued.
        """
        descriptor = PARSER_SOURCES.get(source_key)
        if descriptor is None:
            return _source_not_found_response(source_key)
        serializer = ParserRunRequestSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        # The task map is maintained separately from the source registry, so a
        # registry entry may name a task that is not mapped here; answer with a
        # structured error instead of an unhandled KeyError.
        task = TASKS_BY_NAME.get(descriptor.task_name)
        if task is None:
            return api_error_response(
                [
                    {
                        "code": "parser_task_not_registered",
                        "message": f"Для источника {source_key} не зарегистрирована задача",
                    }
                ],
                status_code=status.HTTP_501_NOT_IMPLEMENTED,
            )
        task_kwargs = build_task_kwargs(
            source_key, serializer.validated_data, request.user.id
        )
        # The id is generated up front so the job row and the Celery task
        # share the same identifier.
        task_id = str(uuid.uuid4())
        BackgroundJobService.create_job(
            task_id=task_id,
            task_name=descriptor.task_name,
            user_id=request.user.id,
            meta={"source_key": source_key, "source": descriptor.source},
        )
        async_result = task.apply_async(kwargs=task_kwargs, task_id=task_id)
        return api_response(
            {
                "task_id": async_result.id,
                "source": descriptor.source,
                "task_name": descriptor.task_name,
            },
            status_code=status.HTTP_202_ACCEPTED,
        )
class ParserLoadLogListView(APIView):
    """Most recent ParserLoadLog entries, with a validated ``limit``."""

    permission_classes = [IsAuthenticated]

    def get(self, request: Request):
        """Return the newest log rows, optionally filtered by source and status."""
        query_serializer = ParserListQuerySerializer(data=request.query_params)
        query_serializer.is_valid(raise_exception=True)
        limit = query_serializer.validated_data["limit"]

        # Only non-empty query parameters become filters.
        filters = {
            name: value
            for name in ("source", "status")
            if (value := request.query_params.get(name))
        }
        logs = ParserLoadLog.objects.all().order_by("-created_at").filter(**filters)
        serializer = ParserLoadLogSerializer(logs[:limit], many=True)
        return api_response(serializer.data)
class ParserScheduleListCreateView(APIView):
    """List and create django-celery-beat PeriodicTask entries for parser tasks."""

    permission_classes = [IsAuthenticated]

    def get(self, request: Request):
        """Return the requesting user's parser schedules."""
        rows = list(
            map(_periodic_task_to_dict, _parser_periodic_tasks_for_user(request.user))
        )
        return api_response(ParserScheduleSerializer(rows, many=True).data)

    def post(self, request: Request):
        """Create (or update by name) a schedule for the given ``source_key``."""
        schedule_request = ParserScheduleRequestSerializer(data=request.data)
        schedule_request.is_valid(raise_exception=True)
        params = schedule_request.validated_data

        source_key = params.get("source_key")
        if not source_key:
            missing_source = {
                "code": "source_required",
                "message": "source_key обязателен",
            }
            return api_error_response([missing_source])
        if source_key not in PARSER_SOURCES:
            return _source_not_found_response(source_key)

        saved = self._upsert_periodic_task(
            source_key=source_key,
            params=params,
            user_id=request.user.id,
        )
        body = ParserScheduleSerializer(_periodic_task_to_dict(saved)).data
        return api_response(body, status_code=status.HTTP_201_CREATED)

    @staticmethod
    def _upsert_periodic_task(
        *,
        source_key: str,
        params: dict,
        user_id: int,
        existing: PeriodicTask | None = None,
    ) -> PeriodicTask:
        """Write schedule ``params`` into a PeriodicTask and return it.

        When ``existing`` is given, that row is updated in place (including its
        name). Otherwise the row is looked up by name and created or updated.
        Exactly one of ``interval`` / ``crontab`` is set; the other is ``None``.
        """
        descriptor = PARSER_SOURCES[source_key]
        task_kwargs = build_task_kwargs(source_key, params, user_id)

        interval = None
        crontab = None
        if params["schedule_type"] == "interval":
            interval, _ = IntervalSchedule.objects.get_or_create(
                every=params["every"],
                period=params["period"],
            )
        else:
            crontab, _ = CrontabSchedule.objects.get_or_create(
                minute=params.get("minute", "0"),
                hour=params.get("hour", "*"),
                day_of_week=params.get("day_of_week", "*"),
                day_of_month=params.get("day_of_month", "*"),
                month_of_year=params.get("month_of_year", "*"),
                timezone="Europe/Moscow",
            )

        # NOTE(review): the name may come from the request and the non-existing
        # branch matches on name alone — confirm a caller cannot reach another
        # user's PeriodicTask this way.
        name = params.get("name") or f"parser:{source_key}:user:{user_id}"
        defaults = {
            "task": descriptor.task_name,
            "kwargs": json.dumps(task_kwargs),
            "enabled": params.get("enabled", True),
            "description": f"Parser dashboard schedule: {descriptor.title}; user_id={user_id}",
            "interval": interval,
            "crontab": crontab,
        }

        if not existing:
            periodic_task, _ = PeriodicTask.objects.update_or_create(
                name=name,
                defaults=defaults,
            )
            return periodic_task

        for field, value in defaults.items():
            setattr(existing, field, value)
        existing.name = name
        existing.save()
        return existing
class ParserDashboardDataView(APIView):
    """Aggregate payload for the external parser dashboard."""

    permission_classes = [IsAuthenticated]

    def get(self, request: Request):
        """Return sources, schedules, jobs, record counts and recent load logs."""
        source_rows = ParserSourceSerializer(PARSER_SOURCES.values(), many=True).data
        user_jobs = BackgroundJobService.get_user_jobs(
            user_id=request.user.id,
            limit=30,
        )

        # Counts for generic records are grouped by source in one query; each
        # native model then contributes its own total under its source key.
        grouped = GenericParserRecord.objects.values("source").annotate(
            count=Count("id")
        )
        source_counts = dict(grouped.values_list("source", "count"))
        native_counts = {}
        for native_source, native_model in NATIVE_RECORD_MODELS.items():
            native_counts[native_source] = native_model.objects.count()
        source_counts.update(native_counts)

        user_schedules = list(
            map(_periodic_task_to_dict, _parser_periodic_tasks_for_user(request.user))
        )

        # Split sources by whether they accept a manual file upload.
        api_sources = []
        file_sources = []
        for row in source_rows:
            if row["supports_file_upload"]:
                file_sources.append(row)
            else:
                api_sources.append(row)

        schedules_data = ParserScheduleSerializer(user_schedules, many=True).data
        jobs_data = BackgroundJobListSerializer(user_jobs, many=True).data
        recent_logs = ParserLoadLog.objects.all().order_by("-created_at")[:30]
        load_logs_data = ParserLoadLogSerializer(recent_logs, many=True).data

        return api_response(
            {
                "sources": source_rows,
                "api_sources": api_sources,
                "file_sources": file_sources,
                "schedules": schedules_data,
                "jobs": jobs_data,
                "source_counts": source_counts,
                "load_logs": load_logs_data,
            }
        )
b/src/apps/registers/apps.py new file mode 100644 index 0000000..7744a5d --- /dev/null +++ b/src/apps/registers/apps.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated registers app config.""" + +from registers.apps import * # noqa: F403 diff --git a/src/apps/registers/models.py b/src/apps/registers/models.py new file mode 100644 index 0000000..b5cf5db --- /dev/null +++ b/src/apps/registers/models.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated registers models.""" + +from registers.models import * # noqa: F403 diff --git a/src/apps/registers/serializers.py b/src/apps/registers/serializers.py new file mode 100644 index 0000000..0ca1455 --- /dev/null +++ b/src/apps/registers/serializers.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated registers serializers.""" + +from registers.serializers import * # noqa: F403 diff --git a/src/apps/registers/services.py b/src/apps/registers/services.py new file mode 100644 index 0000000..18a1cc0 --- /dev/null +++ b/src/apps/registers/services.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated registers services.""" + +from registers.services import * # noqa: F403 diff --git a/src/apps/registers/signals.py b/src/apps/registers/signals.py new file mode 100644 index 0000000..242153a --- /dev/null +++ b/src/apps/registers/signals.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated registers signals.""" + +from registers.signals import * # noqa: F403 diff --git a/src/apps/registers/urls.py b/src/apps/registers/urls.py new file mode 100644 index 0000000..e0184c2 --- /dev/null +++ b/src/apps/registers/urls.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated registers URLs.""" + +from registers.urls import * # noqa: F403 diff --git a/src/apps/registers/views.py b/src/apps/registers/views.py new file mode 100644 index 0000000..66ff56e --- /dev/null +++ b/src/apps/registers/views.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated registers views.""" + 
+from registers.views import * # noqa: F403 diff --git a/src/apps/user/__init__.py b/src/apps/user/__init__.py new file mode 100644 index 0000000..2f46404 --- /dev/null +++ b/src/apps/user/__init__.py @@ -0,0 +1 @@ +"""Compatibility package for legacy apps.user imports.""" diff --git a/src/apps/user/admin.py b/src/apps/user/admin.py new file mode 100644 index 0000000..127b76c --- /dev/null +++ b/src/apps/user/admin.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user admin module.""" + +from user.admin import * # noqa: F403 diff --git a/src/apps/user/apps.py b/src/apps/user/apps.py new file mode 100644 index 0000000..597f1ae --- /dev/null +++ b/src/apps/user/apps.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user app config.""" + +from user.apps import * # noqa: F403 diff --git a/src/apps/user/models.py b/src/apps/user/models.py new file mode 100644 index 0000000..c35871a --- /dev/null +++ b/src/apps/user/models.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user models.""" + +from user.models import * # noqa: F403 diff --git a/src/apps/user/serializers.py b/src/apps/user/serializers.py new file mode 100644 index 0000000..9b7f0a7 --- /dev/null +++ b/src/apps/user/serializers.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user serializers.""" + +from user.serializers import * # noqa: F403 diff --git a/src/apps/user/services.py b/src/apps/user/services.py new file mode 100644 index 0000000..17ef777 --- /dev/null +++ b/src/apps/user/services.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user services.""" + +from user.services import * # noqa: F403 diff --git a/src/apps/user/signals.py b/src/apps/user/signals.py new file mode 100644 index 0000000..6333013 --- /dev/null +++ b/src/apps/user/signals.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user signals.""" + +from user.signals import * # noqa: F403 diff --git a/src/apps/user/urls.py b/src/apps/user/urls.py new file mode 
100644 index 0000000..1c5b7cb --- /dev/null +++ b/src/apps/user/urls.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user URLs.""" + +from user.urls import * # noqa: F403 diff --git a/src/apps/user/views.py b/src/apps/user/views.py new file mode 100644 index 0000000..d21fc4f --- /dev/null +++ b/src/apps/user/views.py @@ -0,0 +1,3 @@ +"""Compatibility wrapper for the relocated user views.""" + +from user.views import * # noqa: F403 diff --git a/src/core/api_v1_urls.py b/src/core/api_v1_urls.py index 53a0d54..7fbf28e 100644 --- a/src/core/api_v1_urls.py +++ b/src/core/api_v1_urls.py @@ -19,6 +19,7 @@ API v1 URL configuration. from apps.backups.urls import backups_urlpatterns from apps.core.views import ( + BackgroundJobControlView, BackgroundJobListView, BackgroundJobStatusView, BackgroundJobStreamView, @@ -33,8 +34,8 @@ from apps.parsers.urls import ( system_urlpatterns, zakupki_urlpatterns, ) -from registers.urls import registers_urlpatterns from django.urls import include, path +from registers.urls import registers_urlpatterns app_name = "api_v1" @@ -43,6 +44,11 @@ jobs_urlpatterns = [ path("", BackgroundJobListView.as_view(), name="job-list"), path("/stream/", BackgroundJobStreamView.as_view(), name="job-stream"), path("/", BackgroundJobStatusView.as_view(), name="job-status"), + path( + "/control/", + BackgroundJobControlView.as_view(), + name="job-control", + ), ] urlpatterns = [ @@ -58,6 +64,10 @@ urlpatterns = [ path("zakupki/", include((zakupki_urlpatterns, "zakupki"))), # Парсеры - ФНС бухгалтерская отчетность path("fns/", include((fns_urlpatterns, "fns"))), + # Результаты новых источников без перекрытия старых API выше + path("", include("apps.parsers.api_result_urls", namespace="parser_results")), + # Управление parser Celery задачами и dashboard data + path("parsers/", include("apps.parsers.urls")), # Агрегированные карточки источников для фронтенда path("sources/", include((sources_urlpatterns, "sources"))), # Настройки периодичности 
парсинга diff --git a/src/core/urls.py b/src/core/urls.py index e896b97..e5e403f 100644 --- a/src/core/urls.py +++ b/src/core/urls.py @@ -4,6 +4,7 @@ URL Configuration for the project. The `urlpatterns` list routes URLs to views. """ +from apps.parsers.views import ParserDashboardPageView from django.conf import settings from django.conf.urls.static import static from django.contrib import admin @@ -45,6 +46,28 @@ urlpatterns = [ schema_view.with_ui("swagger", cache_timeout=0), name="schema-swagger-ui", ), + path("dashboard", ParserDashboardPageView.as_view(), name="dashboard"), + path("dashboard/", ParserDashboardPageView.as_view(), name="dashboard-slash"), + path( + "dashboard/", + ParserDashboardPageView.as_view(), + name="dashboard-source", + ), + path( + "dashboard//", + ParserDashboardPageView.as_view(), + name="dashboard-source-slash", + ), + path( + "dashboard//", + ParserDashboardPageView.as_view(), + name="dashboard-source-item", + ), + path( + "dashboard///", + ParserDashboardPageView.as_view(), + name="dashboard-source-item-slash", + ), path("admin/", admin.site.urls), path("health/", include("apps.core.urls")), path("api/v1/", include("core.api_v1_urls", namespace="api_v1")), diff --git a/src/registers/admin.py b/src/registers/admin.py index f7b2026..20407f3 100644 --- a/src/registers/admin.py +++ b/src/registers/admin.py @@ -1,5 +1,11 @@ """Admin configuration for registers app.""" +from django.contrib import admin, messages +from django.shortcuts import redirect +from django.template.response import TemplateResponse +from django.urls import path, reverse +from rest_framework import serializers + from registers.models import ( Organization, Register, @@ -8,11 +14,6 @@ from registers.models import ( ) from registers.serializers import RegisterFileUploadSerializer from registers.services import RegisterImportError, RegisterImportService -from django.contrib import admin, messages -from django.shortcuts import redirect -from django.template.response 
class Migration(migrations.Migration):
    # Adds import status/error tracking to RegisterUpload and turns
    # RegistryMembershipPeriod into a dated period (started_at / ended_at plus
    # the uploads that opened and closed it), with matching indexes and
    # constraints.

    dependencies = [
        ("registers", "0006_flat_membership_period"),
    ]

    operations = [
        # --- RegisterUpload: outcome of each import attempt ---
        migrations.AddField(
            model_name="registerupload",
            name="import_error",
            field=models.TextField(
                blank=True,
                help_text="Детализация ошибки, если импорт завершился неуспешно",
                null=True,
                verbose_name="текст ошибки импорта",
            ),
        ),
        migrations.AddField(
            model_name="registerupload",
            name="import_status",
            field=models.CharField(
                choices=[("success", "Успешная"), ("failed", "Ошибка")],
                db_index=True,
                default="success",
                help_text="Результат попытки импорта файла в реестр",
                max_length=16,
                verbose_name="статус импорта",
            ),
        ),
        # --- RegistryMembershipPeriod: period boundaries ---
        # NOTE(review): rows that already exist presumably receive the local
        # date at migration time as started_at — confirm that is acceptable.
        migrations.AddField(
            model_name="registrymembershipperiod",
            name="started_at",
            field=models.DateField(
                db_index=True,
                default=django.utils.timezone.localdate,
                help_text="Дата, с которой организация входит в реестр",
                verbose_name="дата входа",
            ),
        ),
        # A NULL ended_at marks a period that is still open.
        migrations.AddField(
            model_name="registrymembershipperiod",
            name="ended_at",
            field=models.DateField(
                blank=True,
                db_index=True,
                help_text="Дата, с которой организация больше не входит в реестр",
                null=True,
                verbose_name="дата выхода",
            ),
        ),
        # Links to the uploads that opened / closed the period. PROTECT blocks
        # deleting an upload that is still referenced by a period.
        migrations.AddField(
            model_name="registrymembershipperiod",
            name="started_by_upload",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="started_periods",
                to="registers.registerupload",
                verbose_name="загрузка входа",
            ),
        ),
        migrations.AddField(
            model_name="registrymembershipperiod",
            name="ended_by_upload",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="ended_periods",
                to="registers.registerupload",
                verbose_name="загрузка выхода",
            ),
        ),
        migrations.AlterModelOptions(
            name="registrymembershipperiod",
            options={
                "ordering": ["-started_at", "registry_id"],
                "verbose_name": "период участия",
                "verbose_name_plural": "периоды участия",
            },
        ),
        # --- Indexes ---
        # NOTE(review): the single-column registry / organization indexes are
        # added here although the model already declared them — presumably
        # 0006 dropped them; verify against that migration.
        migrations.AddIndex(
            model_name="registrymembershipperiod",
            index=models.Index(
                fields=["registry"],
                name="registers_m_registr_974fe0_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="registrymembershipperiod",
            index=models.Index(
                fields=["organization"],
                name="registers_m_organiz_fc6221_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="registrymembershipperiod",
            index=models.Index(
                fields=["registry", "started_at"],
                name="registers_m_registr_3292a6_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="registrymembershipperiod",
            index=models.Index(
                fields=["registry", "ended_at"],
                name="registers_m_registr_edbdd9_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="registrymembershipperiod",
            index=models.Index(
                fields=["organization", "started_at"],
                name="registers_m_organiz_138ba3_idx",
            ),
        ),
        # --- Constraints ---
        # At most one open (ended_at IS NULL) period per registry/organization.
        # NOTE(review): no RemoveConstraint for the older "unique_membership"
        # constraint appears here — presumably 0006 removed it; verify.
        migrations.AddConstraint(
            model_name="registrymembershipperiod",
            constraint=models.UniqueConstraint(
                condition=Q(ended_at__isnull=True),
                fields=("registry", "organization"),
                name="unique_active_membership_period",
            ),
        ),
        # A closed period may not end before it starts.
        migrations.AddConstraint(
            model_name="registrymembershipperiod",
            constraint=models.CheckConstraint(
                check=Q(ended_at__isnull=True) | Q(ended_at__gte=F("started_at")),
                name="check_membership_period_dates",
            ),
        ),
    ]
организации к реестру.""" + """Период принадлежности организации к реестру.""" registry = models.ForeignKey( Register, @@ -169,22 +171,62 @@ class RegistryMembershipPeriod(TimestampMixin, models.Model): related_name="membership_periods", verbose_name=_("организация"), ) + started_at = models.DateField( + _("дата входа"), + default=timezone.localdate, + db_index=True, + help_text=_("Дата, с которой организация входит в реестр"), + ) + ended_at = models.DateField( + _("дата выхода"), + null=True, + blank=True, + db_index=True, + help_text=_("Дата, с которой организация больше не входит в реестр"), + ) + started_by_upload = models.ForeignKey( + RegisterUpload, + on_delete=models.PROTECT, + null=True, + blank=True, + related_name="started_periods", + verbose_name=_("загрузка входа"), + ) + ended_by_upload = models.ForeignKey( + RegisterUpload, + on_delete=models.PROTECT, + null=True, + blank=True, + related_name="ended_periods", + verbose_name=_("загрузка выхода"), + ) class Meta: db_table = "registers_membership_period" - verbose_name = _("участие в реестре") - verbose_name_plural = _("участия в реестрах") - ordering = ["registry_id", "organization_id"] + verbose_name = _("период участия") + verbose_name_plural = _("периоды участия") + ordering = ["-started_at", "registry_id"] indexes = [ models.Index(fields=["registry"]), models.Index(fields=["organization"]), + models.Index(fields=["registry", "started_at"]), + models.Index(fields=["registry", "ended_at"]), + models.Index(fields=["organization", "started_at"]), ] constraints = [ models.UniqueConstraint( fields=["registry", "organization"], - name="unique_membership", + condition=Q(ended_at__isnull=True), + name="unique_active_membership_period", + ), + models.CheckConstraint( + check=Q(ended_at__isnull=True) | Q(ended_at__gte=F("started_at")), + name="check_membership_period_dates", ), ] def __str__(self) -> str: - return f"{self.registry.name}: {self.organization.pn_name[:40]}" + return ( + f"{self.registry.name}: 
{self.organization.pn_name[:40]} " + f"с {self.started_at}" + ) diff --git a/src/registers/serializers.py b/src/registers/serializers.py index fdf64cf..2d82d66 100644 --- a/src/registers/serializers.py +++ b/src/registers/serializers.py @@ -1,11 +1,12 @@ """Сериализаторы для API реестров.""" +from rest_framework import serializers + from registers.models import ( Organization, Register, RegistryMembershipPeriod, ) -from rest_framework import serializers class RegisterSerializer(serializers.ModelSerializer): @@ -42,6 +43,8 @@ class RegistryMembershipPeriodSerializer(serializers.ModelSerializer): "id", "registry_id", "registry_name", + "started_at", + "ended_at", ] read_only_fields = fields @@ -81,6 +84,7 @@ class RegisterFileUploadSerializer(serializers.Serializer): """Сериализатор загрузки файла реестра.""" registry = serializers.PrimaryKeyRelatedField(queryset=Register.objects.all()) + actual_date = serializers.DateField(required=False) file = serializers.FileField() def validate_file(self, value): @@ -96,6 +100,7 @@ class OrganizationListQuerySerializer(serializers.Serializer): queryset=Register.objects.all(), required=False, ) + actual_date = serializers.DateField(required=False) search = serializers.CharField(required=False, allow_blank=True) mn_ogrn = serializers.IntegerField(required=False, min_value=0) mn_inn = serializers.IntegerField(required=False, min_value=0) @@ -107,10 +112,18 @@ class OrganizationListQuerySerializer(serializers.Serializer): raise serializers.ValidationError("mn_okpo должен содержать только цифры") return value + def validate(self, attrs): + if attrs.get("actual_date") and not attrs.get("registry"): + raise serializers.ValidationError( + {"actual_date": "actual_date можно использовать только с registry"} + ) + return attrs + class RegistryOrganizationListQuerySerializer(serializers.Serializer): """Сериализатор query-параметров списка организаций конкретного реестра.""" + actual_date = serializers.DateField(required=False) search = 
serializers.CharField(required=False, allow_blank=True) mn_ogrn = serializers.IntegerField(required=False, min_value=0) mn_inn = serializers.IntegerField(required=False, min_value=0) diff --git a/src/registers/services.py b/src/registers/services.py index 5c3e1e2..2feb614 100644 --- a/src/registers/services.py +++ b/src/registers/services.py @@ -4,6 +4,13 @@ from __future__ import annotations import hashlib from dataclasses import dataclass +from datetime import date + +from django.db import transaction +from django.db.models import CharField, Q +from django.db.models.functions import Cast +from django.utils import timezone +from openpyxl import load_workbook from registers.models import ( Organization, @@ -11,11 +18,6 @@ from registers.models import ( RegisterUpload, RegistryMembershipPeriod, ) -from django.db import transaction -from django.db.models import CharField, Q -from django.db.models.functions import Cast -from django.utils import timezone -from openpyxl import load_workbook class RegisterImportError(ValueError): @@ -39,13 +41,15 @@ class RegisterImportService: REQUIRED_HEADERS = {"pn_name", "mn_ogrn", "mn_inn", "mn_okpo"} @classmethod - @transaction.atomic def sync_registry_memberships( cls, *, registry: Register, uploaded_file, + file_name: str = "", + actual_date: date | None = None, uploaded_by=None, + uploaded_by_id: int | None = None, ) -> dict[str, int | str]: """ Обновить текущее состояние реестра целиком из загруженного файла. @@ -55,16 +59,25 @@ class RegisterImportService: 2. Полностью заменяем текущее состояние реестра в соответствии с выгруженным списком организаций. 
""" + snapshot_date = actual_date or timezone.localdate() + cls._validate_snapshot_date(registry=registry, snapshot_date=snapshot_date) file_hash = cls._calculate_file_hash(uploaded_file) - upload = RegisterUpload.objects.create( - registry=registry, - actual_date=timezone.localdate(), - file_name=uploaded_file.name, - file_hash=file_hash, - rows_count=0, - uploaded_by=uploaded_by, - ) + upload_kwargs = { + "registry": registry, + "actual_date": snapshot_date, + "file_name": file_name + or getattr(uploaded_file, "name", "") + or "registry.xlsx", + "file_hash": file_hash, + "rows_count": 0, + } + if uploaded_by is not None: + upload_kwargs["uploaded_by"] = uploaded_by + elif uploaded_by_id is not None: + upload_kwargs["uploaded_by_id"] = uploaded_by_id + + upload = RegisterUpload.objects.create(**upload_kwargs) try: rows = cls._ensure_unique_identities(cls.parse_xlsx(uploaded_file)) @@ -76,42 +89,30 @@ class RegisterImportService: organizations_updated, ) = cls._upsert_organizations(rows) - existing_org_ids = set( - RegistryMembershipPeriod.objects.filter(registry=registry).values_list( - "organization_id", flat=True - ) - ) + active_by_org = cls._get_active_periods_by_org(registry) snapshot_org_ids_set = set(snapshot_org_ids) - - to_remove_org_ids = existing_org_ids - snapshot_org_ids_set - to_add_org_ids = snapshot_org_ids_set - existing_org_ids - - if to_remove_org_ids: - RegistryMembershipPeriod.objects.filter( - registry=registry, - organization_id__in=to_remove_org_ids, - ).delete() - - if to_add_org_ids: - RegistryMembershipPeriod.objects.bulk_create( - [ - RegistryMembershipPeriod( - registry=registry, - organization_id=organization_id, - ) - for organization_id in to_add_org_ids - ], - batch_size=1000, - ignore_conflicts=True, - ) + closed_count = cls._close_missing_periods( + active_by_org=active_by_org, + snapshot_org_ids=snapshot_org_ids_set, + snapshot_date=snapshot_date, + upload=upload, + ) + opened_count = cls._open_new_periods( + registry=registry, 
+ snapshot_org_ids=snapshot_org_ids_set, + active_org_ids=set(active_by_org.keys()), + snapshot_date=snapshot_date, + upload=upload, + ) active_memberships_count = RegistryMembershipPeriod.objects.filter( - registry=registry + registry=registry, + ended_at__isnull=True, ).count() upload.rows_count = len(rows) upload.import_status = RegisterUpload.ImportStatus.SUCCESS - upload.import_error = None + upload.import_error = "" upload.save( update_fields=[ "rows_count", @@ -128,9 +129,11 @@ class RegisterImportService: "rows_in_file": len(rows), "organizations_created": organizations_created, "organizations_updated": organizations_updated, + "memberships_added": opened_count, + "memberships_removed": closed_count, "active_memberships": active_memberships_count, } - except RegisterImportError as exc: + except Exception as exc: upload.import_status = RegisterUpload.ImportStatus.FAILED upload.import_error = str(exc) upload.save( @@ -141,7 +144,9 @@ class RegisterImportService: "updated_at", ] ) - raise + if isinstance(exc, RegisterImportError): + raise + raise RegisterImportError(str(exc)) from exc @classmethod def _upsert_organizations( @@ -208,6 +213,7 @@ class RegisterImportService: cls, *, registry: Register | None = None, + actual_date: date | None = None, search: str = "", mn_ogrn: int | None = None, mn_inn: int | None = None, @@ -215,12 +221,17 @@ class RegisterImportService: mn_okpo: str | None = None, ): """Получить queryset организаций с учетом фильтров по текущему состоянию.""" + resolved_date = cls.resolve_actual_date( + registry=registry, + requested_date=actual_date, + ) queryset = Organization.objects.all().order_by("pn_name") if registry: queryset = cls._filter_organizations_in_registry( queryset=queryset, registry=registry, + actual_date=resolved_date, ) queryset = queryset.distinct() @@ -234,13 +245,14 @@ class RegisterImportService: ) queryset = cls._apply_search(queryset, search.strip()) - return queryset + return queryset, resolved_date @classmethod def 
@classmethod
def resolve_actual_date(
    cls,
    *,
    registry: Register | None,
    requested_date: date | None,
) -> date:
    """Choose the snapshot date for a registry query.

    Resolution order:

    1. ``requested_date`` when the caller supplied one;
    2. the ``actual_date`` of the most recent successful upload of
       ``registry``, when a registry is given and such an upload exists;
    3. the current date in Django's active time zone.

    Args:
        registry: Registry the query is scoped to, or ``None``.
        requested_date: Date explicitly requested by the caller, or ``None``.

    Returns:
        The date the membership snapshot should be evaluated at.
    """
    if requested_date is not None:
        return requested_date

    if registry is not None:
        latest_upload = (
            RegisterUpload.objects.filter(
                registry=registry,
                import_status=RegisterUpload.ImportStatus.SUCCESS,
            )
            .order_by("-actual_date")
            .first()
        )
        if latest_upload is not None:
            return latest_upload.actual_date

    # Use timezone.localdate() rather than date.today(): the import path stamps
    # uploads with timezone.localdate(), and date.today() follows the process
    # clock, which can disagree with Django's time zone around midnight.
    return timezone.localdate()
@classmethod
def _filter_organizations_in_registry(
    cls,
    *,
    queryset,
    registry: Register,
    actual_date: date,
):
    """Keep organizations that belong to ``registry`` on ``actual_date``.

    An organization qualifies when one of its membership periods is in
    ``registry``, started on or before ``actual_date``, and is either still
    open or ends after ``actual_date``.

    Args:
        queryset: Organization queryset to narrow.
        registry: Registry whose membership is checked.
        actual_date: Date the membership is evaluated at.

    Returns:
        The filtered queryset. It may contain duplicates if several periods
        match; callers apply ``distinct()``.
    """
    still_open_on_date = Q(membership_periods__ended_at__isnull=True) | Q(
        membership_periods__ended_at__gt=actual_date
    )
    # All conditions go into ONE filter() call on purpose. With chained
    # filter() calls on a multi-valued relation, each call may match a
    # different related row, so an organization with a closed period in this
    # registry and any other open period would be returned by mistake.
    return queryset.filter(
        still_open_on_date,
        membership_periods__registry=registry,
        membership_periods__started_at__lte=actual_date,
    )
RegisterImportError( + "Дата актуальности не может быть раньше последней загрузки" + ) + @classmethod def _ensure_unique_identities( cls, diff --git a/src/registers/urls.py b/src/registers/urls.py index efe8833..ffdda4e 100644 --- a/src/registers/urls.py +++ b/src/registers/urls.py @@ -1,13 +1,14 @@ """URL конфигурация для приложения реестров.""" +from django.urls import include, path +from rest_framework.routers import DefaultRouter + from registers.views import ( OrganizationViewSet, RegisterUploadView, RegisterViewSet, RegistryOrganizationListView, ) -from django.urls import include, path -from rest_framework.routers import DefaultRouter app_name = "registers" diff --git a/src/registers/views.py b/src/registers/views.py index f6a0962..8684ad4 100644 --- a/src/registers/views.py +++ b/src/registers/views.py @@ -3,6 +3,18 @@ from __future__ import annotations from apps.core.openapi import CommonResponses, ErrorResponses, swagger_tag +from django.shortcuts import get_object_or_404 +from drf_yasg import openapi +from drf_yasg.utils import swagger_auto_schema +from rest_framework import status +from rest_framework.exceptions import ValidationError +from rest_framework.generics import ListAPIView +from rest_framework.parsers import MultiPartParser +from rest_framework.permissions import IsAdminUser, IsAuthenticated +from rest_framework.response import Response +from rest_framework.views import APIView +from rest_framework.viewsets import ReadOnlyModelViewSet + from registers.models import Organization, Register from registers.pagination import RegistersPagination from registers.serializers import ( @@ -16,17 +28,6 @@ from registers.serializers import ( RegistryOrganizationListQuerySerializer, ) from registers.services import RegisterImportError, RegisterImportService -from django.shortcuts import get_object_or_404 -from drf_yasg import openapi -from drf_yasg.utils import swagger_auto_schema -from rest_framework import status -from rest_framework.exceptions import 
ValidationError -from rest_framework.generics import ListAPIView -from rest_framework.parsers import MultiPartParser -from rest_framework.permissions import IsAdminUser, IsAuthenticated -from rest_framework.response import Response -from rest_framework.views import APIView -from rest_framework.viewsets import ReadOnlyModelViewSet REGISTERS_TAG = swagger_tag("Реестры организаций", "registers") @@ -97,7 +98,7 @@ class OrganizationViewSet(ReadOnlyModelViewSet): ) params_serializer.is_valid(raise_exception=True) - queryset = RegisterImportService.get_organizations_queryset( + queryset, _resolved_date = RegisterImportService.get_organizations_queryset( **params_serializer.validated_data ) @@ -120,6 +121,14 @@ class OrganizationViewSet(ReadOnlyModelViewSet): required=False, description="UUID реестра для среза", ), + openapi.Parameter( + name="actual_date", + in_=openapi.IN_QUERY, + type=openapi.TYPE_STRING, + format=openapi.FORMAT_DATE, + required=False, + description="Дата среза, используется вместе с registry", + ), openapi.Parameter( name="search", in_=openapi.IN_QUERY, @@ -195,7 +204,10 @@ class RegistryOrganizationListView(ListAPIView): ) params_serializer.is_valid(raise_exception=True) - queryset = RegisterImportService.get_registry_organizations_queryset( + ( + queryset, + _resolved_date, + ) = RegisterImportService.get_registry_organizations_queryset( registry=registry, **params_serializer.validated_data, ) @@ -211,6 +223,14 @@ class RegistryOrganizationListView(ListAPIView): "этого реестра." 
), manual_parameters=[ + openapi.Parameter( + name="actual_date", + in_=openapi.IN_QUERY, + type=openapi.TYPE_STRING, + format=openapi.FORMAT_DATE, + required=False, + description="Дата среза реестра", + ), openapi.Parameter( name="search", in_=openapi.IN_QUERY, @@ -286,6 +306,14 @@ class RegisterUploadView(APIView): required=True, description="Excel файл с организациями", ), + openapi.Parameter( + name="actual_date", + in_=openapi.IN_FORM, + type=openapi.TYPE_STRING, + format=openapi.FORMAT_DATE, + required=False, + description="Дата актуальности среза", + ), ], consumes=["multipart/form-data"], responses={ @@ -300,11 +328,14 @@ class RegisterUploadView(APIView): registry = serializer.validated_data["registry"] uploaded_file = serializer.validated_data["file"] + actual_date = serializer.validated_data.get("actual_date") try: RegisterImportService.sync_registry_memberships( registry=registry, uploaded_file=uploaded_file, + file_name=uploaded_file.name, + actual_date=actual_date, uploaded_by=request.user, ) except RegisterImportError as exc: diff --git a/src/static/admin/js/vendor/particles.min.js b/src/static/admin/js/vendor/particles.min.js index b3d46d1..1b204b7 100644 --- a/src/static/admin/js/vendor/particles.min.js +++ b/src/static/admin/js/vendor/particles.min.js @@ -6,4 +6,4 @@ /* How to use? 
: Check the GitHub README /* v2.0.0 /* ----------------------------------------------- */ -function hexToRgb(e){var a=/^#?([a-f\d])([a-f\d])([a-f\d])$/i;e=e.replace(a,function(e,a,t,i){return a+a+t+t+i+i});var t=/^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(e);return t?{r:parseInt(t[1],16),g:parseInt(t[2],16),b:parseInt(t[3],16)}:null}function clamp(e,a,t){return Math.min(Math.max(e,a),t)}function isInArray(e,a){return a.indexOf(e)>-1}var pJS=function(e,a){var t=document.querySelector("#"+e+" > .particles-js-canvas-el");this.pJS={canvas:{el:t,w:t.offsetWidth,h:t.offsetHeight},particles:{number:{value:400,density:{enable:!0,value_area:800}},color:{value:"#fff"},shape:{type:"circle",stroke:{width:0,color:"#ff0000"},polygon:{nb_sides:5},image:{src:"",width:100,height:100}},opacity:{value:1,random:!1,anim:{enable:!1,speed:2,opacity_min:0,sync:!1}},size:{value:20,random:!1,anim:{enable:!1,speed:20,size_min:0,sync:!1}},line_linked:{enable:!0,distance:100,color:"#fff",opacity:1,width:1},move:{enable:!0,speed:2,direction:"none",random:!1,straight:!1,out_mode:"out",bounce:!1,attract:{enable:!1,rotateX:3e3,rotateY:3e3}},array:[]},interactivity:{detect_on:"canvas",events:{onhover:{enable:!0,mode:"grab"},onclick:{enable:!0,mode:"push"},resize:!0},modes:{grab:{distance:100,line_linked:{opacity:1}},bubble:{distance:200,size:80,duration:.4},repulse:{distance:200,duration:.4},push:{particles_nb:4},remove:{particles_nb:2}},mouse:{}},retina_detect:!1,fn:{interact:{},modes:{},vendors:{}},tmp:{}};var 
i=this.pJS;a&&Object.deepExtend(i,a),i.tmp.obj={size_value:i.particles.size.value,size_anim_speed:i.particles.size.anim.speed,move_speed:i.particles.move.speed,line_linked_distance:i.particles.line_linked.distance,line_linked_width:i.particles.line_linked.width,mode_grab_distance:i.interactivity.modes.grab.distance,mode_bubble_distance:i.interactivity.modes.bubble.distance,mode_bubble_size:i.interactivity.modes.bubble.size,mode_repulse_distance:i.interactivity.modes.repulse.distance},i.fn.retinaInit=function(){i.retina_detect&&window.devicePixelRatio>1?(i.canvas.pxratio=window.devicePixelRatio,i.tmp.retina=!0):(i.canvas.pxratio=1,i.tmp.retina=!1),i.canvas.w=i.canvas.el.offsetWidth*i.canvas.pxratio,i.canvas.h=i.canvas.el.offsetHeight*i.canvas.pxratio,i.particles.size.value=i.tmp.obj.size_value*i.canvas.pxratio,i.particles.size.anim.speed=i.tmp.obj.size_anim_speed*i.canvas.pxratio,i.particles.move.speed=i.tmp.obj.move_speed*i.canvas.pxratio,i.particles.line_linked.distance=i.tmp.obj.line_linked_distance*i.canvas.pxratio,i.interactivity.modes.grab.distance=i.tmp.obj.mode_grab_distance*i.canvas.pxratio,i.interactivity.modes.bubble.distance=i.tmp.obj.mode_bubble_distance*i.canvas.pxratio,i.particles.line_linked.width=i.tmp.obj.line_linked_width*i.canvas.pxratio,i.interactivity.modes.bubble.size=i.tmp.obj.mode_bubble_size*i.canvas.pxratio,i.interactivity.modes.repulse.distance=i.tmp.obj.mode_repulse_distance*i.canvas.pxratio},i.fn.canvasInit=function(){i.canvas.ctx=i.canvas.el.getContext("2d")},i.fn.canvasSize=function(){i.canvas.el.width=i.canvas.w,i.canvas.el.height=i.canvas.h,i&&i.interactivity.events.resize&&window.addEventListener("resize",function(){i.canvas.w=i.canvas.el.offsetWidth,i.canvas.h=i.canvas.el.offsetHeight,i.tmp.retina&&(i.canvas.w*=i.canvas.pxratio,i.canvas.h*=i.canvas.pxratio),i.canvas.el.width=i.canvas.w,i.canvas.el.height=i.canvas.h,i.particles.move.enable||(i.fn.particlesEmpty(),i.fn.particlesCreate(),i.fn.particlesDraw(),i.fn.vendors.densityAutoPa
rticles()),i.fn.vendors.densityAutoParticles()})},i.fn.canvasPaint=function(){i.canvas.ctx.fillRect(0,0,i.canvas.w,i.canvas.h)},i.fn.canvasClear=function(){i.canvas.ctx.clearRect(0,0,i.canvas.w,i.canvas.h)},i.fn.particle=function(e,a,t){if(this.radius=(i.particles.size.random?Math.random():1)*i.particles.size.value,i.particles.size.anim.enable&&(this.size_status=!1,this.vs=i.particles.size.anim.speed/100,i.particles.size.anim.sync||(this.vs=this.vs*Math.random())),this.x=t?t.x:Math.random()*i.canvas.w,this.y=t?t.y:Math.random()*i.canvas.h,this.x>i.canvas.w-2*this.radius?this.x=this.x-this.radius:this.x<2*this.radius&&(this.x=this.x+this.radius),this.y>i.canvas.h-2*this.radius?this.y=this.y-this.radius:this.y<2*this.radius&&(this.y=this.y+this.radius),i.particles.move.bounce&&i.fn.vendors.checkOverlap(this,t),this.color={},"object"==typeof e.value)if(e.value instanceof Array){var s=e.value[Math.floor(Math.random()*i.particles.color.value.length)];this.color.rgb=hexToRgb(s)}else void 0!=e.value.r&&void 0!=e.value.g&&void 0!=e.value.b&&(this.color.rgb={r:e.value.r,g:e.value.g,b:e.value.b}),void 0!=e.value.h&&void 0!=e.value.s&&void 0!=e.value.l&&(this.color.hsl={h:e.value.h,s:e.value.s,l:e.value.l});else"random"==e.value?this.color.rgb={r:Math.floor(256*Math.random())+0,g:Math.floor(256*Math.random())+0,b:Math.floor(256*Math.random())+0}:"string"==typeof e.value&&(this.color=e,this.color.rgb=hexToRgb(this.color.value));this.opacity=(i.particles.opacity.random?Math.random():1)*i.particles.opacity.value,i.particles.opacity.anim.enable&&(this.opacity_status=!1,this.vo=i.particles.opacity.anim.speed/100,i.particles.opacity.anim.sync||(this.vo=this.vo*Math.random()));var 
n={};switch(i.particles.move.direction){case"top":n={x:0,y:-1};break;case"top-right":n={x:.5,y:-.5};break;case"right":n={x:1,y:-0};break;case"bottom-right":n={x:.5,y:.5};break;case"bottom":n={x:0,y:1};break;case"bottom-left":n={x:-.5,y:1};break;case"left":n={x:-1,y:0};break;case"top-left":n={x:-.5,y:-.5};break;default:n={x:0,y:0}}i.particles.move.straight?(this.vx=n.x,this.vy=n.y,i.particles.move.random&&(this.vx=this.vx*Math.random(),this.vy=this.vy*Math.random())):(this.vx=n.x+Math.random()-.5,this.vy=n.y+Math.random()-.5),this.vx_i=this.vx,this.vy_i=this.vy;var r=i.particles.shape.type;if("object"==typeof r){if(r instanceof Array){var c=r[Math.floor(Math.random()*r.length)];this.shape=c}}else this.shape=r;if("image"==this.shape){var o=i.particles.shape;this.img={src:o.image.src,ratio:o.image.width/o.image.height},this.img.ratio||(this.img.ratio=1),"svg"==i.tmp.img_type&&void 0!=i.tmp.source_svg&&(i.fn.vendors.createSvgImg(this),i.tmp.pushing&&(this.img.loaded=!1))}},i.fn.particle.prototype.draw=function(){function e(){i.canvas.ctx.drawImage(r,a.x-t,a.y-t,2*t,2*t/a.img.ratio)}var a=this;if(void 0!=a.radius_bubble)var t=a.radius_bubble;else var t=a.radius;if(void 0!=a.opacity_bubble)var s=a.opacity_bubble;else var s=a.opacity;if(a.color.rgb)var n="rgba("+a.color.rgb.r+","+a.color.rgb.g+","+a.color.rgb.b+","+s+")";else var 
n="hsla("+a.color.hsl.h+","+a.color.hsl.s+"%,"+a.color.hsl.l+"%,"+s+")";switch(i.canvas.ctx.fillStyle=n,i.canvas.ctx.beginPath(),a.shape){case"circle":i.canvas.ctx.arc(a.x,a.y,t,0,2*Math.PI,!1);break;case"edge":i.canvas.ctx.rect(a.x-t,a.y-t,2*t,2*t);break;case"triangle":i.fn.vendors.drawShape(i.canvas.ctx,a.x-t,a.y+t/1.66,2*t,3,2);break;case"polygon":i.fn.vendors.drawShape(i.canvas.ctx,a.x-t/(i.particles.shape.polygon.nb_sides/3.5),a.y-t/.76,2.66*t/(i.particles.shape.polygon.nb_sides/3),i.particles.shape.polygon.nb_sides,1);break;case"star":i.fn.vendors.drawShape(i.canvas.ctx,a.x-2*t/(i.particles.shape.polygon.nb_sides/4),a.y-t/1.52,2*t*2.66/(i.particles.shape.polygon.nb_sides/3),i.particles.shape.polygon.nb_sides,2);break;case"image":if("svg"==i.tmp.img_type)var r=a.img.obj;else var r=i.tmp.img_obj;r&&e()}i.canvas.ctx.closePath(),i.particles.shape.stroke.width>0&&(i.canvas.ctx.strokeStyle=i.particles.shape.stroke.color,i.canvas.ctx.lineWidth=i.particles.shape.stroke.width,i.canvas.ctx.stroke()),i.canvas.ctx.fill()},i.fn.particlesCreate=function(){for(var e=0;e=i.particles.opacity.value&&(a.opacity_status=!1),a.opacity+=a.vo):(a.opacity<=i.particles.opacity.anim.opacity_min&&(a.opacity_status=!0),a.opacity-=a.vo),a.opacity<0&&(a.opacity=0)),i.particles.size.anim.enable&&(1==a.size_status?(a.radius>=i.particles.size.value&&(a.size_status=!1),a.radius+=a.vs):(a.radius<=i.particles.size.anim.size_min&&(a.size_status=!0),a.radius-=a.vs),a.radius<0&&(a.radius=0)),"bounce"==i.particles.move.out_mode)var s={x_left:a.radius,x_right:i.canvas.w,y_top:a.radius,y_bottom:i.canvas.h};else var 
s={x_left:-a.radius,x_right:i.canvas.w+a.radius,y_top:-a.radius,y_bottom:i.canvas.h+a.radius};switch(a.x-a.radius>i.canvas.w?(a.x=s.x_left,a.y=Math.random()*i.canvas.h):a.x+a.radius<0&&(a.x=s.x_right,a.y=Math.random()*i.canvas.h),a.y-a.radius>i.canvas.h?(a.y=s.y_top,a.x=Math.random()*i.canvas.w):a.y+a.radius<0&&(a.y=s.y_bottom,a.x=Math.random()*i.canvas.w),i.particles.move.out_mode){case"bounce":a.x+a.radius>i.canvas.w?a.vx=-a.vx:a.x-a.radius<0&&(a.vx=-a.vx),a.y+a.radius>i.canvas.h?a.vy=-a.vy:a.y-a.radius<0&&(a.vy=-a.vy)}if(isInArray("grab",i.interactivity.events.onhover.mode)&&i.fn.modes.grabParticle(a),(isInArray("bubble",i.interactivity.events.onhover.mode)||isInArray("bubble",i.interactivity.events.onclick.mode))&&i.fn.modes.bubbleParticle(a),(isInArray("repulse",i.interactivity.events.onhover.mode)||isInArray("repulse",i.interactivity.events.onclick.mode))&&i.fn.modes.repulseParticle(a),i.particles.line_linked.enable||i.particles.move.attract.enable)for(var n=e+1;n0){var c=i.particles.line_linked.color_rgb_line;i.canvas.ctx.strokeStyle="rgba("+c.r+","+c.g+","+c.b+","+r+")",i.canvas.ctx.lineWidth=i.particles.line_linked.width,i.canvas.ctx.beginPath(),i.canvas.ctx.moveTo(e.x,e.y),i.canvas.ctx.lineTo(a.x,a.y),i.canvas.ctx.stroke(),i.canvas.ctx.closePath()}}},i.fn.interact.attractParticles=function(e,a){var t=e.x-a.x,s=e.y-a.y,n=Math.sqrt(t*t+s*s);if(n<=i.particles.line_linked.distance){var r=t/(1e3*i.particles.move.attract.rotateX),c=s/(1e3*i.particles.move.attract.rotateY);e.vx-=r,e.vy-=c,a.vx+=r,a.vy+=c}},i.fn.interact.bounceParticles=function(e,a){var t=e.x-a.x,i=e.y-a.y,s=Math.sqrt(t*t+i*i),n=e.radius+a.radius;n>=s&&(e.vx=-e.vx,e.vy=-e.vy,a.vx=-a.vx,a.vy=-a.vy)},i.fn.modes.pushParticles=function(e,a){i.tmp.pushing=!0;for(var t=0;e>t;t++)i.particles.array.push(new 
i.fn.particle(i.particles.color,i.particles.opacity.value,{x:a?a.pos_x:Math.random()*i.canvas.w,y:a?a.pos_y:Math.random()*i.canvas.h})),t==e-1&&(i.particles.move.enable||i.fn.particlesDraw(),i.tmp.pushing=!1)},i.fn.modes.removeParticles=function(e){i.particles.array.splice(0,e),i.particles.move.enable||i.fn.particlesDraw()},i.fn.modes.bubbleParticle=function(e){function a(){e.opacity_bubble=e.opacity,e.radius_bubble=e.radius}function t(a,t,s,n,c){if(a!=t)if(i.tmp.bubble_duration_end){if(void 0!=s){var o=n-p*(n-a)/i.interactivity.modes.bubble.duration,l=a-o;d=a+l,"size"==c&&(e.radius_bubble=d),"opacity"==c&&(e.opacity_bubble=d)}}else if(r<=i.interactivity.modes.bubble.distance){if(void 0!=s)var v=s;else var v=n;if(v!=a){var d=n-p*(n-a)/i.interactivity.modes.bubble.duration;"size"==c&&(e.radius_bubble=d),"opacity"==c&&(e.opacity_bubble=d)}}else"size"==c&&(e.radius_bubble=void 0),"opacity"==c&&(e.opacity_bubble=void 0)}if(i.interactivity.events.onhover.enable&&isInArray("bubble",i.interactivity.events.onhover.mode)){var s=e.x-i.interactivity.mouse.pos_x,n=e.y-i.interactivity.mouse.pos_y,r=Math.sqrt(s*s+n*n),c=1-r/i.interactivity.modes.bubble.distance;if(r<=i.interactivity.modes.bubble.distance){if(c>=0&&"mousemove"==i.interactivity.status){if(i.interactivity.modes.bubble.size!=i.particles.size.value)if(i.interactivity.modes.bubble.size>i.particles.size.value){var o=e.radius+i.interactivity.modes.bubble.size*c;o>=0&&(e.radius_bubble=o)}else{var l=e.radius-i.interactivity.modes.bubble.size,o=e.radius-l*c;o>0?e.radius_bubble=o:e.radius_bubble=0}if(i.interactivity.modes.bubble.opacity!=i.particles.opacity.value)if(i.interactivity.modes.bubble.opacity>i.particles.opacity.value){var v=i.interactivity.modes.bubble.opacity*c;v>e.opacity&&v<=i.interactivity.modes.bubble.opacity&&(e.opacity_bubble=v)}else{var v=e.opacity-(i.particles.opacity.value-i.interactivity.modes.bubble.opacity)*c;v=i.interactivity.modes.bubble.opacity&&(e.opacity_bubble=v)}}}else 
a();"mouseleave"==i.interactivity.status&&a()}else if(i.interactivity.events.onclick.enable&&isInArray("bubble",i.interactivity.events.onclick.mode)){if(i.tmp.bubble_clicking){var s=e.x-i.interactivity.mouse.click_pos_x,n=e.y-i.interactivity.mouse.click_pos_y,r=Math.sqrt(s*s+n*n),p=((new Date).getTime()-i.interactivity.mouse.click_time)/1e3;p>i.interactivity.modes.bubble.duration&&(i.tmp.bubble_duration_end=!0),p>2*i.interactivity.modes.bubble.duration&&(i.tmp.bubble_clicking=!1,i.tmp.bubble_duration_end=!1)}i.tmp.bubble_clicking&&(t(i.interactivity.modes.bubble.size,i.particles.size.value,e.radius_bubble,e.radius,"size"),t(i.interactivity.modes.bubble.opacity,i.particles.opacity.value,e.opacity_bubble,e.opacity,"opacity"))}},i.fn.modes.repulseParticle=function(e){function a(){var a=Math.atan2(d,p);if(e.vx=u*Math.cos(a),e.vy=u*Math.sin(a),"bounce"==i.particles.move.out_mode){var t={x:e.x+e.vx,y:e.y+e.vy};t.x+e.radius>i.canvas.w?e.vx=-e.vx:t.x-e.radius<0&&(e.vx=-e.vx),t.y+e.radius>i.canvas.h?e.vy=-e.vy:t.y-e.radius<0&&(e.vy=-e.vy)}}if(i.interactivity.events.onhover.enable&&isInArray("repulse",i.interactivity.events.onhover.mode)&&"mousemove"==i.interactivity.status){var t=e.x-i.interactivity.mouse.pos_x,s=e.y-i.interactivity.mouse.pos_y,n=Math.sqrt(t*t+s*s),r={x:t/n,y:s/n},c=i.interactivity.modes.repulse.distance,o=100,l=clamp(1/c*(-1*Math.pow(n/c,2)+1)*c*o,0,50),v={x:e.x+r.x*l,y:e.y+r.y*l};"bounce"==i.particles.move.out_mode?(v.x-e.radius>0&&v.x+e.radius0&&v.y+e.radius=m&&a()}else 0==i.tmp.repulse_clicking&&(e.vx=e.vx_i,e.vy=e.vy_i)},i.fn.modes.grabParticle=function(e){if(i.interactivity.events.onhover.enable&&"mousemove"==i.interactivity.status){var a=e.x-i.interactivity.mouse.pos_x,t=e.y-i.interactivity.mouse.pos_y,s=Math.sqrt(a*a+t*t);if(s<=i.interactivity.modes.grab.distance){var n=i.interactivity.modes.grab.line_linked.opacity-s/(1/i.interactivity.modes.grab.line_linked.opacity)/i.interactivity.modes.grab.distance;if(n>0){var 
r=i.particles.line_linked.color_rgb_line;i.canvas.ctx.strokeStyle="rgba("+r.r+","+r.g+","+r.b+","+n+")",i.canvas.ctx.lineWidth=i.particles.line_linked.width,i.canvas.ctx.beginPath(),i.canvas.ctx.moveTo(e.x,e.y),i.canvas.ctx.lineTo(i.interactivity.mouse.pos_x,i.interactivity.mouse.pos_y),i.canvas.ctx.stroke(),i.canvas.ctx.closePath()}}}},i.fn.vendors.eventsListeners=function(){"window"==i.interactivity.detect_on?i.interactivity.el=window:i.interactivity.el=i.canvas.el,(i.interactivity.events.onhover.enable||i.interactivity.events.onclick.enable)&&(i.interactivity.el.addEventListener("mousemove",function(e){if(i.interactivity.el==window)var a=e.clientX,t=e.clientY;else var a=e.offsetX||e.clientX,t=e.offsetY||e.clientY;i.interactivity.mouse.pos_x=a,i.interactivity.mouse.pos_y=t,i.tmp.retina&&(i.interactivity.mouse.pos_x*=i.canvas.pxratio,i.interactivity.mouse.pos_y*=i.canvas.pxratio),i.interactivity.status="mousemove"}),i.interactivity.el.addEventListener("mouseleave",function(e){i.interactivity.mouse.pos_x=null,i.interactivity.mouse.pos_y=null,i.interactivity.status="mouseleave"})),i.interactivity.events.onclick.enable&&i.interactivity.el.addEventListener("click",function(){if(i.interactivity.mouse.click_pos_x=i.interactivity.mouse.pos_x,i.interactivity.mouse.click_pos_y=i.interactivity.mouse.pos_y,i.interactivity.mouse.click_time=(new 
Date).getTime(),i.interactivity.events.onclick.enable)switch(i.interactivity.events.onclick.mode){case"push":i.particles.move.enable?i.fn.modes.pushParticles(i.interactivity.modes.push.particles_nb,i.interactivity.mouse):1==i.interactivity.modes.push.particles_nb?i.fn.modes.pushParticles(i.interactivity.modes.push.particles_nb,i.interactivity.mouse):i.interactivity.modes.push.particles_nb>1&&i.fn.modes.pushParticles(i.interactivity.modes.push.particles_nb);break;case"remove":i.fn.modes.removeParticles(i.interactivity.modes.remove.particles_nb);break;case"bubble":i.tmp.bubble_clicking=!0;break;case"repulse":i.tmp.repulse_clicking=!0,i.tmp.repulse_count=0,i.tmp.repulse_finish=!1,setTimeout(function(){i.tmp.repulse_clicking=!1},1e3*i.interactivity.modes.repulse.duration)}})},i.fn.vendors.densityAutoParticles=function(){if(i.particles.number.density.enable){var e=i.canvas.el.width*i.canvas.el.height/1e3;i.tmp.retina&&(e/=2*i.canvas.pxratio);var a=e*i.particles.number.value/i.particles.number.density.value_area,t=i.particles.array.length-a;0>t?i.fn.modes.pushParticles(Math.abs(t)):i.fn.modes.removeParticles(t)}},i.fn.vendors.checkOverlap=function(e,a){for(var t=0;tv;v++)e.lineTo(i,0),e.translate(i,0),e.rotate(l);e.fill(),e.restore()},i.fn.vendors.exportImg=function(){window.open(i.canvas.el.toDataURL("image/png"),"_blank")},i.fn.vendors.loadImg=function(e){if(i.tmp.img_error=void 0,""!=i.particles.shape.image.src)if("svg"==e){var a=new XMLHttpRequest;a.open("GET",i.particles.shape.image.src),a.onreadystatechange=function(e){4==a.readyState&&(200==a.status?(i.tmp.source_svg=e.currentTarget.response,i.fn.vendors.checkBeforeDraw()):(console.log("Error pJS - Image not found"),i.tmp.img_error=!0))},a.send()}else{var t=new Image;t.addEventListener("load",function(){i.tmp.img_obj=t,i.fn.vendors.checkBeforeDraw()}),t.src=i.particles.shape.image.src}else console.log("Error pJS - No 
image.src"),i.tmp.img_error=!0},i.fn.vendors.draw=function(){"image"==i.particles.shape.type?"svg"==i.tmp.img_type?i.tmp.count_svg>=i.particles.number.value?(i.fn.particlesDraw(),i.particles.move.enable?i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw):cancelRequestAnimFrame(i.fn.drawAnimFrame)):i.tmp.img_error||(i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw)):void 0!=i.tmp.img_obj?(i.fn.particlesDraw(),i.particles.move.enable?i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw):cancelRequestAnimFrame(i.fn.drawAnimFrame)):i.tmp.img_error||(i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw)):(i.fn.particlesDraw(),i.particles.move.enable?i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw):cancelRequestAnimFrame(i.fn.drawAnimFrame))},i.fn.vendors.checkBeforeDraw=function(){"image"==i.particles.shape.type?"svg"==i.tmp.img_type&&void 0==i.tmp.source_svg?i.tmp.checkAnimFrame=requestAnimFrame(check):(cancelRequestAnimFrame(i.tmp.checkAnimFrame),i.tmp.img_error||(i.fn.vendors.init(),i.fn.vendors.draw())):(i.fn.vendors.init(),i.fn.vendors.draw())},i.fn.vendors.init=function(){i.fn.retinaInit(),i.fn.canvasInit(),i.fn.canvasSize(),i.fn.canvasPaint(),i.fn.particlesCreate(),i.fn.vendors.densityAutoParticles(),i.particles.line_linked.color_rgb_line=hexToRgb(i.particles.line_linked.color)},i.fn.vendors.start=function(){isInArray("image",i.particles.shape.type)?(i.tmp.img_type=i.particles.shape.image.src.substr(i.particles.shape.image.src.length-3),i.fn.vendors.loadImg(i.tmp.img_type)):i.fn.vendors.checkBeforeDraw()},i.fn.vendors.eventsListeners(),i.fn.vendors.start()};Object.deepExtend=function(e,a){for(var t in a)a[t]&&a[t].constructor&&a[t].constructor===Object?(e[t]=e[t]||{},arguments.callee(e[t],a[t])):e[t]=a[t];return e},window.requestAnimFrame=function(){return 
window.requestAnimationFrame||window.webkitRequestAnimationFrame||window.mozRequestAnimationFrame||window.oRequestAnimationFrame||window.msRequestAnimationFrame||function(e){window.setTimeout(e,1e3/60)}}(),window.cancelRequestAnimFrame=function(){return window.cancelAnimationFrame||window.webkitCancelRequestAnimationFrame||window.mozCancelRequestAnimationFrame||window.oCancelRequestAnimationFrame||window.msCancelRequestAnimationFrame||clearTimeout}(),window.pJSDom=[],window.particlesJS=function(e,a){"string"!=typeof e&&(a=e,e="particles-js"),e||(e="particles-js");var t=document.getElementById(e),i="particles-js-canvas-el",s=t.getElementsByClassName(i);if(s.length)for(;s.length>0;)t.removeChild(s[0]);var n=document.createElement("canvas");n.className=i,n.style.width="100%",n.style.height="100%";var r=document.getElementById(e).appendChild(n);null!=r&&pJSDom.push(new pJS(e,a))},window.particlesJS.load=function(e,a,t){var i=new XMLHttpRequest;i.open("GET",a),i.onreadystatechange=function(a){if(4==i.readyState)if(200==i.status){var s=JSON.parse(a.currentTarget.response);window.particlesJS(e,s),t&&t()}else console.log("Error pJS - XMLHttpRequest status: "+i.status),console.log("Error pJS - File config not found")},i.send()}; \ No newline at end of file +function hexToRgb(e){var a=/^#?([a-f\d])([a-f\d])([a-f\d])$/i;e=e.replace(a,function(e,a,t,i){return a+a+t+t+i+i});var t=/^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(e);return t?{r:parseInt(t[1],16),g:parseInt(t[2],16),b:parseInt(t[3],16)}:null}function clamp(e,a,t){return Math.min(Math.max(e,a),t)}function isInArray(e,a){return a.indexOf(e)>-1}var pJS=function(e,a){var t=document.querySelector("#"+e+" > 
.particles-js-canvas-el");this.pJS={canvas:{el:t,w:t.offsetWidth,h:t.offsetHeight},particles:{number:{value:400,density:{enable:!0,value_area:800}},color:{value:"#fff"},shape:{type:"circle",stroke:{width:0,color:"#ff0000"},polygon:{nb_sides:5},image:{src:"",width:100,height:100}},opacity:{value:1,random:!1,anim:{enable:!1,speed:2,opacity_min:0,sync:!1}},size:{value:20,random:!1,anim:{enable:!1,speed:20,size_min:0,sync:!1}},line_linked:{enable:!0,distance:100,color:"#fff",opacity:1,width:1},move:{enable:!0,speed:2,direction:"none",random:!1,straight:!1,out_mode:"out",bounce:!1,attract:{enable:!1,rotateX:3e3,rotateY:3e3}},array:[]},interactivity:{detect_on:"canvas",events:{onhover:{enable:!0,mode:"grab"},onclick:{enable:!0,mode:"push"},resize:!0},modes:{grab:{distance:100,line_linked:{opacity:1}},bubble:{distance:200,size:80,duration:.4},repulse:{distance:200,duration:.4},push:{particles_nb:4},remove:{particles_nb:2}},mouse:{}},retina_detect:!1,fn:{interact:{},modes:{},vendors:{}},tmp:{}};var 
i=this.pJS;a&&Object.deepExtend(i,a),i.tmp.obj={size_value:i.particles.size.value,size_anim_speed:i.particles.size.anim.speed,move_speed:i.particles.move.speed,line_linked_distance:i.particles.line_linked.distance,line_linked_width:i.particles.line_linked.width,mode_grab_distance:i.interactivity.modes.grab.distance,mode_bubble_distance:i.interactivity.modes.bubble.distance,mode_bubble_size:i.interactivity.modes.bubble.size,mode_repulse_distance:i.interactivity.modes.repulse.distance},i.fn.retinaInit=function(){i.retina_detect&&window.devicePixelRatio>1?(i.canvas.pxratio=window.devicePixelRatio,i.tmp.retina=!0):(i.canvas.pxratio=1,i.tmp.retina=!1),i.canvas.w=i.canvas.el.offsetWidth*i.canvas.pxratio,i.canvas.h=i.canvas.el.offsetHeight*i.canvas.pxratio,i.particles.size.value=i.tmp.obj.size_value*i.canvas.pxratio,i.particles.size.anim.speed=i.tmp.obj.size_anim_speed*i.canvas.pxratio,i.particles.move.speed=i.tmp.obj.move_speed*i.canvas.pxratio,i.particles.line_linked.distance=i.tmp.obj.line_linked_distance*i.canvas.pxratio,i.interactivity.modes.grab.distance=i.tmp.obj.mode_grab_distance*i.canvas.pxratio,i.interactivity.modes.bubble.distance=i.tmp.obj.mode_bubble_distance*i.canvas.pxratio,i.particles.line_linked.width=i.tmp.obj.line_linked_width*i.canvas.pxratio,i.interactivity.modes.bubble.size=i.tmp.obj.mode_bubble_size*i.canvas.pxratio,i.interactivity.modes.repulse.distance=i.tmp.obj.mode_repulse_distance*i.canvas.pxratio},i.fn.canvasInit=function(){i.canvas.ctx=i.canvas.el.getContext("2d")},i.fn.canvasSize=function(){i.canvas.el.width=i.canvas.w,i.canvas.el.height=i.canvas.h,i&&i.interactivity.events.resize&&window.addEventListener("resize",function(){i.canvas.w=i.canvas.el.offsetWidth,i.canvas.h=i.canvas.el.offsetHeight,i.tmp.retina&&(i.canvas.w*=i.canvas.pxratio,i.canvas.h*=i.canvas.pxratio),i.canvas.el.width=i.canvas.w,i.canvas.el.height=i.canvas.h,i.particles.move.enable||(i.fn.particlesEmpty(),i.fn.particlesCreate(),i.fn.particlesDraw(),i.fn.vendors.densityAutoPa
rticles()),i.fn.vendors.densityAutoParticles()})},i.fn.canvasPaint=function(){i.canvas.ctx.fillRect(0,0,i.canvas.w,i.canvas.h)},i.fn.canvasClear=function(){i.canvas.ctx.clearRect(0,0,i.canvas.w,i.canvas.h)},i.fn.particle=function(e,a,t){if(this.radius=(i.particles.size.random?Math.random():1)*i.particles.size.value,i.particles.size.anim.enable&&(this.size_status=!1,this.vs=i.particles.size.anim.speed/100,i.particles.size.anim.sync||(this.vs=this.vs*Math.random())),this.x=t?t.x:Math.random()*i.canvas.w,this.y=t?t.y:Math.random()*i.canvas.h,this.x>i.canvas.w-2*this.radius?this.x=this.x-this.radius:this.x<2*this.radius&&(this.x=this.x+this.radius),this.y>i.canvas.h-2*this.radius?this.y=this.y-this.radius:this.y<2*this.radius&&(this.y=this.y+this.radius),i.particles.move.bounce&&i.fn.vendors.checkOverlap(this,t),this.color={},"object"==typeof e.value)if(e.value instanceof Array){var s=e.value[Math.floor(Math.random()*i.particles.color.value.length)];this.color.rgb=hexToRgb(s)}else void 0!=e.value.r&&void 0!=e.value.g&&void 0!=e.value.b&&(this.color.rgb={r:e.value.r,g:e.value.g,b:e.value.b}),void 0!=e.value.h&&void 0!=e.value.s&&void 0!=e.value.l&&(this.color.hsl={h:e.value.h,s:e.value.s,l:e.value.l});else"random"==e.value?this.color.rgb={r:Math.floor(256*Math.random())+0,g:Math.floor(256*Math.random())+0,b:Math.floor(256*Math.random())+0}:"string"==typeof e.value&&(this.color=e,this.color.rgb=hexToRgb(this.color.value));this.opacity=(i.particles.opacity.random?Math.random():1)*i.particles.opacity.value,i.particles.opacity.anim.enable&&(this.opacity_status=!1,this.vo=i.particles.opacity.anim.speed/100,i.particles.opacity.anim.sync||(this.vo=this.vo*Math.random()));var 
n={};switch(i.particles.move.direction){case"top":n={x:0,y:-1};break;case"top-right":n={x:.5,y:-.5};break;case"right":n={x:1,y:-0};break;case"bottom-right":n={x:.5,y:.5};break;case"bottom":n={x:0,y:1};break;case"bottom-left":n={x:-.5,y:1};break;case"left":n={x:-1,y:0};break;case"top-left":n={x:-.5,y:-.5};break;default:n={x:0,y:0}}i.particles.move.straight?(this.vx=n.x,this.vy=n.y,i.particles.move.random&&(this.vx=this.vx*Math.random(),this.vy=this.vy*Math.random())):(this.vx=n.x+Math.random()-.5,this.vy=n.y+Math.random()-.5),this.vx_i=this.vx,this.vy_i=this.vy;var r=i.particles.shape.type;if("object"==typeof r){if(r instanceof Array){var c=r[Math.floor(Math.random()*r.length)];this.shape=c}}else this.shape=r;if("image"==this.shape){var o=i.particles.shape;this.img={src:o.image.src,ratio:o.image.width/o.image.height},this.img.ratio||(this.img.ratio=1),"svg"==i.tmp.img_type&&void 0!=i.tmp.source_svg&&(i.fn.vendors.createSvgImg(this),i.tmp.pushing&&(this.img.loaded=!1))}},i.fn.particle.prototype.draw=function(){function e(){i.canvas.ctx.drawImage(r,a.x-t,a.y-t,2*t,2*t/a.img.ratio)}var a=this;if(void 0!=a.radius_bubble)var t=a.radius_bubble;else var t=a.radius;if(void 0!=a.opacity_bubble)var s=a.opacity_bubble;else var s=a.opacity;if(a.color.rgb)var n="rgba("+a.color.rgb.r+","+a.color.rgb.g+","+a.color.rgb.b+","+s+")";else var 
n="hsla("+a.color.hsl.h+","+a.color.hsl.s+"%,"+a.color.hsl.l+"%,"+s+")";switch(i.canvas.ctx.fillStyle=n,i.canvas.ctx.beginPath(),a.shape){case"circle":i.canvas.ctx.arc(a.x,a.y,t,0,2*Math.PI,!1);break;case"edge":i.canvas.ctx.rect(a.x-t,a.y-t,2*t,2*t);break;case"triangle":i.fn.vendors.drawShape(i.canvas.ctx,a.x-t,a.y+t/1.66,2*t,3,2);break;case"polygon":i.fn.vendors.drawShape(i.canvas.ctx,a.x-t/(i.particles.shape.polygon.nb_sides/3.5),a.y-t/.76,2.66*t/(i.particles.shape.polygon.nb_sides/3),i.particles.shape.polygon.nb_sides,1);break;case"star":i.fn.vendors.drawShape(i.canvas.ctx,a.x-2*t/(i.particles.shape.polygon.nb_sides/4),a.y-t/1.52,2*t*2.66/(i.particles.shape.polygon.nb_sides/3),i.particles.shape.polygon.nb_sides,2);break;case"image":if("svg"==i.tmp.img_type)var r=a.img.obj;else var r=i.tmp.img_obj;r&&e()}i.canvas.ctx.closePath(),i.particles.shape.stroke.width>0&&(i.canvas.ctx.strokeStyle=i.particles.shape.stroke.color,i.canvas.ctx.lineWidth=i.particles.shape.stroke.width,i.canvas.ctx.stroke()),i.canvas.ctx.fill()},i.fn.particlesCreate=function(){for(var e=0;e=i.particles.opacity.value&&(a.opacity_status=!1),a.opacity+=a.vo):(a.opacity<=i.particles.opacity.anim.opacity_min&&(a.opacity_status=!0),a.opacity-=a.vo),a.opacity<0&&(a.opacity=0)),i.particles.size.anim.enable&&(1==a.size_status?(a.radius>=i.particles.size.value&&(a.size_status=!1),a.radius+=a.vs):(a.radius<=i.particles.size.anim.size_min&&(a.size_status=!0),a.radius-=a.vs),a.radius<0&&(a.radius=0)),"bounce"==i.particles.move.out_mode)var s={x_left:a.radius,x_right:i.canvas.w,y_top:a.radius,y_bottom:i.canvas.h};else var 
s={x_left:-a.radius,x_right:i.canvas.w+a.radius,y_top:-a.radius,y_bottom:i.canvas.h+a.radius};switch(a.x-a.radius>i.canvas.w?(a.x=s.x_left,a.y=Math.random()*i.canvas.h):a.x+a.radius<0&&(a.x=s.x_right,a.y=Math.random()*i.canvas.h),a.y-a.radius>i.canvas.h?(a.y=s.y_top,a.x=Math.random()*i.canvas.w):a.y+a.radius<0&&(a.y=s.y_bottom,a.x=Math.random()*i.canvas.w),i.particles.move.out_mode){case"bounce":a.x+a.radius>i.canvas.w?a.vx=-a.vx:a.x-a.radius<0&&(a.vx=-a.vx),a.y+a.radius>i.canvas.h?a.vy=-a.vy:a.y-a.radius<0&&(a.vy=-a.vy)}if(isInArray("grab",i.interactivity.events.onhover.mode)&&i.fn.modes.grabParticle(a),(isInArray("bubble",i.interactivity.events.onhover.mode)||isInArray("bubble",i.interactivity.events.onclick.mode))&&i.fn.modes.bubbleParticle(a),(isInArray("repulse",i.interactivity.events.onhover.mode)||isInArray("repulse",i.interactivity.events.onclick.mode))&&i.fn.modes.repulseParticle(a),i.particles.line_linked.enable||i.particles.move.attract.enable)for(var n=e+1;n0){var c=i.particles.line_linked.color_rgb_line;i.canvas.ctx.strokeStyle="rgba("+c.r+","+c.g+","+c.b+","+r+")",i.canvas.ctx.lineWidth=i.particles.line_linked.width,i.canvas.ctx.beginPath(),i.canvas.ctx.moveTo(e.x,e.y),i.canvas.ctx.lineTo(a.x,a.y),i.canvas.ctx.stroke(),i.canvas.ctx.closePath()}}},i.fn.interact.attractParticles=function(e,a){var t=e.x-a.x,s=e.y-a.y,n=Math.sqrt(t*t+s*s);if(n<=i.particles.line_linked.distance){var r=t/(1e3*i.particles.move.attract.rotateX),c=s/(1e3*i.particles.move.attract.rotateY);e.vx-=r,e.vy-=c,a.vx+=r,a.vy+=c}},i.fn.interact.bounceParticles=function(e,a){var t=e.x-a.x,i=e.y-a.y,s=Math.sqrt(t*t+i*i),n=e.radius+a.radius;n>=s&&(e.vx=-e.vx,e.vy=-e.vy,a.vx=-a.vx,a.vy=-a.vy)},i.fn.modes.pushParticles=function(e,a){i.tmp.pushing=!0;for(var t=0;e>t;t++)i.particles.array.push(new 
i.fn.particle(i.particles.color,i.particles.opacity.value,{x:a?a.pos_x:Math.random()*i.canvas.w,y:a?a.pos_y:Math.random()*i.canvas.h})),t==e-1&&(i.particles.move.enable||i.fn.particlesDraw(),i.tmp.pushing=!1)},i.fn.modes.removeParticles=function(e){i.particles.array.splice(0,e),i.particles.move.enable||i.fn.particlesDraw()},i.fn.modes.bubbleParticle=function(e){function a(){e.opacity_bubble=e.opacity,e.radius_bubble=e.radius}function t(a,t,s,n,c){if(a!=t)if(i.tmp.bubble_duration_end){if(void 0!=s){var o=n-p*(n-a)/i.interactivity.modes.bubble.duration,l=a-o;d=a+l,"size"==c&&(e.radius_bubble=d),"opacity"==c&&(e.opacity_bubble=d)}}else if(r<=i.interactivity.modes.bubble.distance){if(void 0!=s)var v=s;else var v=n;if(v!=a){var d=n-p*(n-a)/i.interactivity.modes.bubble.duration;"size"==c&&(e.radius_bubble=d),"opacity"==c&&(e.opacity_bubble=d)}}else"size"==c&&(e.radius_bubble=void 0),"opacity"==c&&(e.opacity_bubble=void 0)}if(i.interactivity.events.onhover.enable&&isInArray("bubble",i.interactivity.events.onhover.mode)){var s=e.x-i.interactivity.mouse.pos_x,n=e.y-i.interactivity.mouse.pos_y,r=Math.sqrt(s*s+n*n),c=1-r/i.interactivity.modes.bubble.distance;if(r<=i.interactivity.modes.bubble.distance){if(c>=0&&"mousemove"==i.interactivity.status){if(i.interactivity.modes.bubble.size!=i.particles.size.value)if(i.interactivity.modes.bubble.size>i.particles.size.value){var o=e.radius+i.interactivity.modes.bubble.size*c;o>=0&&(e.radius_bubble=o)}else{var l=e.radius-i.interactivity.modes.bubble.size,o=e.radius-l*c;o>0?e.radius_bubble=o:e.radius_bubble=0}if(i.interactivity.modes.bubble.opacity!=i.particles.opacity.value)if(i.interactivity.modes.bubble.opacity>i.particles.opacity.value){var v=i.interactivity.modes.bubble.opacity*c;v>e.opacity&&v<=i.interactivity.modes.bubble.opacity&&(e.opacity_bubble=v)}else{var v=e.opacity-(i.particles.opacity.value-i.interactivity.modes.bubble.opacity)*c;v=i.interactivity.modes.bubble.opacity&&(e.opacity_bubble=v)}}}else 
a();"mouseleave"==i.interactivity.status&&a()}else if(i.interactivity.events.onclick.enable&&isInArray("bubble",i.interactivity.events.onclick.mode)){if(i.tmp.bubble_clicking){var s=e.x-i.interactivity.mouse.click_pos_x,n=e.y-i.interactivity.mouse.click_pos_y,r=Math.sqrt(s*s+n*n),p=((new Date).getTime()-i.interactivity.mouse.click_time)/1e3;p>i.interactivity.modes.bubble.duration&&(i.tmp.bubble_duration_end=!0),p>2*i.interactivity.modes.bubble.duration&&(i.tmp.bubble_clicking=!1,i.tmp.bubble_duration_end=!1)}i.tmp.bubble_clicking&&(t(i.interactivity.modes.bubble.size,i.particles.size.value,e.radius_bubble,e.radius,"size"),t(i.interactivity.modes.bubble.opacity,i.particles.opacity.value,e.opacity_bubble,e.opacity,"opacity"))}},i.fn.modes.repulseParticle=function(e){function a(){var a=Math.atan2(d,p);if(e.vx=u*Math.cos(a),e.vy=u*Math.sin(a),"bounce"==i.particles.move.out_mode){var t={x:e.x+e.vx,y:e.y+e.vy};t.x+e.radius>i.canvas.w?e.vx=-e.vx:t.x-e.radius<0&&(e.vx=-e.vx),t.y+e.radius>i.canvas.h?e.vy=-e.vy:t.y-e.radius<0&&(e.vy=-e.vy)}}if(i.interactivity.events.onhover.enable&&isInArray("repulse",i.interactivity.events.onhover.mode)&&"mousemove"==i.interactivity.status){var t=e.x-i.interactivity.mouse.pos_x,s=e.y-i.interactivity.mouse.pos_y,n=Math.sqrt(t*t+s*s),r={x:t/n,y:s/n},c=i.interactivity.modes.repulse.distance,o=100,l=clamp(1/c*(-1*Math.pow(n/c,2)+1)*c*o,0,50),v={x:e.x+r.x*l,y:e.y+r.y*l};"bounce"==i.particles.move.out_mode?(v.x-e.radius>0&&v.x+e.radius0&&v.y+e.radius=m&&a()}else 0==i.tmp.repulse_clicking&&(e.vx=e.vx_i,e.vy=e.vy_i)},i.fn.modes.grabParticle=function(e){if(i.interactivity.events.onhover.enable&&"mousemove"==i.interactivity.status){var a=e.x-i.interactivity.mouse.pos_x,t=e.y-i.interactivity.mouse.pos_y,s=Math.sqrt(a*a+t*t);if(s<=i.interactivity.modes.grab.distance){var n=i.interactivity.modes.grab.line_linked.opacity-s/(1/i.interactivity.modes.grab.line_linked.opacity)/i.interactivity.modes.grab.distance;if(n>0){var 
r=i.particles.line_linked.color_rgb_line;i.canvas.ctx.strokeStyle="rgba("+r.r+","+r.g+","+r.b+","+n+")",i.canvas.ctx.lineWidth=i.particles.line_linked.width,i.canvas.ctx.beginPath(),i.canvas.ctx.moveTo(e.x,e.y),i.canvas.ctx.lineTo(i.interactivity.mouse.pos_x,i.interactivity.mouse.pos_y),i.canvas.ctx.stroke(),i.canvas.ctx.closePath()}}}},i.fn.vendors.eventsListeners=function(){"window"==i.interactivity.detect_on?i.interactivity.el=window:i.interactivity.el=i.canvas.el,(i.interactivity.events.onhover.enable||i.interactivity.events.onclick.enable)&&(i.interactivity.el.addEventListener("mousemove",function(e){if(i.interactivity.el==window)var a=e.clientX,t=e.clientY;else var a=e.offsetX||e.clientX,t=e.offsetY||e.clientY;i.interactivity.mouse.pos_x=a,i.interactivity.mouse.pos_y=t,i.tmp.retina&&(i.interactivity.mouse.pos_x*=i.canvas.pxratio,i.interactivity.mouse.pos_y*=i.canvas.pxratio),i.interactivity.status="mousemove"}),i.interactivity.el.addEventListener("mouseleave",function(e){i.interactivity.mouse.pos_x=null,i.interactivity.mouse.pos_y=null,i.interactivity.status="mouseleave"})),i.interactivity.events.onclick.enable&&i.interactivity.el.addEventListener("click",function(){if(i.interactivity.mouse.click_pos_x=i.interactivity.mouse.pos_x,i.interactivity.mouse.click_pos_y=i.interactivity.mouse.pos_y,i.interactivity.mouse.click_time=(new 
Date).getTime(),i.interactivity.events.onclick.enable)switch(i.interactivity.events.onclick.mode){case"push":i.particles.move.enable?i.fn.modes.pushParticles(i.interactivity.modes.push.particles_nb,i.interactivity.mouse):1==i.interactivity.modes.push.particles_nb?i.fn.modes.pushParticles(i.interactivity.modes.push.particles_nb,i.interactivity.mouse):i.interactivity.modes.push.particles_nb>1&&i.fn.modes.pushParticles(i.interactivity.modes.push.particles_nb);break;case"remove":i.fn.modes.removeParticles(i.interactivity.modes.remove.particles_nb);break;case"bubble":i.tmp.bubble_clicking=!0;break;case"repulse":i.tmp.repulse_clicking=!0,i.tmp.repulse_count=0,i.tmp.repulse_finish=!1,setTimeout(function(){i.tmp.repulse_clicking=!1},1e3*i.interactivity.modes.repulse.duration)}})},i.fn.vendors.densityAutoParticles=function(){if(i.particles.number.density.enable){var e=i.canvas.el.width*i.canvas.el.height/1e3;i.tmp.retina&&(e/=2*i.canvas.pxratio);var a=e*i.particles.number.value/i.particles.number.density.value_area,t=i.particles.array.length-a;0>t?i.fn.modes.pushParticles(Math.abs(t)):i.fn.modes.removeParticles(t)}},i.fn.vendors.checkOverlap=function(e,a){for(var t=0;tv;v++)e.lineTo(i,0),e.translate(i,0),e.rotate(l);e.fill(),e.restore()},i.fn.vendors.exportImg=function(){window.open(i.canvas.el.toDataURL("image/png"),"_blank")},i.fn.vendors.loadImg=function(e){if(i.tmp.img_error=void 0,""!=i.particles.shape.image.src)if("svg"==e){var a=new XMLHttpRequest;a.open("GET",i.particles.shape.image.src),a.onreadystatechange=function(e){4==a.readyState&&(200==a.status?(i.tmp.source_svg=e.currentTarget.response,i.fn.vendors.checkBeforeDraw()):(console.log("Error pJS - Image not found"),i.tmp.img_error=!0))},a.send()}else{var t=new Image;t.addEventListener("load",function(){i.tmp.img_obj=t,i.fn.vendors.checkBeforeDraw()}),t.src=i.particles.shape.image.src}else console.log("Error pJS - No 
image.src"),i.tmp.img_error=!0},i.fn.vendors.draw=function(){"image"==i.particles.shape.type?"svg"==i.tmp.img_type?i.tmp.count_svg>=i.particles.number.value?(i.fn.particlesDraw(),i.particles.move.enable?i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw):cancelRequestAnimFrame(i.fn.drawAnimFrame)):i.tmp.img_error||(i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw)):void 0!=i.tmp.img_obj?(i.fn.particlesDraw(),i.particles.move.enable?i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw):cancelRequestAnimFrame(i.fn.drawAnimFrame)):i.tmp.img_error||(i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw)):(i.fn.particlesDraw(),i.particles.move.enable?i.fn.drawAnimFrame=requestAnimFrame(i.fn.vendors.draw):cancelRequestAnimFrame(i.fn.drawAnimFrame))},i.fn.vendors.checkBeforeDraw=function(){"image"==i.particles.shape.type?"svg"==i.tmp.img_type&&void 0==i.tmp.source_svg?i.tmp.checkAnimFrame=requestAnimFrame(check):(cancelRequestAnimFrame(i.tmp.checkAnimFrame),i.tmp.img_error||(i.fn.vendors.init(),i.fn.vendors.draw())):(i.fn.vendors.init(),i.fn.vendors.draw())},i.fn.vendors.init=function(){i.fn.retinaInit(),i.fn.canvasInit(),i.fn.canvasSize(),i.fn.canvasPaint(),i.fn.particlesCreate(),i.fn.vendors.densityAutoParticles(),i.particles.line_linked.color_rgb_line=hexToRgb(i.particles.line_linked.color)},i.fn.vendors.start=function(){isInArray("image",i.particles.shape.type)?(i.tmp.img_type=i.particles.shape.image.src.substr(i.particles.shape.image.src.length-3),i.fn.vendors.loadImg(i.tmp.img_type)):i.fn.vendors.checkBeforeDraw()},i.fn.vendors.eventsListeners(),i.fn.vendors.start()};Object.deepExtend=function(e,a){for(var t in a)a[t]&&a[t].constructor&&a[t].constructor===Object?(e[t]=e[t]||{},arguments.callee(e[t],a[t])):e[t]=a[t];return e},window.requestAnimFrame=function(){return 
window.requestAnimationFrame||window.webkitRequestAnimationFrame||window.mozRequestAnimationFrame||window.oRequestAnimationFrame||window.msRequestAnimationFrame||function(e){window.setTimeout(e,1e3/60)}}(),window.cancelRequestAnimFrame=function(){return window.cancelAnimationFrame||window.webkitCancelRequestAnimationFrame||window.mozCancelRequestAnimationFrame||window.oCancelRequestAnimationFrame||window.msCancelRequestAnimationFrame||clearTimeout}(),window.pJSDom=[],window.particlesJS=function(e,a){"string"!=typeof e&&(a=e,e="particles-js"),e||(e="particles-js");var t=document.getElementById(e),i="particles-js-canvas-el",s=t.getElementsByClassName(i);if(s.length)for(;s.length>0;)t.removeChild(s[0]);var n=document.createElement("canvas");n.className=i,n.style.width="100%",n.style.height="100%";var r=document.getElementById(e).appendChild(n);null!=r&&pJSDom.push(new pJS(e,a))},window.particlesJS.load=function(e,a,t){var i=new XMLHttpRequest;i.open("GET",a),i.onreadystatechange=function(a){if(4==i.readyState)if(200==i.status){var s=JSON.parse(a.currentTarget.response);window.particlesJS(e,s),t&&t()}else console.log("Error pJS - XMLHttpRequest status: "+i.status),console.log("Error pJS - File config not found")},i.send()}; diff --git a/src/templates/dashboard.html b/src/templates/dashboard.html new file mode 100644 index 0000000..93f0de8 --- /dev/null +++ b/src/templates/dashboard.html @@ -0,0 +1,1985 @@ + + + + + + Mostovik Parser Dashboard + + + +
+
+

Parser Dashboard

+
Управление источниками, загрузками, расписаниями Celery и внешней выгрузкой
+
+
+ + + + нет токена + + +
+
+ +
+
+

Вход

+
+
+ + +
+
+ +
+
+
+
+ + + + +
+ + + + + + + + + + + + + + + + diff --git a/src/user/admin.py b/src/user/admin.py index 4cf637a..0c44eaf 100644 --- a/src/user/admin.py +++ b/src/user/admin.py @@ -2,12 +2,13 @@ Admin configuration for user app. """ -from user.models import Profile, User from django.contrib import admin from django.contrib.auth.admin import UserAdmin as BaseUserAdmin from django.utils.html import format_html from django.utils.translation import gettext_lazy as _ +from user.models import Profile, User + class ProfileInline(admin.StackedInline): """Inline для профиля пользователя.""" diff --git a/src/user/migrations/0011_sync_model_options.py b/src/user/migrations/0011_sync_model_options.py new file mode 100644 index 0000000..dd89e7b --- /dev/null +++ b/src/user/migrations/0011_sync_model_options.py @@ -0,0 +1,27 @@ +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0010_profile_names_required"), + ] + + operations = [ + migrations.AlterModelOptions( + name="profile", + options={ + "ordering": ["-created_at"], + "verbose_name": "профиль", + "verbose_name_plural": "профили", + }, + ), + migrations.AlterModelOptions( + name="user", + options={ + "ordering": ["-created_at"], + "verbose_name": "пользователь", + "verbose_name_plural": "пользователи", + }, + ), + ]