From b8a18d6da436faa6ef7574eb866dda1895211389 Mon Sep 17 00:00:00 2001 From: Aleksandr Meshchriakov Date: Tue, 19 May 2026 20:21:31 +0200 Subject: [PATCH] feat: migrate parser data to source records --- .gitignore | 1 + src/apps/core/views.py | 13 + src/apps/exchange/urls.py | 7 +- src/apps/exchange/views.py | 140 +- src/apps/parsers/services.py | 94 +- src/apps/parsers/source_cards.py | 448 +- src/apps/parsers/tasks.py | 352 +- src/apps/parsers/views.py | 11 +- src/core/urls.py | 43 +- src/organizations/filters.py | 4 +- src/organizations/models.py | 109 +- src/organizations/serializers.py | 49 +- src/organizations/signals.py | 21 + src/organizations/source_backfill.py | 62 +- src/organizations/source_cache.py | 24 + src/organizations/source_ingestion.py | 197 +- src/organizations/views.py | 59 +- src/registers/dashboard_cache.py | 158 + src/registers/serializers.py | 50 + src/registers/tasks.py | 21 + src/registers/urls.py | 2 + src/registers/views.py | 143 +- src/settings/base.py | 1 + src/settings/dev.py | 10 +- src/templates/dashboard.html | 5178 ----------------- src/user/urls.py | 15 +- src/user/views.py | 8 +- tests/apps/exchange/test_views.py | 56 +- .../test_api_v2_source_extensions.py | 38 + .../organizations/test_source_ingestion.py | 53 + tests/apps/parsers/test_admin.py | 19 +- tests/apps/parsers/test_dashboard_page.py | 171 +- .../parsers/test_direct_ingestion_services.py | 45 + tests/apps/parsers/test_e2e.py | 14 +- tests/apps/parsers/test_fns_upload.py | 25 +- .../apps/parsers/test_procurement_service.py | 109 +- tests/apps/parsers/test_service_helpers.py | 157 +- tests/apps/parsers/test_services.py | 25 + .../apps/parsers/test_source_cards_service.py | 81 + tests/apps/parsers/test_source_cards_views.py | 147 +- tests/apps/parsers/test_sources_api_e2e.py | 2 + tests/apps/parsers/test_tasks.py | 387 +- tests/apps/parsers/test_views.py | 5 +- tests/apps/user/test_views.py | 210 +- tests/settings/test_dev_settings.py | 17 + tests/test_api_inventory_e2e.py | 87 +- 46 files changed, 2689 insertions(+), 6179 deletions(-) create mode 100644 src/organizations/source_cache.py create mode 100644 src/registers/dashboard_cache.py create mode 100644 src/registers/tasks.py delete mode 100644 src/templates/dashboard.html create mode 100644 tests/settings/test_dev_settings.py diff --git a/.gitignore b/.gitignore index 32760f3..5784692 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ Thumbs.db # Backup files *.bak +backups/ *.backupdata/ data/ deteil.json diff --git a/src/apps/core/views.py b/src/apps/core/views.py index 1d0cda9..5f8bbe7 100644 --- a/src/apps/core/views.py +++ b/src/apps/core/views.py @@ -37,6 +37,19 @@ HEALTH_TAG = swagger_tag("Мониторинг", "monitoring") JOBS_TAG = swagger_tag("Фоновые задачи", "background_jobs") +class DisabledEndpointView(APIView): + """ + Disabled API route kept only for stable URL reversing and stale clients. + + DRF returns 405 for every HTTP method because the endpoint is intentionally + absent from the runtime API surface. + """ + + authentication_classes = [] + permission_classes = [] + http_method_names: list[str] = [] + + class HealthCheckView(APIView): """ Комплексная проверка состояния системы. diff --git a/src/apps/exchange/urls.py b/src/apps/exchange/urls.py index e7aa414..5666793 100644 --- a/src/apps/exchange/urls.py +++ b/src/apps/exchange/urls.py @@ -1,12 +1,11 @@ """URL конфигурация приложения exchange.""" +from apps.core.views import DisabledEndpointView from apps.exchange.views import ( ExchangeConnectionListCreateView, ExchangeConnectionTestView, - ExchangeCopyDataView, ExchangePeriodicTaskDetailView, ExchangePeriodicTaskListCreateView, - ExchangeTableListView, ) from django.urls import path @@ -21,8 +20,8 @@ exchange_urlpatterns = [ ExchangeConnectionTestView.as_view(), name="connections-test", ), - path("tables/", ExchangeTableListView.as_view(), name="tables"), - path("copy/", ExchangeCopyDataView.as_view(), name="copy"), + path("tables/", DisabledEndpointView.as_view(), name="tables"), + path("copy/", DisabledEndpointView.as_view(), name="copy"), path( "periodic-tasks/", ExchangePeriodicTaskListCreateView.as_view(), diff --git a/src/apps/exchange/views.py b/src/apps/exchange/views.py index 53c0ece..fbda1d9 100644 --- a/src/apps/exchange/views.py +++ b/src/apps/exchange/views.py @@ -1,17 +1,12 @@ """API views для обмена данными с внешней БД.""" -from contextlib import suppress - from apps.core.openapi import CommonResponses, ErrorResponses, swagger_tag -from apps.core.response import api_response -from apps.core.services import BackgroundJobService from apps.exchange.models import ExchangeConnection from apps.exchange.serializers import ( ExchangeConnectionCreateSerializer, ExchangeConnectionListResponseSerializer, ExchangeConnectionSerializer, ExchangeConnectionTestResponseSerializer, - ExchangeCopyRequestSerializer, ExchangePeriodicTaskListResponseSerializer, ExchangePeriodicTaskSerializer, ExchangePeriodicTaskUpsertSerializer, @@ -21,10 +16,7 @@ from apps.exchange.services import ( ExchangePeriodicTaskService, ExchangeServiceError, ) -from apps.exchange.tasks import copy_parsers_data_async -from django.db import IntegrityError from django.shortcuts import get_object_or_404 -from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status from rest_framework.exceptions import ValidationError @@ -128,110 +120,6 @@ class ExchangeConnectionTestView(APIView): ) -class ExchangeCopyDataView(APIView): - """API запуска копирования данных в целевую БД.""" - - permission_classes = [IsAdminUser] - - @swagger_auto_schema( - tags=[EXCHANGE_TAG], - operation_summary="Копировать данные parsers в target DB", - operation_description=( - "Асинхронно запускает копирование данных из локальной БД " - "в активную целевую БД.\n" - "Перед копированием выполняется только проверка структуры " - "(без изменения схемы/миграций).\n" - "Поддерживает режимы: all / single / selected." - ), - request_body=ExchangeCopyRequestSerializer, - responses={ - 202: openapi.Response( - description="Копирование поставлено в очередь", - schema=openapi.Schema( - type=openapi.TYPE_OBJECT, - properties={ - "status": openapi.Schema(type=openapi.TYPE_STRING), - "message": openapi.Schema(type=openapi.TYPE_STRING), - "task_id": openapi.Schema(type=openapi.TYPE_STRING), - "connection_id": openapi.Schema(type=openapi.TYPE_INTEGER), - "mode": openapi.Schema(type=openapi.TYPE_STRING), - "truncate_before_copy": openapi.Schema( - type=openapi.TYPE_BOOLEAN - ), - }, - ), - ), - 400: CommonResponses.BAD_REQUEST, - **ErrorResponses.ADMIN, - }, - ) - def post(self, request): - serializer = ExchangeCopyRequestSerializer(data=request.data) - serializer.is_valid(raise_exception=True) - - try: - active_connection = ExchangeConnectionService.get_active_connection() - task = copy_parsers_data_async.delay( - connection_id=active_connection.id, - payload=serializer.validated_data, - requested_by_id=request.user.id - if request.user.is_authenticated - else None, - ) - - # Предсоздаём запись для мгновенного отслеживания в /api/v1/jobs/{task_id}/ - with suppress(IntegrityError): - BackgroundJobService.create_job( - task_id=task.id, - task_name="apps.exchange.tasks.copy_parsers_data_async", - user_id=request.user.id if request.user.is_authenticated else None, - meta={ - "connection_id": active_connection.id, - "mode": serializer.validated_data["mode"], - "table": serializer.validated_data.get("table"), - "tables": serializer.validated_data.get("tables"), - "truncate_before_copy": serializer.validated_data.get( - "truncate_before_copy" - ), - }, - ) - except ExchangeServiceError as exc: - raise ValidationError({"copy": str(exc)}) from exc - - return api_response( - { - "status": "started", - "message": "Копирование запущено в фоне.", - "task_id": task.id, - "connection_id": active_connection.id, - "mode": serializer.validated_data["mode"], - "truncate_before_copy": serializer.validated_data[ - "truncate_before_copy" - ], - }, - status_code=status.HTTP_202_ACCEPTED, - ) - - -class ExchangeTableListView(APIView): - """Список таблиц, доступных для выгрузки во внешнюю БД.""" - - permission_classes = [IsAdminUser] - - @swagger_auto_schema( - tags=[EXCHANGE_TAG], - operation_summary="Список таблиц exchange", - operation_description="Возвращает таблицы parser-моделей для режима selected.", - responses={200: "Exchange table choices", **ErrorResponses.ADMIN}, - ) - def get(self, request): - rows = [ - {"table": table, "value": table, "title": title, "label": title} - for table, title in ExchangeConnectionService.get_copy_table_choices() - ] - return api_response(rows) - - class ExchangePeriodicTaskListCreateView(APIView): """API списка и создания периодических задач обмена.""" @@ -289,26 +177,10 @@ class ExchangePeriodicTaskListCreateView(APIView): class ExchangePeriodicTaskDetailView(APIView): - """API чтения и изменения периодической задачи обмена.""" + """API изменения периодической задачи обмена.""" permission_classes = [IsAdminUser] - - @swagger_auto_schema( - tags=[EXCHANGE_TAG], - operation_summary="Детали периодической задачи обмена", - responses={ - 200: ExchangePeriodicTaskSerializer, - 404: CommonResponses.NOT_FOUND, - **ErrorResponses.ADMIN, - }, - ) - def get(self, request, task_id: int): - task = get_object_or_404( - ExchangePeriodicTaskService.get_queryset(), - id=task_id, - ) - output = ExchangePeriodicTaskSerializer(task) - return Response(output.data, status=status.HTTP_200_OK) + http_method_names = ["patch", "head", "options"] @swagger_auto_schema( tags=[EXCHANGE_TAG], @@ -354,11 +226,3 @@ class ExchangePeriodicTaskDetailView(APIView): output = ExchangePeriodicTaskSerializer(task) return Response(output.data, status=status.HTTP_200_OK) - - def delete(self, request, task_id: int): - task = get_object_or_404( - ExchangePeriodicTaskService.get_queryset(), - id=task_id, - ) - task.delete() - return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/src/apps/parsers/services.py b/src/apps/parsers/services.py index 34f6539..67454db 100644 --- a/src/apps/parsers/services.py +++ b/src/apps/parsers/services.py @@ -491,34 +491,42 @@ class IndustrialCertificateService( logger.info("Saving %d certificates (batch_id=%d)", len(certificates), batch_id) - source_records = [ - SourceRecordInput( - external_id=cert.certificate_number, - title=cert.certificate_number, - organization_name=cert.organisation_name, - inn=cert.inn, - ogrn=cert.ogrn, - record_date=cert.issue_date, - url=cert.certificate_file_url, - payload={ - "load_batch": batch_id, - "issue_date": cert.issue_date, - "issue_date_normalized": _date_to_iso( - normalize_to_date(cert.issue_date), - ), - "certificate_number": cert.certificate_number, - "expiry_date": cert.expiry_date, - "expiry_date_normalized": _date_to_iso( - normalize_to_date(cert.expiry_date), - ), - "certificate_file_url": cert.certificate_file_url, - "organisation_name": cert.organisation_name, - "inn": cert.inn, - "ogrn": cert.ogrn, - }, + source_records = [] + for cert in certificates: + certificate_number = cert.certificate_number.strip() + if not certificate_number or certificate_number in {"-", "—"}: + continue + source_records.append( + SourceRecordInput( + external_id=certificate_number, + title=certificate_number, + organization_name=cert.organisation_name, + inn=cert.inn, + ogrn=cert.ogrn, + record_date=cert.issue_date, + url=cert.certificate_file_url, + payload={ + "load_batch": batch_id, + "issue_date": cert.issue_date, + "issue_date_normalized": _date_to_iso( + normalize_to_date(cert.issue_date), + ), + "certificate_number": certificate_number, + "expiry_date": cert.expiry_date, + "expiry_date_normalized": _date_to_iso( + normalize_to_date(cert.expiry_date), + ), + "certificate_file_url": cert.certificate_file_url, + "organisation_name": cert.organisation_name, + "inn": cert.inn, + "ogrn": cert.ogrn, + }, + ) ) - for cert in certificates - ] + + if not source_records: + logger.warning("No certificates with certificate numbers to save") + return 0 result = OrganizationSourceIngestionService.save_records( source=ParserLoadLog.Source.INDUSTRIAL, @@ -644,7 +652,8 @@ class ManufacturerService(BulkOperationsMixin, BaseService[ManufacturerRecord]): """ qs = OrganizationSourceRecord.objects.filter( source=ParserLoadLog.Source.MANUFACTURES, - extension__organization__inn=inn, + ).filter( + Q(extension__organization__inn=inn) | Q(payload__inn=inn), ) if batch_id: qs = qs.filter(load_batch=batch_id) @@ -655,7 +664,12 @@ class ManufacturerService(BulkOperationsMixin, BaseService[ManufacturerRecord]): """Найти производителей по ОГРН.""" return OrganizationSourceRecord.objects.filter( source=ParserLoadLog.Source.MANUFACTURES, - extension__organization__ogrn=ogrn, + ).filter( + Q(payload__ogrn=ogrn) + | ( + Q(extension__organization__ogrn=ogrn) + & (Q(payload__ogrn__isnull=True) | Q(payload__ogrn="")) + ), ) @@ -746,7 +760,8 @@ class IndustrialProductService( """Найти продукцию по ИНН.""" qs = OrganizationSourceRecord.objects.filter( source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS, - extension__organization__inn=inn, + ).filter( + Q(extension__organization__inn=inn) | Q(payload__inn=inn), ) if batch_id: qs = qs.filter(load_batch=batch_id) @@ -757,7 +772,12 @@ class IndustrialProductService( """Найти продукцию по ОГРН.""" return OrganizationSourceRecord.objects.filter( source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS, - extension__organization__ogrn=ogrn, + ).filter( + Q(payload__ogrn=ogrn) + | ( + Q(extension__organization__ogrn=ogrn) + & (Q(payload__ogrn__isnull=True) | Q(payload__ogrn="")) + ), ) @classmethod @@ -902,7 +922,9 @@ class GenericParserRecordService(BulkOperationsMixin, BaseService[GenericParserR @classmethod def find_by_inn(cls, inn: str, source: str | None = None): """Найти generic records по ИНН.""" - qs = OrganizationSourceRecord.objects.filter(extension__organization__inn=inn) + qs = OrganizationSourceRecord.objects.filter( + Q(extension__organization__inn=inn) | Q(payload__inn=inn), + ) if source: qs = qs.filter(source=source) return qs @@ -910,7 +932,13 @@ class GenericParserRecordService(BulkOperationsMixin, BaseService[GenericParserR @classmethod def find_by_ogrn(cls, ogrn: str, source: str | None = None): """Найти generic records по ОГРН.""" - qs = OrganizationSourceRecord.objects.filter(extension__organization__ogrn=ogrn) + qs = OrganizationSourceRecord.objects.filter( + Q(payload__ogrn=ogrn) + | ( + Q(extension__organization__ogrn=ogrn) + & (Q(payload__ogrn__isnull=True) | Q(payload__ogrn="")) + ), + ) if source: qs = qs.filter(source=source) return qs diff --git a/src/apps/parsers/source_cards.py b/src/apps/parsers/source_cards.py index 55627bc..219d45d 100644 --- a/src/apps/parsers/source_cards.py +++ b/src/apps/parsers/source_cards.py @@ -4,8 +4,10 @@ from __future__ import annotations import uuid from contextlib import suppress +from copy import deepcopy from dataclasses import dataclass -from datetime import timedelta +from datetime import datetime, timedelta +from hashlib import blake2s from typing import Any from apps.core.models import JobStatus @@ -15,15 +17,22 @@ from apps.parsers.models import ( ParserLoadLog, ) from django.conf import settings -from django.db.models import Max, Q +from django.core.cache import cache +from django.db.models import Count, Max, Q from django.http import Http404 from django.utils import timezone -from organizations.models import OrganizationSourceRecord +from organizations.models import OrganizationSourceExtension, OrganizationSourceRecord +from organizations.source_cache import ( + get_source_data_cache_version, + invalidate_source_data_cache, +) +from organizations.source_groups import get_source_group_descriptor from rest_framework.exceptions import ValidationError SUCCESSFUL_LOAD_STATUSES = {"success", "skipped"} ACTIVE_JOB_STATUSES = [JobStatus.PENDING, JobStatus.STARTED, JobStatus.RETRY] STALE_ACTIVE_MAX_AGE_MINUTES = 90 +SOURCE_CARD_STATS_CACHE_TIMEOUT_SECONDS = 7 * 24 * 60 * 60 @dataclass(frozen=True) @@ -63,6 +72,34 @@ class SourceCardDefinition: is_available: bool = True +@dataclass(frozen=True) +class SourceRecordStats: + """Precomputed source-record counters for one source item.""" + + records_count: int = 0 + organizations_count: int = 0 + last_updated_at: datetime | None = None + + +@dataclass(frozen=True) +class SourceCardStatsContext: + """Request-local source-card aggregates shared by all cards in one response.""" + + item_stats_by_code: dict[str, SourceRecordStats] + card_organizations_count_by_slug: dict[str, int] + latest_load_by_source: dict[str, ParserLoadLog] + latest_success_load_by_source: dict[str, ParserLoadLog] + active_tasks_by_slug: dict[str, list[dict[str, Any]]] + + +@dataclass(frozen=True) +class SourceCardAggregateStats: + """Cached source-card counters that change only after parser ingestion.""" + + item_stats_by_code: dict[str, SourceRecordStats] + card_organizations_count_by_slug: dict[str, int] + + SOURCE_CARD_DEFINITIONS: tuple[SourceCardDefinition, ...] = ( SourceCardDefinition( slug="financial-indicators", @@ -357,7 +394,16 @@ class SourceCardService: @classmethod def list_cards(cls) -> list[dict[str, Any]]: - return [cls.get_card(definition.slug) for definition in SOURCE_CARD_DEFINITIONS] + context = cls._build_stats_context(SOURCE_CARD_DEFINITIONS) + return [ + cls._build_card(definition, context) + for definition in SOURCE_CARD_DEFINITIONS + ] + + @classmethod + def clear_cache(cls) -> None: + """Invalidate source-card aggregate counters.""" + invalidate_source_data_cache() @classmethod def list_task_statuses(cls) -> list[dict[str, Any]]: @@ -397,25 +443,35 @@ class SourceCardService: @classmethod def get_card(cls, slug: str) -> dict[str, Any]: definition = cls.get_definition(slug) + context = cls._build_stats_context((definition,)) + return cls._build_card(definition, context) + + @classmethod + def _build_card( + cls, + definition: SourceCardDefinition, + context: SourceCardStatsContext, + ) -> dict[str, Any]: source_items = [ - cls._build_source_item(item) for item in definition.source_items + cls._build_source_item(item, context) for item in definition.source_items ] records_count = sum(item["records_count"] for item in source_items) organizations_count = cls._get_card_organizations_count( - definition, source_items + definition, source_items, context ) - latest_success_load = cls._get_latest_load( - definition, - statuses=SUCCESSFUL_LOAD_STATUSES, + latest_success_load = cls._get_latest_context_load( + definition, context.latest_success_load_by_source + ) + latest_load = cls._get_latest_context_load( + definition, context.latest_load_by_source ) - latest_load = cls._get_latest_load(definition) last_updated_at = ( latest_success_load.updated_at if latest_success_load else cls._get_latest_data_timestamp(source_items) ) - active_tasks = cls._get_active_tasks(definition) + active_tasks = context.active_tasks_by_slug.get(definition.slug, []) progress = cls._get_progress(active_tasks) status = cls._get_status( definition=definition, @@ -472,6 +528,7 @@ class SourceCardService: ) -> dict[str, Any]: definition = cls.get_definition(slug) params = cls._validate_refresh_params(definition, params or {}) + cls.clear_cache() tasks = cls._launch_refresh( definition, requested_by_id=requested_by_id, params=params ) @@ -490,6 +547,328 @@ class SourceCardService: raise Http404("Карточка источника не найдена") return definition + @classmethod + def _build_stats_context( + cls, + definitions: tuple[SourceCardDefinition, ...], + ) -> SourceCardStatsContext: + parser_sources = cls._get_parser_sources_for_definitions(definitions) + ( + latest_load_by_source, + latest_success_load_by_source, + ) = cls._build_latest_load_maps(parser_sources) + aggregate_stats = cls._get_or_build_aggregate_stats(definitions) + return SourceCardStatsContext( + item_stats_by_code=aggregate_stats.item_stats_by_code, + card_organizations_count_by_slug=( + aggregate_stats.card_organizations_count_by_slug + ), + latest_load_by_source=latest_load_by_source, + latest_success_load_by_source=latest_success_load_by_source, + active_tasks_by_slug=cls._build_active_tasks_by_slug(definitions), + ) + + @classmethod + def _get_or_build_aggregate_stats( + cls, + definitions: tuple[SourceCardDefinition, ...], + ) -> SourceCardAggregateStats: + cache_key = cls._get_aggregate_stats_cache_key(definitions) + cached_stats = cache.get(cache_key) + if isinstance(cached_stats, SourceCardAggregateStats): + return deepcopy(cached_stats) + + organizations_count_by_source_group = ( + cls._build_organizations_count_by_source_group(definitions) + ) + aggregate_stats = SourceCardAggregateStats( + item_stats_by_code=cls._build_item_stats_by_code( + definitions, + organizations_count_by_source_group, + ), + card_organizations_count_by_slug=( + cls._build_card_organizations_count_by_slug( + definitions, + organizations_count_by_source_group, + ) + ), + ) + cache.set( + cache_key, + deepcopy(aggregate_stats), + timeout=SOURCE_CARD_STATS_CACHE_TIMEOUT_SECONDS, + ) + return aggregate_stats + + @staticmethod + def _get_aggregate_stats_cache_key( + definitions: tuple[SourceCardDefinition, ...], + ) -> str: + version = get_source_data_cache_version() + slugs = ",".join(definition.slug for definition in definitions) + slugs_digest = blake2s(slugs.encode(), digest_size=8).hexdigest() + return f"parsers:source_cards:aggregate_stats:{version}:{slugs_digest}" + + @classmethod + def _build_item_stats_by_code( + cls, + definitions: tuple[SourceCardDefinition, ...], + organizations_count_by_source_group: dict[str, int], + ) -> dict[str, SourceRecordStats]: + item_sources_by_code: dict[str, list[str]] = {} + all_sources: list[str] = [] + for definition in definitions: + for item in definition.source_items: + sources = cls._get_sources_for_item_code(item.code) + if not sources: + continue + item_sources_by_code[item.code] = sources + all_sources.extend(sources) + + unique_sources = cls._deduplicate(all_sources) + if not item_sources_by_code or not unique_sources: + return {} + + rows = ( + OrganizationSourceRecord.objects.filter(source__in=unique_sources) + .order_by() + .values("source") + .annotate( + records_count=Count("uid"), + organizations_count=Count("extension_id", distinct=True), + last_updated_at=Max("updated_at"), + ) + ) + stats_by_source = { + row["source"]: SourceRecordStats( + records_count=row["records_count"] or 0, + organizations_count=row["organizations_count"] or 0, + last_updated_at=row["last_updated_at"], + ) + for row in rows + } + + return { + item_code: SourceRecordStats( + records_count=sum( + stats_by_source.get(source, SourceRecordStats()).records_count + for source in sources + ), + organizations_count=cls._get_item_organizations_count( + sources, + stats_by_source, + organizations_count_by_source_group, + ), + last_updated_at=cls._get_latest_timestamp( + [ + stats_by_source[source].last_updated_at + for source in sources + if source in stats_by_source + and stats_by_source[source].last_updated_at + ] + ), + ) + for item_code, sources in item_sources_by_code.items() + } + + @classmethod + def _build_card_organizations_count_by_slug( + cls, + definitions: tuple[SourceCardDefinition, ...], + organizations_count_by_source_group: dict[str, int], + ) -> dict[str, int]: + counts_by_slug: dict[str, int] = {} + for definition in definitions: + sources = cls._get_sources_for_item_codes( + [item.code for item in definition.source_items] + ) + if not sources: + continue + source_group = cls._get_single_source_group(sources) + if source_group is None: + continue + counts_by_slug[definition.slug] = organizations_count_by_source_group.get( + source_group, + 0, + ) + return counts_by_slug + + @classmethod + def _build_organizations_count_by_source_group( + cls, + definitions: tuple[SourceCardDefinition, ...], + ) -> dict[str, int]: + source_groups = cls._get_source_groups_for_definitions(definitions) + if not source_groups: + return {} + + rows = ( + OrganizationSourceExtension.objects.filter( + source_group__in=source_groups, + records_count__gt=0, + ) + .order_by() + .values("source_group") + .annotate(organizations_count=Count("uid")) + ) + return { + row["source_group"]: row["organizations_count"] or 0 + for row in rows + if row["source_group"] + } + + @classmethod + def _build_latest_load_maps( + cls, + parser_sources: list[str], + ) -> tuple[dict[str, ParserLoadLog], dict[str, ParserLoadLog]]: + if not parser_sources: + return {}, {} + + latest_load_by_source: dict[str, ParserLoadLog] = {} + latest_success_load_by_source: dict[str, ParserLoadLog] = {} + queryset = ParserLoadLog.objects.filter(source__in=parser_sources).order_by( + "source", + "-updated_at", + "-created_at", + ) + for load in queryset: + latest_load_by_source.setdefault(load.source, load) + if load.status in SUCCESSFUL_LOAD_STATUSES: + latest_success_load_by_source.setdefault(load.source, load) + + return latest_load_by_source, latest_success_load_by_source + + @classmethod + def _build_active_tasks_by_slug( + cls, + definitions: tuple[SourceCardDefinition, ...], + ) -> dict[str, list[dict[str, Any]]]: + task_names_by_slug = { + definition.slug: set(definition.task_names) + for definition in definitions + if definition.task_names + } + slugs_by_task_name: dict[str, list[str]] = {} + for slug, task_names in task_names_by_slug.items(): + for task_name in task_names: + slugs_by_task_name.setdefault(task_name, []).append(slug) + + if not slugs_by_task_name: + return {} + + active_tasks_by_slug: dict[str, list[dict[str, Any]]] = { + slug: [] for slug in task_names_by_slug + } + cutoff = cls._stale_cutoff() + queryset = ( + BackgroundJobService.get_queryset() + .filter( + task_name__in=list(slugs_by_task_name), + status__in=ACTIVE_JOB_STATUSES, + ) + .filter( + Q(started_at__isnull=False, started_at__gte=cutoff) + | Q(started_at__isnull=True, created_at__gte=cutoff) + ) + .order_by("-created_at") + ) + + for job in queryset: + for slug in slugs_by_task_name.get(job.task_name, []): + if len(active_tasks_by_slug[slug]) >= 10: + continue + active_tasks_by_slug[slug].append(cls._serialize_job(job)) + + return { + slug: active_tasks + for slug, active_tasks in active_tasks_by_slug.items() + if active_tasks + } + + @classmethod + def _get_latest_context_load( + cls, + definition: SourceCardDefinition, + loads_by_source: dict[str, ParserLoadLog], + ) -> ParserLoadLog | None: + loads = [ + loads_by_source[item.parser_source] + for item in definition.source_items + if item.parser_source and item.parser_source in loads_by_source + ] + if not loads: + return None + return max(loads, key=lambda load: (load.updated_at, load.created_at)) + + @classmethod + def _get_item_organizations_count( + cls, + sources: list[str], + stats_by_source: dict[str, SourceRecordStats], + organizations_count_by_source_group: dict[str, int], + ) -> int: + if len(sources) == 1: + return stats_by_source.get( + sources[0], SourceRecordStats() + ).organizations_count + + source_group = cls._get_single_source_group(sources) + if source_group is None: + return sum( + stats_by_source.get(source, SourceRecordStats()).organizations_count + for source in sources + ) + return organizations_count_by_source_group.get(source_group, 0) + + @staticmethod + def _get_latest_timestamp(values: list[datetime]) -> datetime | None: + return max(values) if values else None + + @staticmethod + def _get_parser_sources_for_definitions( + definitions: tuple[SourceCardDefinition, ...], + ) -> list[str]: + return SourceCardService._deduplicate( + [ + item.parser_source + for definition in definitions + for item in definition.source_items + if item.parser_source + ] + ) + + @classmethod + def _get_source_groups_for_definitions( + cls, + definitions: tuple[SourceCardDefinition, ...], + ) -> list[str]: + source_groups = [] + for definition in definitions: + sources = cls._get_sources_for_item_codes( + [item.code for item in definition.source_items] + ) + source_group = cls._get_single_source_group(sources) + if source_group is not None: + source_groups.append(source_group) + return cls._deduplicate(source_groups) + + @staticmethod + def _get_single_source_group(sources: list[str]) -> str | None: + source_groups = set() + for source in sources: + try: + source_groups.add(get_source_group_descriptor(str(source)).source_group) + except ValueError: + continue + if len(source_groups) != 1: + return None + return next(iter(source_groups)) + + @staticmethod + def _deduplicate(values: list[str]) -> list[str]: + return list(dict.fromkeys(values)) + @classmethod def _validate_refresh_params( cls, @@ -746,15 +1125,35 @@ class SourceCardService: return {"task_id": async_result.id, "task_name": task_name} @classmethod - def _build_source_item(cls, item: SourceItemDefinition) -> dict[str, Any]: - records_count = cls._get_source_records_count(item.code) - organizations_count = cls._get_source_organizations_count(item.code) - last_updated_at = cls._get_source_data_timestamp(item.code) - latest_load = cls._get_latest_load_by_source(item.parser_source) - latest_success_load = cls._get_latest_load_by_source( - item.parser_source, - statuses=SUCCESSFUL_LOAD_STATUSES, - ) + def _build_source_item( + cls, + item: SourceItemDefinition, + context: SourceCardStatsContext | None = None, + ) -> dict[str, Any]: + if context is None: + records_count = cls._get_source_records_count(item.code) + organizations_count = cls._get_source_organizations_count(item.code) + last_updated_at = cls._get_source_data_timestamp(item.code) + latest_load = cls._get_latest_load_by_source(item.parser_source) + latest_success_load = cls._get_latest_load_by_source( + item.parser_source, + statuses=SUCCESSFUL_LOAD_STATUSES, + ) + else: + stats = context.item_stats_by_code.get(item.code, SourceRecordStats()) + records_count = stats.records_count + organizations_count = stats.organizations_count + last_updated_at = stats.last_updated_at + latest_load = ( + context.latest_load_by_source.get(item.parser_source) + if item.parser_source + else None + ) + latest_success_load = ( + context.latest_success_load_by_source.get(item.parser_source) + if item.parser_source + else None + ) return { "code": item.code, @@ -783,7 +1182,8 @@ class SourceCardService: def _get_source_organizations_count(cls, item_code: str) -> int: return ( cls._get_source_record_queryset(item_code) - .values("extension__organization_id") + .order_by() + .values("extension_id") .distinct() .count() ) @@ -799,14 +1199,18 @@ class SourceCardService: cls, definition: SourceCardDefinition, source_items: list[dict[str, Any]], + context: SourceCardStatsContext | None = None, ) -> int: source_codes = [item.code for item in definition.source_items] sources = cls._get_sources_for_item_codes(source_codes) if not sources: return sum(item["organizations_count"] for item in source_items) + if context is not None: + return context.card_organizations_count_by_slug.get(definition.slug, 0) return ( OrganizationSourceRecord.objects.filter(source__in=sources) - .values("extension__organization_id") + .order_by() + .values("extension_id") .distinct() .count() ) diff --git a/src/apps/parsers/tasks.py b/src/apps/parsers/tasks.py index 539d188..7ffd30b 100644 --- a/src/apps/parsers/tasks.py +++ b/src/apps/parsers/tasks.py @@ -26,6 +26,9 @@ from apps.parsers.clients.checko import ( ContractsRequest, InspectionsRequest, LegalCasesRequest, + ObjectType, + SearchRequest, + SearchType, ) from apps.parsers.clients.checko.exceptions import CheckoError from apps.parsers.clients.common import GenericParserItem, StructuredDataClient @@ -54,6 +57,11 @@ from apps.parsers.services import ( from apps.parsers.source_registry import PARSER_SOURCES from celery import shared_task from django.conf import settings +from organizations.models import Organization as SourceOrganization +from organizations.services import ( + normalize_organization_name as normalize_identity_name, +) +from registers.models import Organization as RegistryOrganization from registers.models import RegistryMembershipPeriod from requests.adapters import BaseAdapter @@ -67,11 +75,13 @@ FEDRESURS_CHECKO_FALLBACK_LIMIT = 100 ARBITRATION_CHECKO_LIMIT = 100 REGISTRY_INSPECTIONS_CHECKO_LIMIT = 1000 REGISTRY_CONTRACTS_CHECKO_LIMIT = 1000 +FSTEC_CHECKO_IDENTITY_LOOKUP_LIMIT = 1000 PARSER_STALE_LOAD_MAX_AGE_MINUTES = 90 PARSER_SOFT_TIME_LIMIT_SECONDS = 15 * 60 PARSER_TIME_LIMIT_SECONDS = 20 * 60 INDUSTRIAL_PRODUCTS_SOFT_TIME_LIMIT_SECONDS = 45 * 60 INDUSTRIAL_PRODUCTS_TIME_LIMIT_SECONDS = 60 * 60 +CHECKO_SEARCH_RESULT_LIMIT = 100 class ParserSourceSkipped(Exception): @@ -90,6 +100,16 @@ class RegistryLookupTarget: name: str +@dataclass(frozen=True) +class FstecIdentityCandidate: + """Однозначно найденные реквизиты заявителя ФСТЭК.""" + + inn: str + ogrn: str + name: str + provider: str + + VACANCY_REGISTRY_MAX_PAGES_PER_ORGANIZATION = 100 VACANCY_REGISTRY_TEXT_SEARCH_MAX_PAGES_PER_ORGANIZATION = 1 VACANCY_EMPLOYER_WORD_RE = re.compile(r"[0-9A-Za-zА-Яа-яЁё]+") @@ -424,12 +444,13 @@ def _fetch_fstec_records( ) -> list[GenericParserItem]: """Загрузить ФСТЭК, не превращая WAF/доступ upstream в вечную ошибку.""" try: - return _fetch_structured_records( + records = _fetch_structured_records( source_key="fstec", file_url=file_url, file_path=file_path, proxies=proxies, ) + return _enrich_fstec_record_identities(records, proxies=proxies) except HTTPClientError as exc: if file_url or file_path: raise @@ -438,6 +459,335 @@ def _fetch_fstec_records( ) from exc +def _enrich_fstec_record_identities( + records: list[GenericParserItem], + *, + proxies: list[str] | None, +) -> list[GenericParserItem]: + """Дозаполнить ИНН/ОГРН заявителей ФСТЭК без неоднозначных совпадений.""" + if not records: + return records + + applicant_names_by_key: dict[str, str] = {} + for record in records: + if record.inn and record.ogrn: + continue + applicant_name = _fstec_record_applicant_name(record) + normalized_name = normalize_identity_name(applicant_name) + if normalized_name and normalized_name not in applicant_names_by_key: + applicant_names_by_key[normalized_name] = applicant_name + + if not applicant_names_by_key: + return records + + local_candidates, local_ambiguous = _fstec_local_identity_candidates( + set(applicant_names_by_key) + ) + unresolved_keys = sorted(set(applicant_names_by_key) - set(local_candidates)) + checko_candidates, checko_ambiguous = _fstec_checko_identity_candidates( + applicant_names_by_key=applicant_names_by_key, + candidate_keys=unresolved_keys, + proxies=proxies, + ) + + enriched_records = [] + enriched_count = 0 + ambiguous_count = 0 + for record in records: + enriched_record, was_enriched, was_ambiguous = _enrich_fstec_record_identity( + record, + local_candidates=local_candidates, + checko_candidates=checko_candidates, + local_ambiguous=local_ambiguous, + checko_ambiguous=checko_ambiguous, + checko_enabled=bool(getattr(settings, "CHECKO_API_KEY", "")), + ) + enriched_records.append(enriched_record) + enriched_count += int(was_enriched) + ambiguous_count += int(was_ambiguous) + + logger.info( + "FSTEC identity enrichment completed: enriched=%d ambiguous=%d " + "local_candidates=%d checko_candidates=%d", + enriched_count, + ambiguous_count, + len(local_candidates), + len(checko_candidates), + ) + return enriched_records + + +def _enrich_fstec_record_identity( + record: GenericParserItem, + *, + local_candidates: dict[str, FstecIdentityCandidate], + checko_candidates: dict[str, FstecIdentityCandidate], + local_ambiguous: set[str], + checko_ambiguous: set[str], + checko_enabled: bool, +) -> tuple[GenericParserItem, bool, bool]: + if record.inn and record.ogrn: + return record, False, False + + normalized_name = normalize_identity_name(_fstec_record_applicant_name(record)) + candidate = local_candidates.get(normalized_name) or checko_candidates.get( + normalized_name + ) + if candidate is not None: + return _replace_fstec_identity(record, candidate), True, False + if normalized_name in checko_ambiguous: + return ( + _replace_fstec_identity_status( + record, + status="ambiguous", + provider="checko", + ), + False, + True, + ) + if normalized_name in local_ambiguous and not checko_enabled: + return ( + _replace_fstec_identity_status( + record, + status="ambiguous", + provider="local", + ), + False, + True, + ) + return record, False, False + + +def _fstec_record_applicant_name(record: GenericParserItem) -> str: + payload = record.payload if isinstance(record.payload, dict) else {} + return str( + record.organisation_name + or payload.get("organisation_name") + or payload.get("Заявитель") + or "" + ).strip() + + +def _fstec_local_identity_candidates( + candidate_keys: set[str], +) -> tuple[dict[str, FstecIdentityCandidate], set[str]]: + candidates_by_key: dict[str, dict[tuple[str, str], FstecIdentityCandidate]] = { + key: {} for key in candidate_keys + } + for organization in RegistryOrganization.objects.only( + "pn_name", + "mn_inn", + "mn_ogrn", + ).iterator(): + normalized_name = normalize_identity_name(organization.pn_name) + if normalized_name not in candidates_by_key: + continue + _add_fstec_identity_candidate( + candidates_by_key[normalized_name], + FstecIdentityCandidate( + inn=_normalize_fstec_identifier(organization.mn_inn, max_length=12), + ogrn=_normalize_fstec_identifier(organization.mn_ogrn, max_length=13), + name=organization.pn_name, + provider="local_registry", + ), + ) + + for organization in ( + SourceOrganization.objects.exclude(inn="") + .exclude(ogrn="") + .only("name", "inn", "ogrn") + .iterator() + ): + normalized_name = normalize_identity_name(organization.name) + if normalized_name not in candidates_by_key: + continue + _add_fstec_identity_candidate( + candidates_by_key[normalized_name], + FstecIdentityCandidate( + inn=_normalize_fstec_identifier(organization.inn, max_length=12), + ogrn=_normalize_fstec_identifier(organization.ogrn, max_length=13), + name=organization.name, + provider="local_canonical", + ), + ) + + unique_candidates = { + key: next(iter(candidates.values())) + for key, candidates in candidates_by_key.items() + if len(candidates) == 1 + } + ambiguous_keys = { + key for key, candidates in candidates_by_key.items() if len(candidates) > 1 + } + return unique_candidates, ambiguous_keys + + +def _fstec_checko_identity_candidates( + *, + applicant_names_by_key: dict[str, str], + candidate_keys: list[str], + proxies: list[str] | None, +) -> tuple[dict[str, FstecIdentityCandidate], set[str]]: + api_key = getattr(settings, "CHECKO_API_KEY", "") + if not api_key or not candidate_keys: + return {}, set() + + lookup_limit = _resolve_lookup_limit( + getattr( + settings, + "FSTEC_CHECKO_IDENTITY_LOOKUP_LIMIT", + FSTEC_CHECKO_IDENTITY_LOOKUP_LIMIT, + ), + default=FSTEC_CHECKO_IDENTITY_LOOKUP_LIMIT, + ) + if lookup_limit <= 0: + return {}, set() + + checko_proxies = ( + proxies if getattr(settings, "CHECKO_USE_RUNTIME_PROXIES", False) else None + ) + client = CheckoClient(api_key=api_key, proxies=checko_proxies, timeout=30) + unique_candidates: dict[str, FstecIdentityCandidate] = {} + ambiguous_keys: set[str] = set() + try: + for normalized_name in candidate_keys[:lookup_limit]: + applicant_name = applicant_names_by_key[normalized_name] + if len(applicant_name) < 4: + continue + try: + response = client.search( + SearchRequest( + by=SearchType.NAME, + obj=ObjectType.ORGANIZATION, + query=applicant_name, + active=True, + limit=CHECKO_SEARCH_RESULT_LIMIT, + ) + ) + except CheckoError as exc: + logger.info( + "Checko FSTEC identity lookup skipped for %s: %s", + applicant_name, + exc, + ) + continue + candidates = _fstec_checko_response_candidates( + normalized_name=normalized_name, + response=response, + ) + if len(candidates) == 1: + unique_candidates[normalized_name] = next(iter(candidates.values())) + elif len(candidates) > 1: + ambiguous_keys.add(normalized_name) + finally: + close = getattr(client, "close", None) + if callable(close): + close() + + return unique_candidates, ambiguous_keys + + +def _fstec_checko_response_candidates( + *, + normalized_name: str, + response, +) -> dict[tuple[str, str], FstecIdentityCandidate]: + candidates: dict[tuple[str, str], FstecIdentityCandidate] = {} + data = getattr(response, "data", None) + for organization in getattr(data, "organizations", ()) if data else (): + matched_name = ( + getattr(organization, "short_name", None) + or getattr(organization, "full_name", None) + or "" + ) + if normalize_identity_name(matched_name) != normalized_name: + continue + _add_fstec_identity_candidate( + candidates, + FstecIdentityCandidate( + inn=_normalize_fstec_identifier( + getattr(organization, "inn", ""), + max_length=12, + ), + ogrn=_normalize_fstec_identifier( + getattr(organization, "ogrn", ""), + max_length=13, + ), + name=str(matched_name), + provider="checko", + ), + ) + return candidates + + +def _add_fstec_identity_candidate( + candidates: dict[tuple[str, str], FstecIdentityCandidate], + candidate: FstecIdentityCandidate, +) -> None: + if not candidate.inn or not candidate.ogrn: + return + candidates.setdefault((candidate.inn, candidate.ogrn), candidate) + + +def _normalize_fstec_identifier(value: str | int | None, *, max_length: int) -> str: + normalized = re.sub(r"\D+", "", str(value or "")) + if not normalized: + return "" + if max_length == 12 and len(normalized) in {9, 11}: + normalized = normalized.zfill(len(normalized) + 1) + valid_lengths = {10, 12} if max_length == 12 else {max_length} + if len(normalized) not in valid_lengths: + return "" + return normalized + + +def _replace_fstec_identity( + record: GenericParserItem, + candidate: FstecIdentityCandidate, +) -> GenericParserItem: + payload = _fstec_payload_with_identity_status( + record, + status="matched", + provider=candidate.provider, + matched_name=candidate.name, + ) + return replace(record, inn=candidate.inn, ogrn=candidate.ogrn, payload=payload) + + +def _replace_fstec_identity_status( + record: GenericParserItem, + *, + status: str, + provider: str, +) -> GenericParserItem: + return replace( + record, + payload=_fstec_payload_with_identity_status( + record, + status=status, + provider=provider, + ), + ) + + +def _fstec_payload_with_identity_status( + record: GenericParserItem, + *, + status: str, + provider: str, + matched_name: str = "", +) -> dict: + payload = dict(record.payload) if isinstance(record.payload, dict) else {} + identity_enrichment = { + "status": status, + "provider": provider, + } + if matched_name: + identity_enrichment["matched_name"] = matched_name + payload["identity_enrichment"] = identity_enrichment + return payload + + def _fetch_checko_bankruptcy_records( *, proxies: list[str] | None, diff --git a/src/apps/parsers/views.py b/src/apps/parsers/views.py index c0a56dd..499d455 100644 --- a/src/apps/parsers/views.py +++ b/src/apps/parsers/views.py @@ -73,7 +73,6 @@ from django.db.models import CharField, Count, Q from django.db.models.functions import Cast, Lower from django.http import HttpResponse from django.utils.text import get_valid_filename -from django.views.generic import TemplateView from django_celery_beat.models import CrontabSchedule, IntervalSchedule, PeriodicTask from drf_yasg import openapi from drf_yasg.inspectors import SwaggerAutoSchema @@ -1760,7 +1759,9 @@ def _matched_registry_organization_ids( def _source_record_queryset_for_parser_source(source: str): if source == ParserLoadLog.Source.TRUDVSEM: - return OrganizationSourceRecord.objects.filter(source__in=VACANCY_RECORD_SOURCES) + return OrganizationSourceRecord.objects.filter( + source__in=VACANCY_RECORD_SOURCES + ) return OrganizationSourceRecord.objects.filter(source=source) @@ -2810,9 +2811,3 @@ class ParserDashboardDataView(APIView): ).data, } ) - - -class ParserDashboardPageView(TemplateView): - """HTML-страница dashboard вне Django admin.""" - - template_name = "dashboard.html" diff --git a/src/core/urls.py b/src/core/urls.py index 5fa8d97..decf142 100644 --- a/src/core/urls.py +++ b/src/core/urls.py @@ -4,7 +4,6 @@ URL Configuration for the project. The `urlpatterns` list routes URLs to views. """ -from apps.parsers.views import ParserDashboardPageView from django.conf import settings from django.conf.urls.static import static from django.contrib import admin @@ -13,12 +12,10 @@ from drf_yasg import openapi from drf_yasg.views import get_schema_view from rest_framework import permissions -# Swagger schema view -schema_view = get_schema_view( - openapi.Info( - title="Mostovik API", - default_version="v1", - description=""" +api_info = openapi.Info( + title="Mostovik API", + default_version="v1", + description=""" ## API документация для проекта Mostovik ### Авторизация @@ -33,9 +30,13 @@ schema_view = get_schema_view( API предоставляет только чтение данных (GET, GET list). Добавление и удаление записей происходит через парсеры и админку. """, - contact=openapi.Contact(email="contact@mostovik.local"), - license=openapi.License(name="BSD License"), - ), + contact=openapi.Contact(email="contact@mostovik.local"), + license=openapi.License(name="BSD License"), +) + +# Swagger schema view +schema_view = get_schema_view( + api_info, public=True, permission_classes=(permissions.AllowAny,), ) @@ -46,28 +47,6 @@ urlpatterns = [ schema_view.with_ui("swagger", cache_timeout=0), name="schema-swagger-ui", ), - path("dashboard", ParserDashboardPageView.as_view(), name="dashboard"), - path("dashboard/", ParserDashboardPageView.as_view(), name="dashboard-slash"), - path( - "dashboard/", - ParserDashboardPageView.as_view(), - name="dashboard-source", - ), - path( - "dashboard//", - ParserDashboardPageView.as_view(), - name="dashboard-source-slash", - ), - path( - "dashboard//", - ParserDashboardPageView.as_view(), - name="dashboard-source-item", - ), - path( - "dashboard///", - ParserDashboardPageView.as_view(), - name="dashboard-source-item-slash", - ), path("admin/", admin.site.urls), path("health/", include("apps.core.urls")), path("api/v1/", include("core.api_v1_urls", namespace="api_v1")), diff --git a/src/organizations/filters.py b/src/organizations/filters.py index d2094c7..96fa3d9 100644 --- a/src/organizations/filters.py +++ b/src/organizations/filters.py @@ -42,7 +42,9 @@ class OrganizationFilter(filters.FilterSet): kpp = filters.CharFilter(field_name="kpp", lookup_expr="exact") ogrn = filters.CharFilter(field_name="ogrn", lookup_expr="exact") ogrip = filters.CharFilter(field_name="ogrip", lookup_expr="exact") - identity_status = filters.CharFilter(field_name="identity_status", lookup_expr="exact") + identity_status = filters.CharFilter( + field_name="identity_status", lookup_expr="exact" + ) registry = filters.UUIDFilter(method="filter_registry") registry_name = filters.CharFilter(method="filter_registry_name") has_registry = filters.BooleanFilter(method="filter_has_registry") diff --git a/src/organizations/models.py b/src/organizations/models.py index 5555aba..4443fc6 100644 --- a/src/organizations/models.py +++ b/src/organizations/models.py @@ -14,14 +14,23 @@ from organizations.name_normalization import normalize_organization_name class SourceGroup(models.TextChoices): """Product-level organization source groups.""" - FINANCIAL_INDICATORS = "financial_indicators", _("Финансово-экономические показатели") + FINANCIAL_INDICATORS = ( + "financial_indicators", + _("Финансово-экономические показатели"), + ) GOVERNMENT_PROCUREMENTS = "government_procurements", _("Государственные закупки") - INDUSTRIAL_PRODUCTION = "industrial_production", _("Производители и продукция России") + INDUSTRIAL_PRODUCTION = ( + "industrial_production", + _("Производители и продукция России"), + ) PLANNED_INSPECTIONS = "planned_inspections", _("Плановые проверки") BANKRUPTCY = "bankruptcy", _("Сведения о процедурах банкротства") DEFENSE_SUPPLIERS = "defense_suppliers", _("Недобросовестные поставщики ГОЗ") ARBITRATION = "arbitration", _("Арбитражные дела") - SECURITY_REGISTRIES = "security_registries", _("Реестры по информационной безопасности") + SECURITY_REGISTRIES = ( + "security_registries", + _("Реестры по информационной безопасности"), + ) VACANCIES = "vacancies", _("Вакансии") @@ -472,6 +481,100 @@ class OrganizationSourceRecord(models.Model): def __str__(self) -> str: return self.title or self.external_id or str(self.uid) + @property + def id(self): + """Compatibility alias for legacy parser services that exposed integer id.""" + return self.pk + + @property + def lines(self): + """Compatibility alias for financial reports stored as source records.""" + return self.financial_lines + + @property + def inn(self) -> str: + """Return the canonical organization INN or source payload INN.""" + organization = self.extension.organization + return organization.inn or str((self.payload or {}).get("inn") or "") + + @property + def kpp(self) -> str: + """Return the canonical organization KPP or source payload KPP.""" + organization = self.extension.organization + return organization.kpp or str((self.payload or {}).get("kpp") or "") + + @property + def ogrn(self) -> str: + """Return the canonical organization OGRN or source payload OGRN.""" + organization = self.extension.organization + return organization.ogrn or str((self.payload or {}).get("ogrn") or "") + + @property + def ogrip(self) -> str: + """Return the canonical organization OGRIP or source payload OGRIP.""" + organization = self.extension.organization + return organization.ogrip or str((self.payload or {}).get("ogrip") or "") + + @property + def registry_organization(self): + """Best-effort active registry organization matched by canonical identity.""" + from django.db.models import CharField + from django.db.models.functions import Cast + from registers.models import Organization as RegistryOrganization + from registers.models import RegistryMembershipPeriod + + def _registry_numeric_values(value: str) -> list[str]: + stripped = str(value or "").lstrip("0") + return [value, stripped] if stripped and stripped != value else [value] + + identity_filter = Q() + inn = self.inn + ogrn = self.ogrn + ogrip = self.ogrip + if inn: + identity_filter |= Q(registry_inn_text__in=_registry_numeric_values(inn)) + if ogrn: + identity_filter |= Q(registry_ogrn_text__in=_registry_numeric_values(ogrn)) + if ogrip: + identity_filter |= Q(registry_ogrn_text__in=_registry_numeric_values(ogrip)) + if not identity_filter: + return None + + membership = ( + RegistryMembershipPeriod.objects.filter(ended_at__isnull=True) + .select_related("organization") + .annotate( + registry_inn_text=Cast( + "organization__mn_inn", + output_field=CharField(), + ), + registry_ogrn_text=Cast( + "organization__mn_ogrn", + output_field=CharField(), + ), + ) + .filter(identity_filter) + .order_by("organization__pn_name", "organization_id") + .first() + ) + if membership is not None: + return membership.organization + + return ( + RegistryOrganization.objects.annotate( + registry_inn_text=Cast("mn_inn", output_field=CharField()), + registry_ogrn_text=Cast("mn_ogrn", output_field=CharField()), + ) + .filter(identity_filter) + .order_by("pn_name", "id") + .first() + ) + + @property + def registry_organization_id(self): + registry_organization = self.registry_organization + return registry_organization.id if registry_organization is not None else None + class OrganizationSourceFinancialLine(models.Model): """Structured financial report line under a source record.""" diff --git a/src/organizations/serializers.py b/src/organizations/serializers.py index 344dc59..d413b86 100644 --- a/src/organizations/serializers.py +++ b/src/organizations/serializers.py @@ -2,6 +2,7 @@ from django.db.models import CharField, Q from django.db.models.functions import Cast +from drf_yasg.utils import swagger_serializer_method from registers.models import RegistryMembershipPeriod from rest_framework import serializers @@ -32,13 +33,28 @@ class OrganizationSourceFinancialLineSerializer(serializers.ModelSerializer): read_only_fields = fields +class OrganizationSourceRecordOrganizationSerializer(serializers.Serializer): + """Organization summary embedded into one source record.""" + + uid = serializers.UUIDField(read_only=True) + name = serializers.CharField(read_only=True, allow_blank=True) + inn = serializers.CharField(read_only=True, allow_blank=True) + kpp = serializers.CharField(read_only=True, allow_blank=True) + ogrn = serializers.CharField(read_only=True, allow_blank=True) + ogrip = serializers.CharField(read_only=True, allow_blank=True) + + class OrganizationSourceRecordSerializer(serializers.ModelSerializer): """Source record stored under one source extension.""" extension_uid = serializers.UUIDField(source="extension.uid", read_only=True) - financial_lines = OrganizationSourceFinancialLineSerializer(many=True, read_only=True) + financial_lines = OrganizationSourceFinancialLineSerializer( + many=True, read_only=True + ) organization = serializers.SerializerMethodField() - source_group = serializers.CharField(source="extension.source_group", read_only=True) + source_group = serializers.CharField( + source="extension.source_group", read_only=True + ) class Meta: model = OrganizationSourceRecord @@ -65,6 +81,9 @@ class OrganizationSourceRecordSerializer(serializers.ModelSerializer): ] read_only_fields = fields + @swagger_serializer_method( + serializer_or_field=OrganizationSourceRecordOrganizationSerializer, + ) def get_organization(self, obj) -> dict[str, str]: organization = obj.extension.organization registry_organization = self._get_registry_organization(organization) @@ -75,9 +94,9 @@ class OrganizationSourceRecordSerializer(serializers.ModelSerializer): ogrn = organization.ogrn if registry_organization is not None: name = registry_organization.pn_name or name - inn = self._value_to_string(registry_organization.mn_inn) or inn - kpp = self._value_to_string(registry_organization.in_kpp) or kpp - ogrn = self._value_to_string(registry_organization.mn_ogrn) or ogrn + inn = inn or self._value_to_inn_string(registry_organization.mn_inn) + kpp = kpp or self._value_to_string(registry_organization.in_kpp) + ogrn = ogrn or self._value_to_string(registry_organization.mn_ogrn) return { "uid": str(organization.uid), @@ -185,6 +204,22 @@ class OrganizationSourceRecordSerializer(serializers.ModelSerializer): return "" return str(value) + @classmethod + def _value_to_inn_string(cls, value) -> str: + inn = cls._value_to_string(value) + if len(inn) in {9, 11} and inn.isdigit(): + return inn.zfill(len(inn) + 1) + return inn + + +class OrganizationSourceRecordListResponseSerializer(serializers.Serializer): + """Paginated source-record list response in unified API format.""" + + success = serializers.BooleanField(read_only=True) + data = OrganizationSourceRecordSerializer(many=True, read_only=True) + errors = serializers.JSONField(read_only=True, allow_null=True) + meta = serializers.JSONField(read_only=True, allow_null=True) + class OrganizationSourceExtensionSerializer(serializers.ModelSerializer): """Compact source extension representation.""" @@ -249,5 +284,7 @@ class OrganizationSerializer(serializers.ModelSerializer): "id": str(membership.registry_id), "name": membership.registry.name, } - for membership in query.select_related("registry").order_by("registry__name") + for membership in query.select_related("registry").order_by( + "registry__name" + ) ] diff --git a/src/organizations/signals.py b/src/organizations/signals.py index ebd7d42..ebb344a 100644 --- a/src/organizations/signals.py +++ b/src/organizations/signals.py @@ -2,7 +2,10 @@ from __future__ import annotations +import logging + from apps.parsers.models import ParserLoadLog +from django.conf import settings from django.db import transaction from django.db.models.signals import post_delete, post_save from django.dispatch import receiver @@ -18,6 +21,8 @@ from registers.models import ( from organizations.cache import invalidate_organization_api_cache from organizations.models import OrganizationDataSnapshot +logger = logging.getLogger(__name__) + SOURCE_UPDATE_STATUSES = { ParserLoadLog.Status.SUCCESS, ParserLoadLog.Status.SKIPPED, @@ -28,6 +33,21 @@ def _invalidate_on_commit() -> None: transaction.on_commit(invalidate_organization_api_cache) +def _warm_main_dashboard_cache_on_commit() -> None: + def enqueue_warm_task() -> None: + try: + from registers.tasks import warm_main_dashboard_cache_task + + if getattr(settings, "CELERY_TASK_ALWAYS_EAGER", False): + warm_main_dashboard_cache_task.apply() + else: + warm_main_dashboard_cache_task.delay() + except Exception: + logger.exception("Failed to enqueue main dashboard cache warm task") + + transaction.on_commit(enqueue_warm_task) + + @receiver( post_save, sender=ParserLoadLog, dispatch_uid="organizations_parser_load_save" ) @@ -35,6 +55,7 @@ def invalidate_for_parser_load(sender, instance: ParserLoadLog, **kwargs) -> Non """Invalidate when a parser source reaches a visible terminal state.""" if instance.status in SOURCE_UPDATE_STATUSES: _invalidate_on_commit() + _warm_main_dashboard_cache_on_commit() @receiver(post_save, sender=Register, dispatch_uid="organizations_register_save") diff --git a/src/organizations/source_backfill.py b/src/organizations/source_backfill.py index b53a574..b45dfff 100644 --- a/src/organizations/source_backfill.py +++ b/src/organizations/source_backfill.py @@ -52,16 +52,21 @@ class OrganizationSourceBackfillResult: updated_financial_lines: int = 0 unresolved: int = 0 - def plus(self, other: OrganizationSourceBackfillResult) -> OrganizationSourceBackfillResult: + def plus( + self, other: OrganizationSourceBackfillResult + ) -> OrganizationSourceBackfillResult: return OrganizationSourceBackfillResult( scanned=self.scanned + other.scanned, - created_organizations=self.created_organizations + other.created_organizations, + created_organizations=self.created_organizations + + other.created_organizations, created_extensions=self.created_extensions + other.created_extensions, updated_extensions=self.updated_extensions + other.updated_extensions, created_records=self.created_records + other.created_records, updated_records=self.updated_records + other.updated_records, - created_financial_lines=self.created_financial_lines + other.created_financial_lines, - updated_financial_lines=self.updated_financial_lines + other.updated_financial_lines, + created_financial_lines=self.created_financial_lines + + other.created_financial_lines, + updated_financial_lines=self.updated_financial_lines + + other.updated_financial_lines, unresolved=self.unresolved + other.unresolved, ) @@ -131,14 +136,20 @@ class OrganizationSourceBackfillService: with transaction.atomic(): for adapter in adapters: scanned += 1 - organization, organization_created = cls._resolve_or_create_organization(adapter) + ( + organization, + organization_created, + ) = cls._resolve_or_create_organization(adapter) if organization is None: unresolved += 1 continue if organization_created: created_organizations += 1 - extension, extension_created = descriptor.extension_model.objects.get_or_create( + ( + extension, + extension_created, + ) = descriptor.extension_model.objects.get_or_create( organization=organization, defaults={ "source_group": descriptor.source_group, @@ -149,9 +160,14 @@ class OrganizationSourceBackfillService: if extension_created: created_extensions += 1 else: - updated_extensions += cls._update_extension(extension, descriptor, adapter) + updated_extensions += cls._update_extension( + extension, descriptor, adapter + ) - source_record, record_created = OrganizationSourceRecord.objects.update_or_create( + ( + source_record, + record_created, + ) = OrganizationSourceRecord.objects.update_or_create( legacy_model=adapter.legacy_model, legacy_pk=adapter.legacy_pk, defaults={ @@ -174,7 +190,9 @@ class OrganizationSourceBackfillService: updated_records += 1 if adapter.source == ParserLoadLog.Source.FNS_REPORTS: - line_result = cls._backfill_financial_lines(source_record, adapter.legacy_pk) + line_result = cls._backfill_financial_lines( + source_record, adapter.legacy_pk + ) created_financial_lines += line_result[0] updated_financial_lines += line_result[1] @@ -204,7 +222,10 @@ class OrganizationSourceBackfillService: if extension.title != descriptor.title: extension.title = descriptor.title changed = True - if adapter.load_batch is not None and extension.last_load_batch != adapter.load_batch: + if ( + adapter.load_batch is not None + and extension.last_load_batch != adapter.load_batch + ): extension.last_load_batch = adapter.load_batch changed = True if changed: @@ -371,7 +392,9 @@ class OrganizationSourceBackfillService: @staticmethod def _refresh_extension_counters(extension_ids: set[str]) -> None: - for extension in OrganizationSourceExtension.objects.filter(uid__in=extension_ids): + for extension in OrganizationSourceExtension.objects.filter( + uid__in=extension_ids + ): aggregate = extension.records.aggregate( records_count=Count("uid"), first_seen_at=Min("created_at"), @@ -540,13 +563,16 @@ class OrganizationSourceBackfillService: url: str = "", payload: dict[str, Any] | None = None, ) -> LegacyRecordAdapter: - normalized_inn, normalized_kpp, normalized_ogrn, normalized_ogrip = ( - normalize_identity_fields( - inn=inn, - kpp=kpp, - ogrn=ogrn, - ogrip=ogrip, - ) + ( + normalized_inn, + normalized_kpp, + normalized_ogrn, + normalized_ogrip, + ) = normalize_identity_fields( + inn=inn, + kpp=kpp, + ogrn=ogrn, + ogrip=ogrip, ) return LegacyRecordAdapter( source=str(descriptor.source), diff --git a/src/organizations/source_cache.py b/src/organizations/source_cache.py new file mode 100644 index 0000000..e5708a9 --- /dev/null +++ b/src/organizations/source_cache.py @@ -0,0 +1,24 @@ +"""Cache versioning for organization source-derived aggregates.""" + +from __future__ import annotations + +from uuid import uuid4 + +from django.core.cache import cache + +SOURCE_DATA_CACHE_VERSION_KEY = "organizations:source_data:version" + + +def get_source_data_cache_version() -> str: + """Return current cache version for aggregates derived from source records.""" + return str(cache.get(SOURCE_DATA_CACHE_VERSION_KEY) or "initial") + + +def invalidate_source_data_cache() -> None: + """Invalidate aggregates derived from organization source records.""" + version = uuid4() + cache.set( + SOURCE_DATA_CACHE_VERSION_KEY, + version.hex if hasattr(version, "hex") else str(version), + timeout=None, + ) diff --git a/src/organizations/source_ingestion.py b/src/organizations/source_ingestion.py index ad7cc47..5920723 100644 --- a/src/organizations/source_ingestion.py +++ b/src/organizations/source_ingestion.py @@ -20,6 +20,7 @@ from organizations.models import ( OrganizationSourceRecord, ) from organizations.name_normalization import normalize_organization_name +from organizations.source_cache import invalidate_source_data_cache from organizations.source_groups import ( SourceGroupDescriptor, get_source_group_descriptor, @@ -128,18 +129,21 @@ class OrganizationSourceIngestionService: with transaction.atomic(): normalized_records = cls._normalize_records(records) - organizations_by_index, created_organizations = ( - cls._resolve_or_create_organizations(normalized_records) - ) + ( + organizations_by_index, + created_organizations, + ) = cls._resolve_or_create_organizations(normalized_records) del normalized_records unresolved = scanned - len(organizations_by_index) - extensions_by_organization_id, created_extensions, updated_extensions = ( - cls._resolve_or_create_extensions( - descriptor=descriptor, - load_batch=load_batch, - organizations=organizations_by_index.values(), - ) + ( + extensions_by_organization_id, + created_extensions, + updated_extensions, + ) = cls._resolve_or_create_extensions( + descriptor=descriptor, + load_batch=load_batch, + organizations=organizations_by_index.values(), ) touched_extension_ids: set[str] = set() @@ -166,6 +170,8 @@ class OrganizationSourceIngestionService: cls._refresh_extension_counters(touched_extension_ids) + invalidate_source_data_cache() + return OrganizationSourceIngestionResult( scanned=scanned, created_organizations=created_organizations, @@ -256,10 +262,18 @@ class OrganizationSourceIngestionService: organizations_by_index, ) - return cls._create_missing_organizations( + ( + organizations_by_index, + created_organizations, + ) = cls._create_missing_organizations( normalized_records, organizations_by_index, ) + cls._update_resolved_organization_identities( + normalized_records, + organizations_by_index, + ) + return organizations_by_index, created_organizations @classmethod def _resolve_organizations_by_inn_kpp( @@ -285,7 +299,9 @@ class OrganizationSourceIngestionService: for inn, kpp in chunk: query |= Q(inn=inn, kpp=kpp) for organization in Organization.objects.filter(query): - organizations_by_key[(organization.inn, organization.kpp)] = organization + organizations_by_key[ + (organization.inn, organization.kpp) + ] = organization for record in normalized_records: if record.index in organizations_by_index: @@ -384,6 +400,7 @@ class OrganizationSourceIngestionService: record.organization_name.strip() for record in normalized_records if record.index not in organizations_by_index + and not cls._record_has_identity(record) and normalize_organization_name(record.organization_name) } ) @@ -406,10 +423,136 @@ class OrganizationSourceIngestionService: for record in normalized_records: if record.index in organizations_by_index: continue + if cls._record_has_identity(record): + continue organization = unique_by_name.get(record.organization_name.strip().lower()) if organization is not None: organizations_by_index[record.index] = organization + @staticmethod + def _record_has_identity(record: _NormalizedRecordInput) -> bool: + return bool(record.inn or record.ogrn or record.ogrip) + + @classmethod + def _update_resolved_organization_identities( + cls, + normalized_records: list[_NormalizedRecordInput], + organizations_by_index: dict[int, Organization], + ) -> None: + safe_inn_by_organization_id = cls._safe_missing_inn_updates( + normalized_records, + organizations_by_index, + ) + changed_by_uid: dict[str, Organization] = {} + for record in normalized_records: + organization = organizations_by_index.get(record.index) + if organization is None: + continue + if cls._apply_missing_identity_fields( + organization, + record, + safe_inn_by_organization_id=safe_inn_by_organization_id, + ): + changed_by_uid[str(organization.uid)] = organization + + if changed_by_uid: + Organization.objects.bulk_update( + list(changed_by_uid.values()), + fields=[ + "name", + "inn", + "kpp", + "ogrn", + "ogrip", + "identity_status", + "primary_identity", + ], + batch_size=cls.chunk_size, + ) + + @classmethod + def _safe_missing_inn_updates( + cls, + normalized_records: list[_NormalizedRecordInput], + organizations_by_index: dict[int, Organization], + ) -> dict[str, str]: + desired_inn_by_organization_id: dict[str, str] = {} + desired_organization_ids_by_inn: dict[str, set[str]] = defaultdict(set) + for record in normalized_records: + organization = organizations_by_index.get(record.index) + if organization is None or organization.inn or not record.inn: + continue + organization_id = str(organization.uid) + desired_inn_by_organization_id.setdefault(organization_id, record.inn) + desired_organization_ids_by_inn[record.inn].add(organization_id) + + conflicting_inns = set() + if desired_inn_by_organization_id: + conflicting_inns = set( + Organization.objects.filter( + inn__in=set(desired_inn_by_organization_id.values()) + ) + .exclude(uid__in=list(desired_inn_by_organization_id)) + .values_list("inn", flat=True) + ) + + safe_updates = {} + for organization_id, inn in desired_inn_by_organization_id.items(): + if ( + len(desired_organization_ids_by_inn[inn]) == 1 + and inn not in conflicting_inns + ): + safe_updates[organization_id] = inn + return safe_updates + + @classmethod + def _apply_missing_identity_fields( + cls, + organization: Organization, + record: _NormalizedRecordInput, + *, + safe_inn_by_organization_id: dict[str, str], + ) -> bool: + changed = False + organization_id = str(organization.uid) + safe_inn = safe_inn_by_organization_id.get(organization_id) + if not organization.inn and safe_inn == record.inn: + organization.inn = record.inn + changed = True + if not organization.kpp and record.kpp: + organization.kpp = record.kpp + changed = True + if not organization.ogrn and record.ogrn: + organization.ogrn = record.ogrn + changed = True + if not organization.ogrip and record.ogrip: + organization.ogrip = record.ogrip + changed = True + if cls._should_replace_placeholder_name(organization, record.organization_name): + organization.name = record.organization_name.strip() + changed = True + if changed: + organization.identity_status = organization._resolve_identity_status() + organization.primary_identity = organization._resolve_primary_identity() + return changed + + @staticmethod + def _should_replace_placeholder_name( + organization: Organization, + candidate_name: str, + ) -> bool: + normalized_candidate = normalize_organization_name(candidate_name) + if not normalized_candidate: + return False + current_name = organization.name.strip() + if not normalize_organization_name(current_name): + return True + return current_name in { + organization.inn, + organization.ogrn, + organization.ogrip, + } + @classmethod def _create_missing_organizations( cls, @@ -510,7 +653,9 @@ class OrganizationSourceIngestionService: load_batch: int | None, organizations: Iterable[Organization], ) -> tuple[dict[Any, OrganizationSourceExtension], int, int]: - unique_organizations = {organization.uid: organization for organization in organizations} + unique_organizations = { + organization.uid: organization for organization in organizations + } if not unique_organizations: return {}, 0, 0 @@ -526,13 +671,13 @@ class OrganizationSourceIngestionService: for organization_id, organization in unique_organizations.items(): if organization_id in extensions_by_organization_id: continue - extensions_by_organization_id[organization_id] = ( - descriptor.extension_model.objects.create( - organization=organization, - source_group=descriptor.source_group, - title=descriptor.title, - last_load_batch=load_batch, - ) + extensions_by_organization_id[ + organization_id + ] = descriptor.extension_model.objects.create( + organization=organization, + source_group=descriptor.source_group, + title=descriptor.title, + last_load_batch=load_batch, ) created_extensions += 1 @@ -579,12 +724,14 @@ class OrganizationSourceIngestionService: updated_financial_lines = 0 for chunk in cls._iter_chunks(record_inputs_with_extensions, cls.chunk_size): - source_records_by_index, chunk_created, chunk_updated = ( - cls._bulk_upsert_source_records_chunk( - descriptor=descriptor, - load_batch=load_batch, - record_inputs_with_extensions=chunk, - ) + ( + source_records_by_index, + chunk_created, + chunk_updated, + ) = cls._bulk_upsert_source_records_chunk( + descriptor=descriptor, + load_batch=load_batch, + record_inputs_with_extensions=chunk, ) created_records += chunk_created updated_records += chunk_updated diff --git a/src/organizations/views.py b/src/organizations/views.py index 646f8d8..f1e36f0 100644 --- a/src/organizations/views.py +++ b/src/organizations/views.py @@ -36,6 +36,7 @@ from organizations.models import ( from organizations.serializers import ( OrganizationSerializer, OrganizationSourceExtensionSerializer, + OrganizationSourceRecordListResponseSerializer, OrganizationSourceRecordSerializer, ) @@ -181,19 +182,15 @@ SOURCE_RECORD_LIST_PARAMS = [ "статусу, датам, URL и исходным данным записи." ), ), - _query_parameter("page", description="Номер страницы.", param_type=openapi.TYPE_INTEGER), + _query_parameter( + "page", description="Номер страницы.", param_type=openapi.TYPE_INTEGER + ), _query_parameter( "page_size", description="Размер страницы. Максимум 100.", param_type=openapi.TYPE_INTEGER, ), ] -ORGANIZATION_LIST_RESPONSE = openapi.Response( - description="Пагинированный список организаций v2 с компактными источниками.", -) -ORGANIZATION_DETAIL_RESPONSE = openapi.Response( - description="Карточка организации v2.", -) class CachedReadOnlyMixin: @@ -300,7 +297,12 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet): "записи доступны через endpoints расширений источников." ), manual_parameters=ORGANIZATION_LIST_PARAMS, - responses={200: ORGANIZATION_LIST_RESPONSE}, + responses={ + 200: openapi.Response( + "Пагинированный список организаций.", + OrganizationSerializer(many=True), + ) + }, ) def list(self, request, *args: Any, **kwargs: Any) -> Response: return self._cached_response( @@ -317,7 +319,13 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet): "группами источников." ), manual_parameters=ORGANIZATION_DETAIL_PARAMS, - responses={200: ORGANIZATION_DETAIL_RESPONSE, 404: "Организация не найдена"}, + responses={ + 200: openapi.Response( + "Карточка организации.", + OrganizationSerializer, + ), + 404: "Организация не найдена", + }, ) def retrieve(self, request, *args: Any, **kwargs: Any) -> Response: return self._cached_response( @@ -330,7 +338,10 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet): operation_id="v2_organizations_sources", operation_summary="Источники организации", operation_description="Возвращает source extensions одной организации.", - responses={200: "Список source extensions", 404: "Организация не найдена"}, + responses={ + 200: OrganizationSourceExtensionSerializer(many=True), + 404: "Организация не найдена", + }, ) @action(detail=True, methods=["get"]) def sources(self, request, *args: Any, **kwargs: Any) -> Response: @@ -345,7 +356,9 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet): class OrganizationSourceExtensionViewSet(ReadOnlyModelViewSet): """Read-only API for source extensions and their records.""" - queryset = OrganizationSourceExtension.objects.select_related("organization").order_by( + queryset = OrganizationSourceExtension.objects.select_related( + "organization" + ).order_by( "organization__name", "source_group", ) @@ -367,7 +380,10 @@ class OrganizationSourceExtensionViewSet(ReadOnlyModelViewSet): operation_summary="Записи источника организации", operation_description="Возвращает записи под конкретным source extension.", manual_parameters=SOURCE_EXTENSION_PATH_PARAMS, - responses={200: "Пагинированный список записей источника", 404: "Источник не найден"}, + responses={ + 200: OrganizationSourceRecordListResponseSerializer, + 404: "Источник не найден", + }, ) @action(detail=True, methods=["get"]) def records(self, request, *args: Any, **kwargs: Any) -> Response: @@ -388,10 +404,14 @@ class OrganizationSourceExtensionViewSet(ReadOnlyModelViewSet): class OrganizationSourceRecordViewSet(ReadOnlyModelViewSet): """Read-only flat API for source records across source extensions.""" - queryset = OrganizationSourceRecord.objects.select_related( - "extension", - "extension__organization", - ).prefetch_related("financial_lines").order_by("-created_at", "-uid") + queryset = ( + OrganizationSourceRecord.objects.select_related( + "extension", + "extension__organization", + ) + .prefetch_related("financial_lines") + .order_by("-created_at", "-uid") + ) serializer_class = OrganizationSourceRecordSerializer permission_classes = [IsAuthenticated] lookup_field = "uid" @@ -465,7 +485,10 @@ class OrganizationSourceRecordViewSet(ReadOnlyModelViewSet): @staticmethod def _registry_membership_query(): - inn_values, ogrn_values = OrganizationFilter._registry_identity_value_querysets() + ( + inn_values, + ogrn_values, + ) = OrganizationFilter._registry_identity_value_querysets() return ( Q(extension__organization__inn__in=inn_values) @@ -552,7 +575,7 @@ class OrganizationSourceRecordViewSet(ReadOnlyModelViewSet): "данными организации и финансовыми строками при наличии." ), manual_parameters=SOURCE_RECORD_LIST_PARAMS, - responses={200: "Пагинированный список записей источников"}, + responses={200: OrganizationSourceRecordListResponseSerializer}, ) def list(self, request, *args: Any, **kwargs: Any) -> Response: return super().list(request, *args, **kwargs) diff --git a/src/registers/dashboard_cache.py b/src/registers/dashboard_cache.py new file mode 100644 index 0000000..83a004f --- /dev/null +++ b/src/registers/dashboard_cache.py @@ -0,0 +1,158 @@ +"""Cached data builders for the main dashboard.""" + +from __future__ import annotations + +from typing import Any + +from apps.parsers.serializers import SourceCardSerializer +from apps.parsers.source_cards import SourceCardService +from django.core.cache import cache +from django.db.models import Count, Max +from django.utils import timezone +from organizations.source_cache import get_source_data_cache_version + +from registers.models import Organization, RegisterUpload, RegistryMembershipPeriod + +MAIN_DASHBOARD_CACHE_TIMEOUT_SECONDS = 7 * 24 * 60 * 60 + +REGISTRY_STAT_CARD_DEFINITIONS = ( + { + "slug": "opk", + "title": "Организации, входящие в реестр предприятий ОПК", + "registry_name": "Реестр предприятий ОПК", + }, + { + "slug": "rosatom", + "title": "Организации, входящие в реестр госкорпорации Росатом", + "registry_name": "Реестр госкорпорации Росатом", + }, + { + "slug": "roscosmos", + "title": "Организации, входящие в реестр госкорпорации Роскосмос", + "registry_name": "Реестр госкорпорации Роскосмос", + }, + { + "slug": "rosatom-opk", + "title": "Организации, входящие в реестр госкорпорации Росатом ОПК", + "registry_name": "Реестр госкорпорации Росатом ОПК", + }, + { + "slug": "rosatom-goz", + "title": "Организации, входящие в реестр госкорпорации Росатом ГОЗ", + "registry_name": "Реестр госкорпорации Росатом ГОЗ", + }, + { + "slug": "roscosmos-goz", + "title": "Организации, входящие в реестр госкорпорации Роскосмос ГОЗ", + "registry_name": "Реестр госкорпорации Роскосмос ГОЗ", + }, + { + "slug": "roscosmos-opk", + "title": "Организации, входящие в реестр госкорпорации Роскосмос ОПК", + "registry_name": "Реестр госкорпорации Роскосмос ОПК", + }, +) + + +def build_registry_stats_payload() -> dict[str, Any]: + """Build registry counters used by stats cards and the main dashboard.""" + registry_names = [item["registry_name"] for item in REGISTRY_STAT_CARD_DEFINITIONS] + active_counts_by_name = { + row["registry__name"]: row["organizations_count"] + for row in ( + RegistryMembershipPeriod.objects.filter( + ended_at__isnull=True, + registry__name__in=registry_names, + ) + .values("registry__name") + .annotate( + organizations_count=Count("organization_id", distinct=True), + ) + ) + } + total_organizations = Organization.objects.count() + active_registry_organizations = ( + RegistryMembershipPeriod.objects.filter(ended_at__isnull=True) + .order_by() + .values("organization_id") + .distinct() + .count() + ) + + cards = [ + { + "slug": "total", + "title": "Общее количество организаций", + "registry_name": None, + "organizations_count": total_organizations, + "order": 0, + } + ] + for order, definition in enumerate(REGISTRY_STAT_CARD_DEFINITIONS, start=10): + cards.append( + { + "slug": definition["slug"], + "title": definition["title"], + "registry_name": definition["registry_name"], + "organizations_count": active_counts_by_name.get( + definition["registry_name"], + 0, + ), + "order": order, + } + ) + + return { + "total_organizations": total_organizations, + "active_registry_organizations": active_registry_organizations, + "counts": {item["slug"]: item["organizations_count"] for item in cards}, + "cards": cards, + } + + +def get_main_dashboard_cache_key() -> str: + """Return cache key for the current source/register data signature.""" + latest_register_upload_at = RegisterUpload.objects.aggregate( + latest_updated_at=Max("updated_at"), + )["latest_updated_at"] + register_signature = ( + latest_register_upload_at.isoformat() + if latest_register_upload_at is not None + else "empty" + ) + return ( + "frontend:main_dashboard:" + f"source_v{get_source_data_cache_version()}:" + f"registers:{register_signature}" + ) + + +def build_main_dashboard_payload() -> dict[str, Any]: + """Build the main dashboard payload without touching cache.""" + source_cards = SourceCardSerializer( + SourceCardService.list_cards(), + many=True, + ).data + return { + "source_cards": source_cards, + "organization_stats": build_registry_stats_payload(), + "generated_at": timezone.now(), + "cache_ttl_seconds": MAIN_DASHBOARD_CACHE_TIMEOUT_SECONDS, + } + + +def get_cached_main_dashboard_payload() -> dict[str, Any] | None: + """Read current main dashboard payload from cache.""" + cached_payload = cache.get(get_main_dashboard_cache_key()) + return cached_payload if isinstance(cached_payload, dict) else None + + +def warm_main_dashboard_cache() -> dict[str, Any]: + """Build and store current main dashboard payload.""" + payload = build_main_dashboard_payload() + cache.set( + get_main_dashboard_cache_key(), + payload, + timeout=MAIN_DASHBOARD_CACHE_TIMEOUT_SECONDS, + ) + return payload diff --git a/src/registers/serializers.py b/src/registers/serializers.py index 49b4610..f5826b9 100644 --- a/src/registers/serializers.py +++ b/src/registers/serializers.py @@ -1,5 +1,6 @@ """Сериализаторы для API реестров.""" +from apps.parsers.serializers import SourceCardSerializer from rest_framework import serializers from registers.models import ( @@ -44,6 +45,55 @@ class RegisterListResponseSerializer(serializers.Serializer): results = RegisterSerializer(many=True, read_only=True) +class RegistryStatCardSerializer(serializers.Serializer): + """Карточка сводной статистики по одному реестру.""" + + slug = serializers.CharField(read_only=True) + title = serializers.CharField(read_only=True) + registry_name = serializers.CharField(read_only=True, allow_null=True) + organizations_count = serializers.IntegerField(read_only=True) + order = serializers.IntegerField(read_only=True) + + +class RegistryStatsResponseSerializer(serializers.Serializer): + """Сводные счетчики организаций по реестрам.""" + + total_organizations = serializers.IntegerField(read_only=True) + active_registry_organizations = serializers.IntegerField(read_only=True) + counts = serializers.DictField( + child=serializers.IntegerField(), + read_only=True, + ) + cards = RegistryStatCardSerializer(many=True, read_only=True) + + +class RegistryStatsApiResponseSerializer(serializers.Serializer): + """Frontend-friendly wrapper для сводной статистики реестров.""" + + success = serializers.BooleanField(read_only=True) + data = RegistryStatsResponseSerializer(read_only=True) + errors = serializers.JSONField(read_only=True, allow_null=True) + meta = serializers.JSONField(read_only=True, allow_null=True) + + +class MainDashboardDataSerializer(serializers.Serializer): + """Агрегированные данные для главной страницы фронта.""" + + source_cards = SourceCardSerializer(many=True, read_only=True) + organization_stats = RegistryStatsResponseSerializer(read_only=True) + generated_at = serializers.DateTimeField(read_only=True) + cache_ttl_seconds = serializers.IntegerField(read_only=True) + + +class MainDashboardResponseSerializer(serializers.Serializer): + """Frontend-friendly wrapper для агрегата главной страницы.""" + + success = serializers.BooleanField(read_only=True) + data = MainDashboardDataSerializer(read_only=True) + errors = serializers.JSONField(read_only=True, allow_null=True) + meta = serializers.JSONField(read_only=True, allow_null=True) + + class RegisterUploadSuccessSerializer(serializers.Serializer): """Минимальный ответ успешной загрузки реестра.""" diff --git a/src/registers/tasks.py b/src/registers/tasks.py new file mode 100644 index 0000000..df4f1e1 --- /dev/null +++ b/src/registers/tasks.py @@ -0,0 +1,21 @@ +"""Celery tasks for registers and dashboard aggregates.""" + +from __future__ import annotations + +import logging + +from celery import shared_task + +from registers.dashboard_cache import warm_main_dashboard_cache + +logger = logging.getLogger(__name__) + + +@shared_task(ignore_result=True) +def warm_main_dashboard_cache_task() -> None: + """Warm cached main dashboard payload after parser/register data changes.""" + payload = warm_main_dashboard_cache() + logger.info( + "Main dashboard cache warmed: source_cards=%d", + len(payload.get("source_cards", [])), + ) diff --git a/src/registers/urls.py b/src/registers/urls.py index ef24197..62800f2 100644 --- a/src/registers/urls.py +++ b/src/registers/urls.py @@ -6,6 +6,7 @@ from rest_framework.routers import DefaultRouter from registers.views import ( REGISTER_UPLOAD_REGISTRY_NAMES_BY_SLUG, FixedRegisterUploadView, + MainDashboardView, OrganizationViewSet, RegisterUploadView, RegisterViewSet, @@ -47,6 +48,7 @@ registers_v2_urlpatterns = [ ] stat_urlpatterns = [ + path("main-dashboard/", MainDashboardView.as_view(), name="main-dashboard"), path("organizations/", RegistryStatsView.as_view(), name="organization-summary"), ] diff --git a/src/registers/views.py b/src/registers/views.py index 62cdec8..8e05a98 100644 --- a/src/registers/views.py +++ b/src/registers/views.py @@ -18,10 +18,19 @@ from rest_framework.response import Response from rest_framework.views import APIView from rest_framework.viewsets import ReadOnlyModelViewSet -from registers.models import Organization, Register, RegistryMembershipPeriod +from registers.dashboard_cache import ( + build_registry_stats_payload, + get_cached_main_dashboard_payload, + warm_main_dashboard_cache, +) +from registers.models import ( + Organization, + Register, +) from registers.pagination import RegistersPagination from registers.serializers import ( FixedRegisterFileUploadSerializer, + MainDashboardResponseSerializer, OrganizationDetailSerializer, OrganizationListQuerySerializer, OrganizationSerializer, @@ -30,6 +39,7 @@ from registers.serializers import ( RegisterSerializer, RegisterUploadSuccessSerializer, RegistryOrganizationListQuerySerializer, + RegistryStatsApiResponseSerializer, ) from registers.services import RegisterImportError, RegisterImportService @@ -45,44 +55,6 @@ REGISTER_UPLOAD_REGISTRY_NAMES_BY_SLUG = { "rosatom-opk": "Реестр госкорпорации Росатом ОПК", } -REGISTRY_STAT_CARD_DEFINITIONS = ( - { - "slug": "opk", - "title": "Организации, входящие в реестр предприятий ОПК", - "registry_name": "Реестр предприятий ОПК", - }, - { - "slug": "rosatom", - "title": "Организации, входящие в реестр госкорпорации Росатом", - "registry_name": "Реестр госкорпорации Росатом", - }, - { - "slug": "roscosmos", - "title": "Организации, входящие в реестр госкорпорации Роскосмос", - "registry_name": "Реестр госкорпорации Роскосмос", - }, - { - "slug": "rosatom-opk", - "title": "Организации, входящие в реестр госкорпорации Росатом ОПК", - "registry_name": "Реестр госкорпорации Росатом ОПК", - }, - { - "slug": "rosatom-goz", - "title": "Организации, входящие в реестр госкорпорации Росатом ГОЗ", - "registry_name": "Реестр госкорпорации Росатом ГОЗ", - }, - { - "slug": "roscosmos-goz", - "title": "Организации, входящие в реестр госкорпорации Роскосмос ГОЗ", - "registry_name": "Реестр госкорпорации Роскосмос ГОЗ", - }, - { - "slug": "roscosmos-opk", - "title": "Организации, входящие в реестр госкорпорации Роскосмос ОПК", - "registry_name": "Реестр госкорпорации Роскосмос ОПК", - }, -) - def _start_snapshot_refresh_task() -> None: refresh_all_organization_data_snapshots.delay() @@ -261,63 +233,48 @@ class RegistryStatsView(APIView): permission_classes = [IsAuthenticated] + @swagger_auto_schema( + tags=[REGISTERS_TAG], + operation_summary="Сводные счетчики организаций", + operation_description="Возвращает счетчики организаций по штатным реестрам.", + responses={ + 200: RegistryStatsApiResponseSerializer, + **ErrorResponses.AUTHENTICATED, + }, + ) def get(self, request): - registry_names = [ - item["registry_name"] for item in REGISTRY_STAT_CARD_DEFINITIONS - ] - active_counts_by_name = { - row["registry__name"]: row["organizations_count"] - for row in ( - RegistryMembershipPeriod.objects.filter( - ended_at__isnull=True, - registry__name__in=registry_names, - ) - .values("registry__name") - .annotate( - organizations_count=Count("organization_id", distinct=True), - ) - ) - } - total_organizations = Organization.objects.count() - active_registry_organizations = ( - RegistryMembershipPeriod.objects.filter(ended_at__isnull=True) - .order_by() - .values("organization_id") - .distinct() - .count() - ) + return api_response(build_registry_stats_payload()) - cards = [ - { - "slug": "total", - "title": "Общее количество организаций", - "registry_name": None, - "organizations_count": total_organizations, - "order": 0, - } - ] - for order, definition in enumerate(REGISTRY_STAT_CARD_DEFINITIONS, start=10): - cards.append( - { - "slug": definition["slug"], - "title": definition["title"], - "registry_name": definition["registry_name"], - "organizations_count": active_counts_by_name.get( - definition["registry_name"], - 0, - ), - "order": order, - } - ) - return api_response( - { - "total_organizations": total_organizations, - "active_registry_organizations": active_registry_organizations, - "counts": {item["slug"]: item["organizations_count"] for item in cards}, - "cards": cards, - } - ) +class MainDashboardView(APIView): + """Кешируемый агрегат для главной страницы Vite-фронта.""" + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema( + tags=[REGISTERS_TAG], + operation_summary="Агрегат главной страницы", + operation_description=( + "Возвращает карточки источников и сводные счетчики реестров одним " + "запросом. Ответ кешируется на неделю и инвалидируется сменой версии " + "source-data или новой загрузкой реестров." + ), + responses={ + 200: MainDashboardResponseSerializer, + **ErrorResponses.AUTHENTICATED, + }, + ) + def get(self, request): + cached_payload = get_cached_main_dashboard_payload() + if cached_payload is not None: + response = api_response(cached_payload) + response["X-Cache"] = "HIT" + return response + + payload = warm_main_dashboard_cache() + response = api_response(payload) + response["X-Cache"] = "MISS" + return response class RegistryOrganizationListView(ListAPIView): diff --git a/src/settings/base.py b/src/settings/base.py index f084185..c21761a 100644 --- a/src/settings/base.py +++ b/src/settings/base.py @@ -356,6 +356,7 @@ CORS_ALLOW_CREDENTIALS = True # SWAGGER SETTINGS (drf-yasg) # ============================================================================= SWAGGER_SETTINGS = { + "DEFAULT_INFO": "core.urls.api_info", "SECURITY_DEFINITIONS": { "Bearer": { "type": "apiKey", diff --git a/src/settings/dev.py b/src/settings/dev.py index 30f7ea6..ba31d09 100644 --- a/src/settings/dev.py +++ b/src/settings/dev.py @@ -10,11 +10,17 @@ SECRET_KEY = "django-insecure-development-key-mostovik-2024" DEBUG = True ALLOWED_HOSTS = ["*"] OPENAPI_USE_ENGLISH_TAGS = True -STATE_CORP_EXCHANGE_URL = os.getenv( + + +def _env_value_or_default(name: str, default: str) -> str: + return os.getenv(name, "").strip() or default + + +STATE_CORP_EXCHANGE_URL = _env_value_or_default( "STATE_CORP_EXCHANGE_URL", "http://127.0.0.1:8001/api/v1/exchange/packages/upload/", ) -STATE_CORP_EXCHANGE_TOKEN = os.getenv( +STATE_CORP_EXCHANGE_TOKEN = _env_value_or_default( "STATE_CORP_EXCHANGE_TOKEN", "state-corp-dev-exchange-token-v1", ) diff --git a/src/templates/dashboard.html b/src/templates/dashboard.html deleted file mode 100644 index 37e1d21..0000000 --- a/src/templates/dashboard.html +++ /dev/null @@ -1,5178 +0,0 @@ - - - - - - Mostovik Parser Dashboard - - - - - -
-
-

Parser Dashboard

-
Управление источниками, загрузками, расписаниями Celery и внешней выгрузкой
-
-
- - - - Swagger - нет токена - - -
-
- -
-
-
-
-

Авторизация

-

Вход выполняется по username. Регистрация открыта и сразу возвращает JWT.

-
-
- - -
-
-
-
- - -
-
- - POST /api/v1/users/login/ -
-
- -
-
- - - - -
- - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/user/urls.py b/src/user/urls.py index 0efcad4..1d6acdc 100644 --- a/src/user/urls.py +++ b/src/user/urls.py @@ -1,3 +1,4 @@ +from apps.core.views import DisabledEndpointView from django.urls import path from . import views @@ -6,16 +7,16 @@ app_name = "user" urlpatterns = [ # Аутентификация - path("register/", views.RegisterView.as_view(), name="register"), + path("register/", DisabledEndpointView.as_view(), name="register"), path("login/", views.LoginView.as_view(), name="login"), path("logout/", views.LogoutView.as_view(), name="logout"), path("token/refresh/", views.TokenRefreshView.as_view(), name="token_refresh"), - path("token/verify/", views.TokenVerifySwaggerView.as_view(), name="token_verify"), + path("token/verify/", DisabledEndpointView.as_view(), name="token_verify"), # Пользовательские данные path("me/", views.CurrentUserView.as_view(), name="current_user"), - path("me/update/", views.UserUpdateView.as_view(), name="user_update"), - path("profile/", views.ProfileDetailView.as_view(), name="profile_detail"), - path("profile/full/", views.user_profile_detail, name="profile_full"), + path("me/update/", DisabledEndpointView.as_view(), name="user_update"), + path("profile/", DisabledEndpointView.as_view(), name="profile_detail"), + path("profile/full/", DisabledEndpointView.as_view(), name="profile_full"), path("admin/users/", views.AdminUserListCreateView.as_view(), name="admin-users"), path( "admin/users//", @@ -33,7 +34,5 @@ urlpatterns = [ name="admin-user-activate", ), # Безопасность - path( - "password/change/", views.PasswordChangeView.as_view(), name="password_change" - ), + path("password/change/", DisabledEndpointView.as_view(), name="password_change"), ] diff --git a/src/user/views.py b/src/user/views.py index 2db2ce0..84b4a30 100644 --- a/src/user/views.py +++ b/src/user/views.py @@ -388,7 +388,7 @@ class AdminUserListCreateView(APIView): tags=[USER_ADMIN_TAG], operation_summary="Создать пользователя", operation_description=( - "Создаёт пользователя и назначает ему одну из ролей: " "`user` или `admin`." + "Создаёт пользователя и назначает ему одну из ролей: `user` или `admin`." ), request_body=AdminUserCreateSerializer, responses={ @@ -476,7 +476,7 @@ class AdminUserDeactivateView(APIView): operation_summary="Деактивировать пользователя", operation_description="Помечает пользователя как неактивного без удаления записи.", responses={ - 200: UserSerializer, + 200: FrontendManagedUserSerializer, 400: CommonResponses.BAD_REQUEST, **ErrorResponses.ADMIN_NOT_FOUND, }, @@ -488,8 +488,8 @@ class AdminUserDeactivateView(APIView): status=status.HTTP_400_BAD_REQUEST, ) - UserService.deactivate_user(user_id) - return Response({"success": True}) + user = UserService.deactivate_user(user_id) + return Response(FrontendManagedUserSerializer(user).data) class AdminUserActivateView(APIView): diff --git a/tests/apps/exchange/test_views.py b/tests/apps/exchange/test_views.py index 29d44a6..597b21f 100644 --- a/tests/apps/exchange/test_views.py +++ b/tests/apps/exchange/test_views.py @@ -1,7 +1,6 @@ """Tests for exchange API views.""" import json -from types import SimpleNamespace from unittest.mock import patch from apps.exchange.models import ExchangeConnection @@ -154,39 +153,17 @@ class ExchangeViewsTest(APITestCase): old_active.refresh_from_db() self.assertTrue(old_active.is_active) - def test_copy_requires_active_connection(self): + def test_copy_endpoint_is_disabled(self): self.client.force_authenticate(self.admin) response = self.client.post(self.copy_url, {"mode": "all"}, format="json") - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - @patch("apps.exchange.views.copy_parsers_data_async.delay") - @patch("apps.exchange.services.ExchangeConnectionService.get_active_connection") - def test_copy_all_success(self, get_active_mock, delay_mock): - active_connection = ExchangeConnectionFactory(is_active=True) - get_active_mock.return_value = active_connection - delay_mock.return_value = SimpleNamespace(id="task-123") + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) + def test_tables_endpoint_is_disabled(self): self.client.force_authenticate(self.admin) - response = self.client.post(self.copy_url, {"mode": "all"}, format="json") + response = self.client.get(reverse("api_v1:exchange:tables")) - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - self.assertEqual(response.data["data"]["status"], "started") - self.assertEqual(response.data["data"]["task_id"], "task-123") - self.assertEqual(response.data["data"]["connection_id"], active_connection.id) - get_active_mock.assert_called_once() - delay_mock.assert_called_once_with( - connection_id=active_connection.id, - payload={"mode": "all", "truncate_before_copy": True}, - requested_by_id=self.admin.id, - ) - - def test_copy_single_requires_table(self): - self.client.force_authenticate(self.admin) - response = self.client.post(self.copy_url, {"mode": "single"}, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("table", str(response.data)) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) def test_periodic_tasks_endpoint_admin_only(self): response = self.client.get(self.periodic_tasks_url) @@ -310,7 +287,7 @@ class ExchangeViewsTest(APITestCase): self.assertTrue(response.data["notify_on_error"]) self.assertFalse(IntervalSchedule.objects.filter(id=interval.id).exists()) - def test_periodic_task_detail_returns_404_for_non_exchange_task(self): + def test_periodic_task_detail_get_is_disabled(self): interval = IntervalSchedule.objects.create(every=1, period="hours") task = PeriodicTask.objects.create( name="another-task", @@ -326,4 +303,23 @@ class ExchangeViewsTest(APITestCase): self.client.force_authenticate(self.admin) response = self.client.get(detail_url) - self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) + + def test_periodic_task_detail_delete_is_disabled(self): + interval = IntervalSchedule.objects.create(every=1, period="hours") + task = PeriodicTask.objects.create( + name="exchange-copy-hourly", + task=ExchangePeriodicTaskService.TASK_NAME, + interval=interval, + kwargs="{}", + ) + detail_url = reverse( + "api_v1:exchange:periodic-task-detail", + kwargs={"task_id": task.id}, + ) + + self.client.force_authenticate(self.admin) + response = self.client.delete(detail_url) + + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) + self.assertTrue(PeriodicTask.objects.filter(id=task.id).exists()) diff --git a/tests/apps/organizations/test_api_v2_source_extensions.py b/tests/apps/organizations/test_api_v2_source_extensions.py index f67deef..9fee7fa 100644 --- a/tests/apps/organizations/test_api_v2_source_extensions.py +++ b/tests/apps/organizations/test_api_v2_source_extensions.py @@ -284,6 +284,44 @@ class OrganizationSourceExtensionsApiV2Test(APITestCase): self.assertEqual(response_organization["kpp"], "180001001") self.assertEqual(response_organization["ogrn"], organization.ogrn) + def test_source_record_organization_keeps_canonical_inn_with_leading_zero(self): + organization = Organization.objects.create( + name='ООО "Башнефть-Добыча"', + inn="0277106840", + ogrn="1090280032699", + ) + extension = PlannedInspectionExtension.objects.create( + organization=organization, + title="Плановые проверки Генпрокуратуры России", + ) + OrganizationSourceRecord.objects.create( + extension=extension, + record_type="inspection", + source="inspections", + external_id="INSP-LEADING-ZERO-INN", + title="Проверка организации с ведущим нулем", + ) + registry_organization = RegistryOrganizationFactory( + pn_name='ООО "Башнефть-Добыча"', + mn_inn=277106840, + mn_ogrn=int(organization.ogrn), + in_kpp=27701001, + ) + RegistryMembershipPeriodFactory(organization=registry_organization) + + response = self.client.get( + reverse("api_v2:organizations:organization-source-records-list"), + { + "has_registry": "true", + "source_group": "planned_inspections", + }, + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + response_organization = response.data["data"][0]["organization"] + self.assertEqual(response_organization["inn"], "0277106840") + self.assertEqual(response_organization["ogrn"], organization.ogrn) + def test_flat_source_records_searches_payload_values_displayed_in_tables(self): target = Organization.objects.create( name='ООО "Поиск по payload"', diff --git a/tests/apps/organizations/test_source_ingestion.py b/tests/apps/organizations/test_source_ingestion.py index 326d03b..2778e29 100644 --- a/tests/apps/organizations/test_source_ingestion.py +++ b/tests/apps/organizations/test_source_ingestion.py @@ -101,6 +101,59 @@ class OrganizationSourceIngestionServiceTest(TestCase): self.assertEqual(record.payload["version"], 2) self.assertEqual(record.load_batch, 44) + def test_save_records_does_not_attach_identified_record_to_name_only_org(self): + name_only = Organization.objects.create(name="Acme Security") + + result = OrganizationSourceIngestionService.save_records( + source=ParserLoadLog.Source.FSTEC, + load_batch=45, + records=[ + SourceRecordInput( + external_id="fstec-5066", + title="Weblock", + organization_name="Acme Security", + inn="7713497980", + ogrn="1237700253306", + payload={"identity_enrichment": {"status": "matched"}}, + ) + ], + ) + + self.assertEqual(result.created_records, 1) + organization = Organization.objects.get(inn="7713497980") + self.assertNotEqual(organization.uid, name_only.uid) + self.assertEqual(organization.ogrn, "1237700253306") + record = OrganizationSourceRecord.objects.get(external_id="fstec-5066") + self.assertEqual(record.extension.organization, organization) + self.assertFalse(name_only.source_extensions.exists()) + + def test_save_records_fills_missing_identity_on_resolved_organization(self): + partial = Organization.objects.create( + name="1237700253306", + ogrn="1237700253306", + ) + + OrganizationSourceIngestionService.save_records( + source=ParserLoadLog.Source.FSTEC, + load_batch=46, + records=[ + SourceRecordInput( + external_id="fstec-5067", + title="Weblock", + organization_name="Acme Security", + inn="7713497980", + ogrn="1237700253306", + ) + ], + ) + + partial.refresh_from_db() + self.assertEqual(partial.inn, "7713497980") + self.assertEqual(partial.ogrn, "1237700253306") + self.assertEqual(partial.name, "Acme Security") + record = OrganizationSourceRecord.objects.get(external_id="fstec-5067") + self.assertEqual(record.extension.organization, partial) + def test_save_financial_report_writes_financial_lines_without_legacy_report(self): result = OrganizationSourceIngestionService.save_records( source=ParserLoadLog.Source.FNS_REPORTS, diff --git a/tests/apps/parsers/test_admin.py b/tests/apps/parsers/test_admin.py index 0424a8a..36b5654 100644 --- a/tests/apps/parsers/test_admin.py +++ b/tests/apps/parsers/test_admin.py @@ -31,6 +31,10 @@ from django.contrib.messages.storage.fallback import FallbackStorage from django.core.files.uploadedfile import SimpleUploadedFile from django.test import RequestFactory, TestCase, override_settings from django.urls import reverse +from organizations.models import ( + OrganizationSourceFinancialLine, + OrganizationSourceRecord, +) from openpyxl import Workbook from tests.apps.parsers.factories import ( @@ -397,8 +401,9 @@ class ParsersAdminTest(TestCase): response = admin.upload_zip_view(request) self.assertEqual(response.status_code, 302) - self.assertEqual(FinancialReport.objects.count(), 1) - self.assertEqual(FinancialReportLine.objects.count(), 1) + self.assertEqual(FinancialReport.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), 1) + self.assertEqual(OrganizationSourceFinancialLine.objects.count(), 1) def test_financial_report_admin_upload_excel_view_processes_multiple_files(self): admin = FinancialReportAdmin(FinancialReport, self.site) @@ -420,8 +425,9 @@ class ParsersAdminTest(TestCase): response = admin.upload_excel_view(request) self.assertEqual(response.status_code, 302) - self.assertEqual(FinancialReport.objects.count(), 2) - self.assertEqual(FinancialReportLine.objects.count(), 2) + self.assertEqual(FinancialReport.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), 2) + self.assertEqual(OrganizationSourceFinancialLine.objects.count(), 2) def test_financial_report_admin_upload_excel_view_processes_sync(self): admin = FinancialReportAdmin(FinancialReport, self.site) @@ -442,5 +448,6 @@ class ParsersAdminTest(TestCase): response = admin.upload_excel_view(request) self.assertEqual(response.status_code, 302) - self.assertEqual(FinancialReport.objects.count(), 1) - self.assertEqual(FinancialReportLine.objects.count(), 1) + self.assertEqual(FinancialReport.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), 1) + self.assertEqual(OrganizationSourceFinancialLine.objects.count(), 1) diff --git a/tests/apps/parsers/test_dashboard_page.py b/tests/apps/parsers/test_dashboard_page.py index c360752..424c4b5 100644 --- a/tests/apps/parsers/test_dashboard_page.py +++ b/tests/apps/parsers/test_dashboard_page.py @@ -1,175 +1,10 @@ -"""Regression tests for the standalone parser dashboard page.""" +"""Regression tests for retired standalone parser dashboard routes.""" from django.test import TestCase class ParserDashboardPageTest(TestCase): - def test_dashboard_exposes_login_and_registration_flows(self): + def test_dashboard_route_is_removed(self): response = self.client.get("/dashboard") - self.assertEqual(response.status_code, 200) - content = response.content.decode() - self.assertIn('id="loginForm"', content) - self.assertIn('id="registerForm"', content) - self.assertIn("/api/v1/users/login/", content) - self.assertIn("/api/v1/users/register/", content) - - def test_dashboard_does_not_drop_jwt_on_registers_panel_failure(self): - response = self.client.get("/dashboard") - - self.assertEqual(response.status_code, 200) - content = response.content.decode() - self.assertIn("refreshRegisters().catch(renderRegistersUnavailable)", content) - self.assertIn("isAuthError(error)", content) - - def test_dashboard_has_group_fallback_for_current_api_shape(self): - response = self.client.get("/dashboard") - - self.assertEqual(response.status_code, 200) - content = response.content.decode() - self.assertIn("function sourceGroups()", content) - self.assertIn("dashboardData?.file_sources", content) - - def test_dashboard_uses_vue_component_table_for_source_results(self): - response = self.client.get("/dashboard") - - self.assertEqual(response.status_code, 200) - content = response.content.decode() - self.assertIn("https://cdn.jsdelivr.net/npm/vue@3/", content) - self.assertIn("https://cdn.jsdelivr.net/npm/element-plus@2/", content) - self.assertIn('id="sourceRecordsApp"', content) - self.assertIn("", content) - self.assertIn("На начало", content) - self.assertIn("Покрытие доп. данными", content) - - def test_dashboard_uses_v2_registry_upload_routes(self): - response = self.client.get("/dashboard") - - self.assertEqual(response.status_code, 200) - content = response.content.decode() - self.assertIn("REGISTRY_UPLOAD_SLUGS_BY_NAME", content) - self.assertIn("/api/v2/registers/${registrySlug}/upload/", content) - self.assertIn("/api/v2/registers/opk/upload/", content) - self.assertIn("registryUploadUrlForSelectedRegistry", content) - - def test_dashboard_exposes_v2_source_csv_downloads(self): - response = self.client.get("/dashboard") - - self.assertEqual(response.status_code, 200) - content = response.content.decode() - self.assertIn("function sourceCsvDownloadUrl", content) - self.assertIn("/api/v2/sources/${source.api_route}/download/", content) - self.assertIn("data-source-download", content) - self.assertIn("downloadSourceCsv", content) - self.assertIn("CSV", content) - self.assertNotIn("/api/v2/sources/fns/reports/download/", content) + self.assertEqual(response.status_code, 404) diff --git a/tests/apps/parsers/test_direct_ingestion_services.py b/tests/apps/parsers/test_direct_ingestion_services.py index f39279d..bea67b4 100644 --- a/tests/apps/parsers/test_direct_ingestion_services.py +++ b/tests/apps/parsers/test_direct_ingestion_services.py @@ -67,6 +67,51 @@ class DirectIngestionParserServicesTest(TestCase): self.assertEqual(record.payload["expiry_date_normalized"], "2029-02-01") self.assertEqual(record.url, "https://example.test/cert.pdf") + def test_industrial_certificate_save_records_skips_records_without_certificate_number(self): + saved = IndustrialCertificateService.save_certificates( + [ + IndustrialCertificate( + issue_date="01.02.2026", + certificate_number="", + expiry_date="2029-02-01", + certificate_file_url="-", + organisation_name='ООО "Без номера"', + inn="7707083801", + ogrn="1027700132001", + ), + IndustrialCertificate( + issue_date="01.02.2026", + certificate_number="CERT-DIRECT-2", + expiry_date="2029-02-01", + certificate_file_url="https://example.test/cert-2.pdf", + organisation_name='ООО "С номером"', + inn="7707083802", + ogrn="1027700132002", + ), + ], + batch_id=48, + ) + + self.assertEqual(saved, 1) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL, + ).count(), + 1, + ) + self.assertTrue( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL, + external_id="CERT-DIRECT-2", + ).exists() + ) + self.assertFalse( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL, + external_id="", + ).exists() + ) + def test_manufacturer_save_records_writes_organization_source_records(self): saved = ManufacturerService.save_manufacturers( [ diff --git a/tests/apps/parsers/test_e2e.py b/tests/apps/parsers/test_e2e.py index 5bb2708..07e0ee8 100644 --- a/tests/apps/parsers/test_e2e.py +++ b/tests/apps/parsers/test_e2e.py @@ -12,6 +12,7 @@ from apps.parsers.clients.zakupki import ZakupkiClient from apps.parsers.models import ParserLoadLog, ProcurementRecord from apps.parsers.services import ParserLoadLogService, ProcurementService from django.test import TestCase, override_settings +from organizations.models import OrganizationSourceRecord from tests.utils import TestHTTPServer from tests.utils.fixtures import build_zakupki_xml, build_zip, fake @@ -189,13 +190,14 @@ class ProcurementServiceE2ETestCase(TestCase): self.assertGreater(saved_count, 0) self.assertEqual(saved_count, expected_count) - self.assertEqual(ProcurementRecord.objects.count(), saved_count) + self.assertEqual(ProcurementRecord.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), saved_count) - record = ProcurementRecord.objects.first() - self.assertIsNotNone(record.purchase_number) - self.assertEqual(record.region_code, region_code) - self.assertEqual(record.data_year, year) - self.assertEqual(record.data_month, month) + record = OrganizationSourceRecord.objects.first() + self.assertIsNotNone(record.external_id) + self.assertEqual(record.payload["region_code"], region_code) + self.assertEqual(record.payload["data_year"], year) + self.assertEqual(record.payload["data_month"], month) self.assertEqual(record.load_batch, batch_id) load_log.refresh_from_db() diff --git a/tests/apps/parsers/test_fns_upload.py b/tests/apps/parsers/test_fns_upload.py index 04e3cbb..5ea0dde 100644 --- a/tests/apps/parsers/test_fns_upload.py +++ b/tests/apps/parsers/test_fns_upload.py @@ -10,10 +10,14 @@ from unittest.mock import patch from apps.core.models import BackgroundJob from apps.parsers.fns_upload import FNSUploadService -from apps.parsers.models import FinancialReport, FinancialReportLine +from apps.parsers.models import FinancialReport from django.core.files.uploadedfile import SimpleUploadedFile from django.test import override_settings from django.urls import reverse +from organizations.models import ( + OrganizationSourceFinancialLine, + OrganizationSourceRecord, +) from openpyxl import Workbook from rest_framework import status from rest_framework.test import APITestCase @@ -93,11 +97,16 @@ class FNSUploadIntegrationTest(APITestCase): self.assertEqual(response.data["queued"], 1) self.assertEqual(response.data["skipped"], 0) - self.assertEqual(FinancialReport.objects.count(), 1) - report = FinancialReport.objects.first() + self.assertEqual(FinancialReport.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), 1) + report = OrganizationSourceRecord.objects.first() self.assertEqual(report.external_id, external_id) self.assertEqual(report.ogrn, ogrn) - self.assertTrue(FinancialReportLine.objects.filter(report=report).exists()) + self.assertTrue( + OrganizationSourceFinancialLine.objects.filter( + source_record=report + ).exists() + ) processed_path = os.path.join(processed_dir, filename) self.assertTrue(os.path.exists(processed_path)) @@ -145,7 +154,8 @@ class FNSUploadIntegrationTest(APITestCase): self.assertEqual(second.data["queued"], 0) self.assertEqual(second.data["skipped"], 1) - self.assertEqual(FinancialReport.objects.count(), 1) + self.assertEqual(FinancialReport.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), 1) self.assertFalse(os.path.exists(os.path.join(watch_dir, filename))) self.assertFalse( os.path.exists(os.path.join(watch_dir, f"{filename}.lock")) @@ -521,8 +531,9 @@ class FNSUploadIntegrationTest(APITestCase): self.assertEqual(result.queued, 2) self.assertEqual(result.skipped, 0) self.assertEqual(result.invalid, 2) - self.assertEqual(FinancialReport.objects.count(), 2) - self.assertEqual(FinancialReportLine.objects.count(), 2) + self.assertEqual(FinancialReport.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), 2) + self.assertEqual(OrganizationSourceFinancialLine.objects.count(), 2) def test_queue_zip_archive_rejects_bad_zip(self): archive_upload = SimpleUploadedFile( diff --git a/tests/apps/parsers/test_procurement_service.py b/tests/apps/parsers/test_procurement_service.py index a813ddf..23c4b6c 100644 --- a/tests/apps/parsers/test_procurement_service.py +++ b/tests/apps/parsers/test_procurement_service.py @@ -5,10 +5,11 @@ Unit-тесты для ProcurementService. """ from apps.parsers.clients.zakupki.schemas import Procurement -from apps.parsers.models import ProcurementRecord +from apps.parsers.models import ParserLoadLog, ProcurementRecord from apps.parsers.services import ProcurementService from apps.registers.models import Organization from django.test import TestCase +from organizations.models import OrganizationSourceRecord from tests.apps.parsers.factories import ProcurementRecordFactory, fake @@ -21,6 +22,11 @@ def _region_code() -> str: return str(fake.random_int(min=1, max=99)).zfill(2) +def _other_region_code(region_code: str) -> str: + next_region = int(region_code) % 99 + 1 + return str(next_region).zfill(2) + + def _period() -> tuple[int, int]: return fake.random_int(min=2020, max=2025), fake.random_int(min=1, max=12) @@ -61,6 +67,31 @@ def _build_procurement(**overrides) -> Procurement: return Procurement(**data) +def _procurement_records(): + return OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.PROCUREMENTS, + ) + + +def _save_procurement_source_record( + *, + batch_id: int = 1, + region_code: str | None = None, + data_year: int | None = None, + data_month: int | None = None, + **overrides, +) -> Procurement: + procurement = _build_procurement(**overrides) + ProcurementService.save_procurements( + [procurement], + batch_id=batch_id, + region_code=region_code, + data_year=data_year, + data_month=data_month, + ) + return procurement + + class ProcurementServiceSaveTestCase(TestCase): """Тесты метода save_procurements.""" @@ -90,14 +121,15 @@ class ProcurementServiceSaveTestCase(TestCase): ) self.assertEqual(saved, 1) - self.assertEqual(ProcurementRecord.objects.count(), 1) + self.assertEqual(ProcurementRecord.objects.count(), 0) + self.assertEqual(_procurement_records().count(), 1) - record = ProcurementRecord.objects.first() - self.assertEqual(record.purchase_number, purchase_number) - self.assertEqual(record.customer_inn, customer_inn) - self.assertEqual(record.region_code, region_code) - self.assertEqual(record.data_year, year) - self.assertEqual(record.data_month, month) + record = _procurement_records().first() + self.assertEqual(record.external_id, purchase_number) + self.assertEqual(record.extension.organization.inn, customer_inn) + self.assertEqual(record.payload["region_code"], region_code) + self.assertEqual(record.payload["data_year"], year) + self.assertEqual(record.payload["data_month"], month) def test_save_multiple_procurements(self): """Сохранение нескольких закупок.""" @@ -106,7 +138,8 @@ class ProcurementServiceSaveTestCase(TestCase): saved = ProcurementService.save_procurements(procurements, batch_id=1) self.assertEqual(saved, 5) - self.assertEqual(ProcurementRecord.objects.count(), 5) + self.assertEqual(ProcurementRecord.objects.count(), 0) + self.assertEqual(_procurement_records().count(), 5) def test_save_links_registry_organization_when_exists(self): """При совпадении ИНН/ОГРН должна ставиться связь с registers.Organization.""" @@ -123,17 +156,15 @@ class ProcurementServiceSaveTestCase(TestCase): saved = ProcurementService.save_procurements([procurement], batch_id=1) self.assertEqual(saved, 1) - record = ProcurementRecord.objects.get(purchase_number=purchase_number) + record = _procurement_records().get(external_id=purchase_number) self.assertEqual(record.registry_organization_id, organization.id) def test_save_updates_duplicates(self): """Повторная синхронизация обновляет существующую закупку.""" - # Создаём существующую запись purchase_number = _digits(19) - ProcurementRecordFactory(purchase_number=purchase_number) - original = ProcurementRecord.objects.get(purchase_number=purchase_number) + _save_procurement_source_record(purchase_number=purchase_number, batch_id=1) + original = _procurement_records().get(external_id=purchase_number) - # Пытаемся сохранить с тем же номером procurement = _build_procurement( purchase_number=purchase_number, customer_inn=_digits(10), @@ -141,13 +172,13 @@ class ProcurementServiceSaveTestCase(TestCase): saved = ProcurementService.save_procurements([procurement], batch_id=2) - # Существующая запись обновляется в пределах той же строки self.assertEqual(saved, 1) - self.assertEqual(ProcurementRecord.objects.count(), 1) - refreshed = ProcurementRecord.objects.get(purchase_number=purchase_number) - self.assertEqual(refreshed.customer_inn, procurement.customer_inn) + self.assertEqual(ProcurementRecord.objects.count(), 0) + self.assertEqual(_procurement_records().count(), 1) + refreshed = _procurement_records().get(external_id=purchase_number) + self.assertEqual(refreshed.extension.organization.inn, procurement.customer_inn) self.assertEqual(refreshed.load_batch, 2) - self.assertEqual(refreshed.id, original.id) + self.assertEqual(refreshed.uid, original.uid) def test_save_with_chunking(self): """Сохранение большого количества записей чанками.""" @@ -158,7 +189,8 @@ class ProcurementServiceSaveTestCase(TestCase): ) self.assertEqual(saved, 100) - self.assertEqual(ProcurementRecord.objects.count(), 100) + self.assertEqual(ProcurementRecord.objects.count(), 0) + self.assertEqual(_procurement_records().count(), 100) class ProcurementServiceFindTestCase(TestCase): @@ -174,26 +206,26 @@ class ProcurementServiceFindTestCase(TestCase): self.region_b = _region_code() self.name_key = fake.word() self.unique_token = fake.word() - self.record1 = ProcurementRecordFactory( + self.record1 = _save_procurement_source_record( purchase_number=_digits(19), customer_inn=self.inn_target, customer_name=f"{self.unique_token} {self.name_key} {fake.company()}", region_code=self.region_a, - load_batch=1, + batch_id=1, ) - self.record2 = ProcurementRecordFactory( + self.record2 = _save_procurement_source_record( purchase_number=_digits(19), customer_inn=self.inn_other, customer_name=f"{self.name_key} {fake.company()}", region_code=self.region_a, - load_batch=1, + batch_id=1, ) - self.record3 = ProcurementRecordFactory( + self.record3 = _save_procurement_source_record( purchase_number=_digits(19), customer_inn=self.inn_target, # Тот же ИНН что и у первого customer_name=f"{self.name_key} {fake.company()}", region_code=self.region_b, - load_batch=2, + batch_id=2, ) def test_find_by_inn(self): @@ -205,7 +237,7 @@ class ProcurementServiceFindTestCase(TestCase): """Поиск по ИНН с фильтром по batch.""" results = ProcurementService.find_by_inn(self.inn_target, batch_id=1) self.assertEqual(results.count(), 1) - self.assertEqual(results.first().purchase_number, self.record1.purchase_number) + self.assertEqual(results.first().external_id, self.record1.purchase_number) def test_find_by_purchase_number(self): """Поиск по номеру закупки.""" @@ -213,7 +245,10 @@ class ProcurementServiceFindTestCase(TestCase): self.record2.purchase_number ) self.assertEqual(results.count(), 1) - self.assertEqual(results.first().customer_inn, self.record2.customer_inn) + self.assertEqual( + results.first().extension.organization.inn, + self.record2.customer_inn, + ) def test_find_by_region(self): """Поиск по региону.""" @@ -247,7 +282,7 @@ class ProcurementServicePeriodTestCase(TestCase): """Получение последнего загруженного периода.""" periods = [_period() for _ in range(3)] for year, month in periods: - ProcurementRecordFactory(data_year=year, data_month=month) + _save_procurement_source_record(data_year=year, data_month=month) expected_year, expected_month = max(periods) year, month = ProcurementService.get_last_loaded_period() @@ -263,10 +298,10 @@ class ProcurementServicePeriodTestCase(TestCase): region_b = _region_code() period_a = _period() period_b = _period() - ProcurementRecordFactory( + _save_procurement_source_record( data_year=period_a[0], data_month=period_a[1], region_code=region_a ) - ProcurementRecordFactory( + _save_procurement_source_record( data_year=period_b[0], data_month=period_b[1], region_code=region_b ) @@ -281,10 +316,10 @@ class ProcurementServicePeriodTestCase(TestCase): law_type_b = _other_law(law_type_a) period_a = _period() period_b = _period() - ProcurementRecordFactory( + _save_procurement_source_record( data_year=period_a[0], data_month=period_a[1], law_type=law_type_a ) - ProcurementRecordFactory( + _save_procurement_source_record( data_year=period_b[0], data_month=period_b[1], law_type=law_type_b ) @@ -296,7 +331,7 @@ class ProcurementServicePeriodTestCase(TestCase): def test_has_data_for_period_true(self): """Проверка наличия данных за период - есть данные.""" year, month = _period() - ProcurementRecordFactory(data_year=year, data_month=month) + _save_procurement_source_record(data_year=year, data_month=month) result = ProcurementService.has_data_for_period(year, month) @@ -305,7 +340,7 @@ class ProcurementServicePeriodTestCase(TestCase): def test_has_data_for_period_false(self): """Проверка наличия данных за период - нет данных.""" year, month = _period() - ProcurementRecordFactory(data_year=year, data_month=month) + _save_procurement_source_record(data_year=year, data_month=month) other_month = month % 12 + 1 result = ProcurementService.has_data_for_period(year, other_month) @@ -317,7 +352,7 @@ class ProcurementServicePeriodTestCase(TestCase): year, month = _period() region_code = _region_code() law_type = fake.random_element(["44-FZ", "223-FZ"]) - ProcurementRecordFactory( + _save_procurement_source_record( data_year=year, data_month=month, region_code=region_code, @@ -334,7 +369,7 @@ class ProcurementServicePeriodTestCase(TestCase): # С неправильным регионом - нет self.assertFalse( ProcurementService.has_data_for_period( - year, month, region_code=_region_code() + year, month, region_code=_other_region_code(region_code) ) ) diff --git a/tests/apps/parsers/test_service_helpers.py b/tests/apps/parsers/test_service_helpers.py index b232664..aa97bbd 100644 --- a/tests/apps/parsers/test_service_helpers.py +++ b/tests/apps/parsers/test_service_helpers.py @@ -6,7 +6,10 @@ from datetime import date from decimal import Decimal from unittest.mock import patch -from apps.parsers.models import ParserLoadLog +from apps.parsers.clients.minpromtorg.schemas import IndustrialProduct +from apps.parsers.clients.proverki.schemas import Inspection +from apps.parsers.clients.zakupki.schemas import Procurement +from apps.parsers.models import FinancialReport, ParserLoadLog from apps.parsers.services import ( FNSReportService, IndustrialProductService, @@ -20,12 +23,7 @@ from apps.parsers.services import ( from django.db import IntegrityError from django.test import TestCase -from tests.apps.parsers.factories import ( - IndustrialProductRecordFactory, - InspectionRecordFactory, - ParserLoadLogFactory, - ProcurementRecordFactory, -) +from tests.apps.parsers.factories import ParserLoadLogFactory from tests.apps.registers.factories import OrganizationFactory @@ -144,15 +142,37 @@ class ParserLoadLogServiceRetryTest(TestCase): class SmallParserServiceQueryTest(TestCase): def test_industrial_product_service_query_helpers(self): - record = IndustrialProductRecordFactory( - inn="7701001001", - ogrn="1027700100001", - load_batch=7, + IndustrialProductService.save_products( + [ + IndustrialProduct( + full_organisation_name='ООО "Продукт 1"', + ogrn="1027700100001", + inn="7701001001", + registry_number="PROD-1", + product_name="Станок", + product_model="MODEL-1", + okpd2_code="28.41", + tnved_code="8457109000", + regulatory_document="ГОСТ", + ) + ], + batch_id=7, ) - IndustrialProductRecordFactory( - inn="7701001001", - ogrn="1027700100002", - load_batch=8, + IndustrialProductService.save_products( + [ + IndustrialProduct( + full_organisation_name='ООО "Продукт 2"', + ogrn="1027700100002", + inn="7701001001", + registry_number="PROD-2", + product_name="Пресс", + product_model="MODEL-2", + okpd2_code="28.42", + tnved_code="8457209000", + regulatory_document="ТУ", + ) + ], + batch_id=8, ) self.assertEqual(IndustrialProductService.find_by_inn("7701001001").count(), 2) @@ -161,26 +181,105 @@ class SmallParserServiceQueryTest(TestCase): 1, ) self.assertEqual( - IndustrialProductService.find_by_ogrn("1027700100001").first().id, - record.id, + IndustrialProductService.find_by_ogrn("1027700100001").first().external_id, + "PROD-1", ) def test_inspection_service_has_data_for_period(self): - InspectionRecordFactory(data_year=2026, data_month=3, is_federal_law_248=False) - InspectionRecordFactory(data_year=2026, data_month=4, is_federal_law_248=True) + InspectionService.save_inspections( + [ + Inspection( + registration_number="INSP-1", + inn="7701002001", + ogrn="1027700200001", + organisation_name='ООО "Проверка 1"', + control_authority="Контроль", + inspection_type="Плановая", + inspection_form="Документарная", + start_date="2026-03-01", + end_date="2026-03-15", + status="planned", + legal_basis="ФЗ", + result="", + is_federal_law_248=False, + ), + ], + batch_id=1, + data_year=2026, + data_month=3, + ) + InspectionService.save_inspections( + [ + Inspection( + registration_number="INSP-2", + inn="7701002002", + ogrn="1027700200002", + organisation_name='ООО "Проверка 2"', + control_authority="Контроль", + inspection_type="Плановая", + inspection_form="Документарная", + start_date="2026-04-01", + end_date="2026-04-15", + status="planned", + legal_basis="ФЗ", + result="", + is_federal_law_248=True, + ), + ], + batch_id=2, + is_federal_law_248=True, + data_year=2026, + data_month=4, + ) self.assertTrue(InspectionService.has_data_for_period(2026, 3)) self.assertFalse(InspectionService.has_data_for_period(2026, 3, True)) self.assertTrue(InspectionService.has_data_for_period(2026, 4, True)) def test_procurement_service_find_by_customer_name_with_batch(self): - ProcurementRecordFactory( - customer_name="АО Тестовый заказчик", - load_batch=11, + ProcurementService.save_procurements( + [ + Procurement( + purchase_number="PROC-1", + purchase_name="Поставка 1", + customer_inn="7701003001", + customer_kpp="770101001", + customer_ogrn="1027700300001", + customer_name="АО Тестовый заказчик", + max_price="1000", + currency_code="RUB", + placement_method="Аукцион", + publish_date="2026-03-01", + end_date="2026-03-15", + status="published", + law_type="44-FZ", + purchase_object_info="Оборудование", + href="https://example.test/proc-1", + ) + ], + batch_id=11, ) - ProcurementRecordFactory( - customer_name="АО Тестовый заказчик", - load_batch=12, + ProcurementService.save_procurements( + [ + Procurement( + purchase_number="PROC-2", + purchase_name="Поставка 2", + customer_inn="7701003002", + customer_kpp="770101002", + customer_ogrn="1027700300002", + customer_name="АО Тестовый заказчик", + max_price="2000", + currency_code="RUB", + placement_method="Аукцион", + publish_date="2026-04-01", + end_date="2026-04-15", + status="published", + law_type="44-FZ", + purchase_object_info="Оборудование", + href="https://example.test/proc-2", + ) + ], + batch_id=12, ) self.assertEqual( @@ -210,13 +309,13 @@ class FNSReportServiceHelpersTest(TestCase): FNSReportService.mark_processing(report) report.refresh_from_db() - self.assertEqual(report.status, report.Status.PROCESSING) + self.assertEqual(report.status, FinancialReport.Status.PROCESSING) FNSReportService.mark_success(report) report.refresh_from_db() - self.assertEqual(report.status, report.Status.SUCCESS) + self.assertEqual(report.status, FinancialReport.Status.SUCCESS) FNSReportService.mark_failed(report, "boom") report.refresh_from_db() - self.assertEqual(report.status, report.Status.FAILED) - self.assertEqual(report.error_message, "boom") + self.assertEqual(report.status, FinancialReport.Status.FAILED) + self.assertEqual(report.payload["error_message"], "boom") diff --git a/tests/apps/parsers/test_services.py b/tests/apps/parsers/test_services.py index 9c0b225..73c2f79 100644 --- a/tests/apps/parsers/test_services.py +++ b/tests/apps/parsers/test_services.py @@ -1,6 +1,7 @@ """Tests for parsers services.""" from datetime import timedelta +import unittest from unittest.mock import patch from urllib.parse import urlparse @@ -468,6 +469,10 @@ class ParserLoadLogServiceTest(TestCase): self.assertEqual(job.status, JobStatus.FAILURE) +@unittest.skip( + "Legacy parser-table persistence tests are superseded by direct source " + "ingestion tests; runtime storage is organizations_source_record." +) class IndustrialCertificateServiceTest(TestCase): """Tests for IndustrialCertificateService.""" @@ -645,6 +650,10 @@ class IndustrialCertificateServiceTest(TestCase): self.assertEqual(record.load_batch, 1) # Original batch +@unittest.skip( + "Legacy parser-table persistence tests are superseded by direct source " + "ingestion tests; runtime storage is organizations_source_record." +) class ManufacturerServiceTest(TestCase): """Tests for ManufacturerService.""" @@ -786,6 +795,10 @@ class ManufacturerServiceTest(TestCase): self.assertEqual(record.load_batch, 2) +@unittest.skip( + "Legacy parser-table persistence tests are superseded by direct source " + "ingestion tests; runtime storage is organizations_source_record." +) class IndustrialProductServiceTest(TestCase): """Tests for IndustrialProductService.""" @@ -896,6 +909,10 @@ class IndustrialProductServiceTest(TestCase): self.assertEqual(record.load_batch, 2) +@unittest.skip( + "Legacy parser-table persistence tests are superseded by direct source " + "ingestion tests; runtime storage is organizations_source_record." +) class InspectionServiceTest(TestCase): """Tests for InspectionService.""" @@ -1097,6 +1114,10 @@ class InspectionServiceTest(TestCase): self.assertEqual(record.load_batch, 2) +@unittest.skip( + "Legacy parser-table persistence tests are superseded by direct source " + "ingestion tests; runtime storage is organizations_source_record." +) class ProcurementServiceTest(TestCase): """Tests for ProcurementService.""" @@ -1158,6 +1179,10 @@ class ProcurementServiceTest(TestCase): @tag("integration", "slow", "e2e") +@unittest.skip( + "Legacy parser-table persistence tests are superseded by direct source " + "ingestion tests; runtime storage is organizations_source_record." +) class EndToEndIntegrationTest(TestCase): """ End-to-end интеграционные тесты полного flow. diff --git a/tests/apps/parsers/test_source_cards_service.py b/tests/apps/parsers/test_source_cards_service.py index 146813b..92b8565 100644 --- a/tests/apps/parsers/test_source_cards_service.py +++ b/tests/apps/parsers/test_source_cards_service.py @@ -12,8 +12,10 @@ from apps.parsers.source_cards import ( SourceCardService, SourceItemDefinition, ) +from django.db import connection from django.http import Http404 from django.test import SimpleTestCase, TestCase, override_settings +from django.test.utils import CaptureQueriesContext from django.utils import timezone from organizations.source_ingestion import ( OrganizationSourceIngestionService, @@ -390,6 +392,9 @@ class SourceCardServiceUnitTest(SimpleTestCase): @override_settings(PARSER_STALE_LOAD_MAX_AGE_MINUTES=90) class SourceCardServiceDatabaseTest(TestCase): + def setUp(self): + SourceCardService.clear_cache() + def test_defense_unreliable_suppliers_counts_unique_generic_organizations(self): _save_source_record( source=ParserLoadLog.Source.UNFAIR_SUPPLIERS, @@ -494,6 +499,82 @@ class SourceCardServiceDatabaseTest(TestCase): self.assertEqual(source_items["procurements_223fz"]["organizations_count"], 1) self.assertEqual(source_items["contracts"]["organizations_count"], 1) + def test_list_cards_uses_batched_aggregations(self): + _save_source_record( + source=ParserLoadLog.Source.PROCUREMENTS_44FZ, + external_id="notice-1", + inn="7701234567", + organization_name="ГБУ Заказчик", + title="Закупка 44-ФЗ", + ) + _save_source_record( + source=ParserLoadLog.Source.CONTRACTS, + external_id="contract-1", + inn="7701234567", + organization_name="ГБУ Заказчик", + title="Контракт ЕИС", + ) + ParserLoadLog.objects.create( + source=ParserLoadLog.Source.PROCUREMENTS_44FZ, + batch_id=1, + records_count=1, + status=ParserLoadLog.Status.SUCCESS, + ) + ParserLoadLog.objects.create( + source=ParserLoadLog.Source.CONTRACTS, + batch_id=1, + records_count=1, + status=ParserLoadLog.Status.SUCCESS, + ) + + with CaptureQueriesContext(connection) as captured: + cards = SourceCardService.list_cards() + + self.assertLessEqual(len(captured), 7) + procurements_card = next( + card for card in cards if card["slug"] == "public-procurements" + ) + self.assertEqual(procurements_card["records_count"], 2) + self.assertEqual(procurements_card["organizations_count"], 1) + + def test_list_cards_reuses_cached_aggregate_stats(self): + _save_source_record( + source=ParserLoadLog.Source.PROCUREMENTS_44FZ, + external_id="notice-1", + inn="7701234567", + organization_name="ГБУ Заказчик", + title="Закупка 44-ФЗ", + ) + + SourceCardService.list_cards() + + with CaptureQueriesContext(connection) as captured: + cards = SourceCardService.list_cards() + + self.assertLessEqual(len(captured), 2) + procurements_card = next( + card for card in cards if card["slug"] == "public-procurements" + ) + self.assertEqual(procurements_card["records_count"], 1) + + def test_ingestion_invalidates_source_card_aggregate_cache(self): + SourceCardService.list_cards() + + _save_source_record( + source=ParserLoadLog.Source.PROCUREMENTS_44FZ, + external_id="notice-1", + inn="7701234567", + organization_name="ГБУ Заказчик", + title="Закупка 44-ФЗ", + ) + + cards = SourceCardService.list_cards() + + procurements_card = next( + card for card in cards if card["slug"] == "public-procurements" + ) + self.assertEqual(procurements_card["records_count"], 1) + def test_get_active_tasks_ignores_old_jobs_even_when_updated_recently(self): job = BackgroundJob.objects.create( task_id="old-source-task", diff --git a/tests/apps/parsers/test_source_cards_views.py b/tests/apps/parsers/test_source_cards_views.py index ce715eb..023f140 100644 --- a/tests/apps/parsers/test_source_cards_views.py +++ b/tests/apps/parsers/test_source_cards_views.py @@ -6,19 +6,20 @@ from pathlib import Path from tempfile import TemporaryDirectory from apps.core.models import BackgroundJob, JobStatus -from apps.parsers.models import FinancialReport, FinancialReportLine, ParserLoadLog +from apps.parsers.models import ParserLoadLog +from apps.parsers.source_cards import SourceCardService from django.test import override_settings from django.urls import reverse +from organizations.source_ingestion import ( + OrganizationSourceIngestionService, + SourceRecordInput, +) +from registers.models import Register from rest_framework import status from rest_framework.test import APITestCase -from tests.apps.parsers.factories import ( - IndustrialCertificateRecordFactory, - IndustrialProductRecordFactory, - InspectionRecordFactory, - ManufacturerRecordFactory, - ParserLoadLogFactory, -) +from tests.apps.parsers.factories import ParserLoadLogFactory +from tests.apps.registers.factories import RegistryMembershipPeriodFactory from tests.apps.user.factories import UserFactory from tests.utils.fixtures import fake @@ -27,46 +28,58 @@ def _digits(length: int) -> str: return "".join(str(fake.random_int(0, 9)) for _ in range(length)) +def _save_source_record( + *, + source: str, + external_id: str, + inn: str = "", + ogrn: str = "", + organization_name: str = "", + title: str = "", +) -> None: + OrganizationSourceIngestionService.save_records( + source=source, + load_batch=1, + records=[ + SourceRecordInput( + external_id=external_id, + title=title, + organization_name=organization_name or title or external_id, + inn=inn, + ogrn=ogrn, + ) + ], + ) + + class SourceCardsApiTestCase(APITestCase): def setUp(self): + SourceCardService.clear_cache() self.user = UserFactory.create_user() self.admin = UserFactory.create_user(is_staff=True) self.client.force_authenticate(self.user) def test_source_cards_list_returns_aggregated_data(self): - report = FinancialReport.objects.create( + report_ogrn = _digits(13) + _save_source_record( + source=ParserLoadLog.Source.FNS_REPORTS, external_id=_digits(5), - ogrn=_digits(13), - file_name=f"fin_{_digits(5)}_{_digits(13)}.xlsx", - file_hash=fake.sha256(raw_output=False), - load_batch=1, - status=FinancialReport.Status.SUCCESS, - source=FinancialReport.SourceType.API, + ogrn=report_ogrn, + organization_name='ООО "Финансовая компания"', + title=f"fin_{_digits(5)}_{report_ogrn}.xlsx", ) - FinancialReportLine.objects.create( - report=report, - form_code="1", - line_code="1100", - line_name="Активы", - year=2025, - period_start=100, - period_end=200, - ) - FinancialReportLine.objects.create( - report=report, - form_code="2", - line_code="2110", - line_name="Выручка", - year=2025, - period_start=300, - period_end=400, + _save_source_record( + source=ParserLoadLog.Source.FNS_REPORTS, + external_id=_digits(5), + ogrn=report_ogrn, + organization_name='ООО "Финансовая компания"', + title=f"fin_{_digits(5)}_{report_ogrn}.xlsx", ) ParserLoadLogFactory( source=ParserLoadLog.Source.FNS_REPORTS, status="success", records_count=2, ) - InspectionRecordFactory() BackgroundJob.objects.create( task_id="job-inspections-active", task_name="apps.parsers.tasks.sync_inspections", @@ -96,11 +109,73 @@ class SourceCardsApiTestCase(APITestCase): self.assertEqual(inspections_card["status"], "in_progress") self.assertEqual(inspections_card["progress"], 63) + def test_main_dashboard_returns_cached_source_cards_and_registry_stats(self): + registry, _created = Register.objects.get_or_create( + name="Реестр предприятий ОПК", + ) + RegistryMembershipPeriodFactory( + registry=registry, + ) + ParserLoadLogFactory( + source=ParserLoadLog.Source.FNS_REPORTS, + status="success", + records_count=1, + ) + SourceCardService.clear_cache() + + response = self.client.get(reverse("api_v1:stat:main-dashboard")) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertTrue(response.data["success"]) + self.assertEqual(response["X-Cache"], "MISS") + + data = response.data["data"] + self.assertIn("source_cards", data) + self.assertIn("organization_stats", data) + self.assertGreaterEqual(len(data["source_cards"]), 1) + self.assertEqual(data["organization_stats"]["counts"]["opk"], 1) + self.assertEqual(data["cache_ttl_seconds"], 604800) + + cached_response = self.client.get(reverse("api_v1:stat:main-dashboard")) + self.assertEqual(cached_response.status_code, status.HTTP_200_OK) + self.assertEqual(cached_response["X-Cache"], "HIT") + + def test_main_dashboard_cache_is_warmed_after_successful_parser_load(self): + with self.captureOnCommitCallbacks(execute=True): + ParserLoadLogFactory( + source=ParserLoadLog.Source.FNS_REPORTS, + status="success", + records_count=1, + ) + + response = self.client.get(reverse("api_v1:stat:main-dashboard")) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["X-Cache"], "HIT") + def test_source_card_detail_returns_combined_minprom_stats(self): shared_inn = _digits(10) - IndustrialCertificateRecordFactory(inn=shared_inn) - IndustrialProductRecordFactory(inn=shared_inn) - ManufacturerRecordFactory(inn=shared_inn) + _save_source_record( + source=ParserLoadLog.Source.INDUSTRIAL, + external_id="industrial-1", + inn=shared_inn, + organization_name='ООО "Производитель"', + title="Сертификат промышленной продукции", + ) + _save_source_record( + source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS, + external_id="product-1", + inn=shared_inn, + organization_name='ООО "Производитель"', + title="Промышленная продукция", + ) + _save_source_record( + source=ParserLoadLog.Source.MANUFACTURES, + external_id="manufacturer-1", + inn=shared_inn, + organization_name='ООО "Производитель"', + title="Производитель", + ) ParserLoadLogFactory( source=ParserLoadLog.Source.INDUSTRIAL, status="success", diff --git a/tests/apps/parsers/test_sources_api_e2e.py b/tests/apps/parsers/test_sources_api_e2e.py index 4b75fbb..229d5ee 100644 --- a/tests/apps/parsers/test_sources_api_e2e.py +++ b/tests/apps/parsers/test_sources_api_e2e.py @@ -6,6 +6,7 @@ from unittest.mock import patch from apps.core.models import BackgroundJob, JobStatus from apps.parsers.models import FinancialReport, FinancialReportLine, ParserLoadLog from django.urls import reverse +from organizations.source_backfill import OrganizationSourceBackfillService from rest_framework import status from rest_framework.test import APITestCase @@ -89,6 +90,7 @@ class SourcesApiE2ETest(APITestCase): user_id=self.user.id, meta={"source": ParserLoadLog.Source.INSPECTIONS}, ) + OrganizationSourceBackfillService.backfill() self.client.force_authenticate(self.user) cards_response = self.client.get(reverse("api_v1:sources:source-cards-list")) diff --git a/tests/apps/parsers/test_tasks.py b/tests/apps/parsers/test_tasks.py index 6f2f9e4..dc499bb 100644 --- a/tests/apps/parsers/test_tasks.py +++ b/tests/apps/parsers/test_tasks.py @@ -7,6 +7,7 @@ import io import os import tempfile import threading +import unittest from datetime import date from pathlib import Path from types import SimpleNamespace @@ -33,15 +34,9 @@ from apps.parsers.clients.proverki.client import ProverkiClientError from apps.parsers.clients.zakupki import ZakupkiClientError from apps.parsers.models import ( FinancialReport, - GenericParserRecord, - IndustrialCertificateRecord, - IndustrialProductRecord, - InspectionRecord, - ManufacturerRecord, ParserLoadLog, - ProcurementRecord, ) -from apps.parsers.services import ParserLoadLogService +from apps.parsers.services import FNSReportService, ParserLoadLogService from apps.parsers.tasks import ( INDUSTRIAL_PRODUCTS_SOFT_TIME_LIMIT_SECONDS, INDUSTRIAL_PRODUCTS_TIME_LIMIT_SECONDS, @@ -66,6 +61,7 @@ from apps.parsers.tasks import ( ) from django.test import TestCase, override_settings from openpyxl import Workbook +from organizations.models import OrganizationSourceRecord from registers.models import Organization from tests.apps.parsers.factories import ( @@ -170,6 +166,10 @@ class ProxyResolutionTestCase(TestCase): self.assertIsNone(result) +@unittest.skip( + "Legacy post-commit backfill queue is not used when parsers write directly " + "to organization source records." +) class OrganizationSourceBackfillQueueTestCase(TestCase): """Tests parser tasks queue organization source backfill after DB commit.""" @@ -245,6 +245,181 @@ class GenericSourceFetchTestCase(TestCase): ) self.assertFalse(_RecordingStructuredClient.instances[0].kwargs["verify_ssl"]) + @override_settings(CHECKO_API_KEY="") + def test_fstec_enriches_identity_from_local_registry_name_match(self): + Organization.objects.create( + pn_name='ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ "ВЕБЛОК"', + mn_ogrn=1237700253306, + mn_inn=7713497980, + in_kpp=771301001, + mn_okpo="12345678", + ) + source_record = GenericParserItem( + source=ParserLoadLog.Source.FSTEC, + external_id="5066", + organisation_name="ООО «Веблок»", + title="межсетевой экран уровня приложений «Weblock.»", + payload={"Заявитель": "ООО «Веблок»"}, + ) + + with patch.object( + parser_tasks, + "_fetch_structured_records", + return_value=[source_record], + ): + records = parser_tasks._fetch_fstec_records( + file_url=None, + file_path=None, + proxies=[], + ) + + self.assertEqual(len(records), 1) + self.assertEqual(records[0].inn, "7713497980") + self.assertEqual(records[0].ogrn, "1237700253306") + self.assertEqual( + records[0].payload["identity_enrichment"]["provider"], + "local_registry", + ) + + @override_settings(CHECKO_API_KEY="") + def test_fstec_restores_leading_zero_for_local_registry_inn(self): + Organization.objects.create( + pn_name='ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ "БАШНЕФТЬ-ДОБЫЧА"', + mn_ogrn=1090280032699, + mn_inn=277106840, + in_kpp=27701001, + mn_okpo="12345678", + ) + source_record = GenericParserItem( + source=ParserLoadLog.Source.FSTEC, + external_id="1260/174", + organisation_name="ООО «Башнефть-Добыча»", + title="сертификат", + payload={"Заявитель": "ООО «Башнефть-Добыча»"}, + ) + + with patch.object( + parser_tasks, + "_fetch_structured_records", + return_value=[source_record], + ): + records = parser_tasks._fetch_fstec_records( + file_url=None, + file_path=None, + proxies=[], + ) + + self.assertEqual(records[0].inn, "0277106840") + self.assertEqual(records[0].ogrn, "1090280032699") + + @override_settings(CHECKO_API_KEY="test-key") + def test_fstec_enriches_identity_from_checko_exact_name_match(self): + source_record = GenericParserItem( + source=ParserLoadLog.Source.FSTEC, + external_id="5067", + organisation_name="ООО «РуСИЕМ»", + title="SIEM", + payload={"Заявитель": "ООО «РуСИЕМ»"}, + ) + captured_requests = [] + + class _CheckoClient: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def search(self, request): + captured_requests.append(request) + return SimpleNamespace( + data=SimpleNamespace( + organizations=( + SimpleNamespace( + inn="7731317045", + ogrn="1167746493970", + short_name='ООО "РУСИЕМ"', + full_name=None, + ), + ) + ) + ) + + with ( + patch.object( + parser_tasks, + "_fetch_structured_records", + return_value=[source_record], + ), + patch.object(parser_tasks, "CheckoClient", _CheckoClient), + ): + records = parser_tasks._fetch_fstec_records( + file_url=None, + file_path=None, + proxies=[], + ) + + self.assertEqual(records[0].inn, "7731317045") + self.assertEqual(records[0].ogrn, "1167746493970") + self.assertEqual(captured_requests[0].query, "ООО «РуСИЕМ»") + self.assertTrue(captured_requests[0].active) + self.assertEqual( + records[0].payload["identity_enrichment"]["provider"], + "checko", + ) + + @override_settings(CHECKO_API_KEY="test-key") + def test_fstec_does_not_enrich_ambiguous_checko_exact_name_match(self): + source_record = GenericParserItem( + source=ParserLoadLog.Source.FSTEC, + external_id="4982", + organisation_name="ООО «Аванпост»", + title="сертификат", + payload={"Заявитель": "ООО «Аванпост»"}, + ) + + class _CheckoClient: + def __init__(self, **_kwargs): + return + + def search(self, _request): + return SimpleNamespace( + data=SimpleNamespace( + organizations=( + SimpleNamespace( + inn="7722778473", + ogrn="1127746453966", + short_name='ООО "АВАНПОСТ"', + full_name=None, + ), + SimpleNamespace( + inn="5403011237", + ogrn="1155476129753", + short_name='ООО "АВАНПОСТ"', + full_name=None, + ), + ) + ) + ) + + with ( + patch.object( + parser_tasks, + "_fetch_structured_records", + return_value=[source_record], + ), + patch.object(parser_tasks, "CheckoClient", _CheckoClient), + ): + records = parser_tasks._fetch_fstec_records( + file_url=None, + file_path=None, + proxies=[], + ) + + self.assertEqual(records[0].inn, "") + self.assertEqual(records[0].ogrn, "") + self.assertEqual( + records[0].payload["identity_enrichment"]["status"], + "ambiguous", + ) + @override_settings(CHECKO_API_KEY="test-key", FEDRESURS_CHECKO_FALLBACK_LIMIT=10) def test_fedresurs_falls_back_to_checko_for_active_registry_organizations(self): organization = Organization.objects.create( @@ -434,7 +609,7 @@ class GenericSourceFetchTestCase(TestCase): self.assertEqual(result["status"], "success") self.assertEqual(result["saved"], 1) - record = GenericParserRecord.objects.get( + record = OrganizationSourceRecord.objects.get( source=ParserLoadLog.Source.ARBITRATION ) self.assertEqual(record.registry_organization_id, organization.id) @@ -560,16 +735,18 @@ class GenericSourceFetchTestCase(TestCase): self.assertEqual(result["status"], "success") self.assertEqual(result["saved"], 1) - record = InspectionRecord.objects.get() - self.assertEqual(record.registration_number, "erp-1") + record = OrganizationSourceRecord.objects.get( + source=ParserLoadLog.Source.INSPECTIONS + ) + self.assertEqual(record.external_id, "erp-1") self.assertEqual(record.inn, str(organization.mn_inn)) self.assertEqual(record.ogrn, str(organization.mn_ogrn)) - self.assertEqual(record.organisation_name, organization.pn_name) - self.assertEqual(record.control_authority, "Ростехнадзор") - self.assertEqual(record.inspection_type, "scheduled") - self.assertEqual(record.inspection_form, "documentary") + self.assertEqual(record.extension.organization.name, organization.pn_name) + self.assertEqual(record.payload["control_authority"], "Ростехнадзор") + self.assertEqual(record.payload["inspection_type"], "scheduled") + self.assertEqual(record.payload["inspection_form"], "documentary") self.assertEqual(record.status, "planned") - self.assertEqual(record.legal_basis, "checko") + self.assertEqual(record.payload["legal_basis"], "checko") self.assertEqual(record.registry_organization_id, organization.id) self.assertEqual( [request.inn for request in _CheckoClient.instances[0].requests], @@ -644,7 +821,9 @@ class GenericSourceFetchTestCase(TestCase): self.assertEqual(result["status"], "success") self.assertEqual(result["saved"], 1) - record = GenericParserRecord.objects.get(source=ParserLoadLog.Source.CONTRACTS) + record = OrganizationSourceRecord.objects.get( + source=ParserLoadLog.Source.CONTRACTS + ) self.assertEqual(record.registry_organization_id, organization.id) self.assertEqual(record.inn, str(organization.mn_inn)) self.assertEqual(record.ogrn, str(organization.mn_ogrn)) @@ -770,7 +949,12 @@ class ParseProcurementsTaskTestCase(TestCase): self.assertEqual(result["status"], "success") self.assertEqual(result["saved"], len(rows)) - self.assertGreater(ProcurementRecord.objects.count(), 0) + self.assertGreater( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.PROCUREMENTS + ).count(), + 0, + ) def test_parse_procurements_failure(self): with TestHTTPServer() as server: @@ -1189,9 +1373,24 @@ class MinpromtorgTasksTestCase(TestCase): self.assertIn("industrial", result) self.assertIn("industrial_products", result) self.assertIn("manufactures", result) - self.assertEqual(IndustrialCertificateRecord.objects.count(), len(cert_rows)) - self.assertEqual(IndustrialProductRecord.objects.count(), len(product_rows)) - self.assertEqual(ManufacturerRecord.objects.count(), len(manuf_rows)) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL + ).count(), + len(cert_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS + ).count(), + len(product_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.MANUFACTURES + ).count(), + len(manuf_rows), + ) def test_parse_all_sources_success(self): with TestHTTPServer() as server: @@ -1206,10 +1405,30 @@ class MinpromtorgTasksTestCase(TestCase): self.assertIn("industrial_products", result) self.assertIn("manufactures", result) self.assertIn("inspections", result) - self.assertEqual(IndustrialCertificateRecord.objects.count(), len(cert_rows)) - self.assertEqual(IndustrialProductRecord.objects.count(), len(product_rows)) - self.assertEqual(ManufacturerRecord.objects.count(), len(manuf_rows)) - self.assertEqual(InspectionRecord.objects.count(), 0) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL + ).count(), + len(cert_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS + ).count(), + len(product_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.MANUFACTURES + ).count(), + len(manuf_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INSPECTIONS + ).count(), + 0, + ) def test_parse_all_minpromtorg_without_adapter(self): with TestHTTPServer() as server: @@ -1282,9 +1501,24 @@ class MinpromtorgTasksTestCase(TestCase): self.assertIn("industrial", result) self.assertIn("industrial_products", result) self.assertIn("manufactures", result) - self.assertEqual(IndustrialCertificateRecord.objects.count(), len(cert_rows)) - self.assertEqual(IndustrialProductRecord.objects.count(), len(product_rows)) - self.assertEqual(ManufacturerRecord.objects.count(), len(manuf_rows)) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL + ).count(), + len(cert_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS + ).count(), + len(product_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.MANUFACTURES + ).count(), + len(manuf_rows), + ) def test_parse_all_sources_without_adapter(self): with TestHTTPServer() as server: @@ -1364,9 +1598,24 @@ class MinpromtorgTasksTestCase(TestCase): self.assertIn("industrial_products", result) self.assertIn("manufactures", result) self.assertIn("inspections", result) - self.assertEqual(IndustrialCertificateRecord.objects.count(), len(cert_rows)) - self.assertEqual(IndustrialProductRecord.objects.count(), len(product_rows)) - self.assertEqual(ManufacturerRecord.objects.count(), len(manuf_rows)) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL + ).count(), + len(cert_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS + ).count(), + len(product_rows), + ) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.MANUFACTURES + ).count(), + len(manuf_rows), + ) def test_parse_industrial_production_failure(self): date_str = fake.date_between(start_date="-30d", end_date="today").strftime( @@ -1401,7 +1650,12 @@ class MinpromtorgTasksTestCase(TestCase): result = parse_industrial_production(client_adapter=server.adapter) self.assertEqual(result["status"], "success") - self.assertEqual(IndustrialCertificateRecord.objects.count(), len(cert_rows)) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL + ).count(), + len(cert_rows), + ) def test_parse_industrial_products_failure(self): date_str = fake.date_between(start_date="-30d", end_date="today").strftime( @@ -1436,7 +1690,12 @@ class MinpromtorgTasksTestCase(TestCase): result = parse_industrial_products(client_adapter=server.adapter) self.assertEqual(result["status"], "success") - self.assertEqual(IndustrialProductRecord.objects.count(), len(product_rows)) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS + ).count(), + len(product_rows), + ) def test_parse_manufactures_failure(self): date_str = fake.date_between(start_date="-30d", end_date="today").strftime( @@ -1489,7 +1748,12 @@ class ParseInspectionsTaskTestCase(TestCase): self.assertEqual(result["status"], "success") self.assertEqual(result["saved"], len(rows)) - self.assertEqual(InspectionRecord.objects.count(), len(rows)) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.INSPECTIONS + ).count(), + len(rows), + ) def test_parse_inspections_with_default_proxies(self): xml_content, rows = build_proverki_xml(count=1) @@ -1801,7 +2065,12 @@ class FNSFileTasksTestCase(TestCase): result = self._scan_with_eager_process() self.assertEqual(result["queued"], 1) self.assertEqual(result["skipped"], 0) - self.assertEqual(FinancialReport.objects.count(), 1) + self.assertEqual( + OrganizationSourceRecord.objects.filter( + source=ParserLoadLog.Source.FNS_REPORTS + ).count(), + 1, + ) self.assertFalse(os.path.exists(file_path)) self.assertTrue( os.path.exists(os.path.join(processed_dir, os.path.basename(file_path))) @@ -1887,14 +2156,14 @@ class FNSFileTasksTestCase(TestCase): file_path = self._write_fns_file(watch_dir) file_hash = hashlib.sha256(Path(file_path).read_bytes()).hexdigest() - FinancialReport.objects.create( + FNSReportService.save_report( external_id=fake.pystr(min_chars=6, max_chars=10), ogrn=_digits(13), file_name=os.path.basename(file_path), file_hash=file_hash, - load_batch=fake.random_int(min=1, max=9999), - status=FinancialReport.Status.SUCCESS, source=FinancialReport.SourceType.FILE_WATCH, + batch_id=fake.random_int(min=1, max=9999), + lines_data=[], ) with override_settings( @@ -1959,14 +2228,14 @@ class FNSFileTasksTestCase(TestCase): file_path = self._write_fns_file(watch_dir) file_hash = hashlib.sha256(Path(file_path).read_bytes()).hexdigest() - FinancialReport.objects.create( + FNSReportService.save_report( external_id=fake.pystr(min_chars=6, max_chars=10), ogrn=_digits(13), file_name=os.path.basename(file_path), file_hash=file_hash, - load_batch=fake.random_int(min=1, max=9999), - status=FinancialReport.Status.SUCCESS, source=FinancialReport.SourceType.FILE_WATCH, + batch_id=fake.random_int(min=1, max=9999), + lines_data=[], ) with override_settings( @@ -1989,27 +2258,14 @@ class FNSFileTasksTestCase(TestCase): watch_dir, processed_dir, failed_dir = self._dirs(tmpdir) os.makedirs(watch_dir, exist_ok=True) file_path = self._write_fns_file(watch_dir) - filename = os.path.basename(file_path) - external_id, ogrn = ( - filename.replace("fin_", "").replace(".xlsx", "").split("_", 1) - ) - - FinancialReport.objects.create( - external_id=external_id, - ogrn=ogrn, - file_name=filename, - file_hash=hashlib.sha256( - fake.pystr(min_chars=8, max_chars=12).encode("utf-8") - ).hexdigest(), - load_batch=fake.random_int(min=1, max=9999), - status=FinancialReport.Status.SUCCESS, - source=FinancialReport.SourceType.FILE_WATCH, - ) with override_settings( FNS_WATCH_DIRECTORY=watch_dir, FNS_PROCESSED_DIRECTORY=processed_dir, FNS_FAILED_DIRECTORY=failed_dir, + ), patch( + "apps.parsers.tasks.FNSReportService.save_report", + side_effect=RuntimeError("database unavailable"), ): result = _process_fns_file_sync( file_path, @@ -2141,7 +2397,7 @@ class ParseVacanciesTaskTestCase(TestCase): ) self.assertEqual( set( - GenericParserRecord.objects.filter( + OrganizationSourceRecord.objects.filter( source=ParserLoadLog.Source.TRUDVSEM ).values_list("external_id", flat=True) ), @@ -2251,7 +2507,7 @@ class ParseVacanciesTaskTestCase(TestCase): self.assertEqual(captured_fetches, ["7701000101", "7701000102"]) self.assertEqual( set( - GenericParserRecord.objects.filter( + OrganizationSourceRecord.objects.filter( source=ParserLoadLog.Source.TRUDVSEM ).values_list("external_id", flat=True) ), @@ -2350,13 +2606,14 @@ class ParseVacanciesTaskTestCase(TestCase): ) self.assertEqual(result["saved"], 3) self.assertEqual( - set( - GenericParserRecord.objects.values_list( - "external_id", - "source", - "registry_organization_id", + { + ( + record.external_id, + record.source, + record.registry_organization_id, ) - ), + for record in OrganizationSourceRecord.objects.all() + }, { ("trudvsem:romashka", "trudvsem", organization.id), ("hh:romashka", "hh", organization.id), @@ -2474,7 +2731,7 @@ class ParseVacanciesTaskTestCase(TestCase): self.assertEqual(captured_fetch_kwargs["text"], "инженер") self.assertEqual( set( - GenericParserRecord.objects.values_list( + OrganizationSourceRecord.objects.values_list( "external_id", "source", ) diff --git a/tests/apps/parsers/test_views.py b/tests/apps/parsers/test_views.py index 45f9e50..857a9bc 100644 --- a/tests/apps/parsers/test_views.py +++ b/tests/apps/parsers/test_views.py @@ -19,7 +19,7 @@ from apps.parsers.models import ( from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse from openpyxl import Workbook -from organizations.models import Organization +from organizations.models import Organization, OrganizationSourceRecord from organizations.source_ingestion import ( OrganizationSourceIngestionService, SourceRecordInput, @@ -1063,7 +1063,8 @@ class ParsersViewSetTest(APITestCase): self.assertEqual(response.data["queued"], 1) self.assertEqual(response.data["skipped"], 0) self.assertEqual(response.data["invalid"], 0) - self.assertEqual(FinancialReport.objects.count(), 1) + self.assertEqual(FinancialReport.objects.count(), 0) + self.assertEqual(OrganizationSourceRecord.objects.count(), 1) def test_parsing_settings_get_and_patch(self): self.client.force_authenticate(self.admin) diff --git a/tests/apps/user/test_views.py b/tests/apps/user/test_views.py index 8a115c7..c668ffd 100644 --- a/tests/apps/user/test_views.py +++ b/tests/apps/user/test_views.py @@ -1,6 +1,5 @@ """Tests for user DRF views""" -from apps.user.models import Profile from apps.user.services import UserService from django.contrib.auth import get_user_model from django.urls import reverse @@ -16,7 +15,7 @@ fake = Faker("ru_RU") class RegisterViewTest(APITestCase): - """Tests for RegisterView""" + """Tests for disabled public registration endpoint.""" def setUp(self): self.register_url = reverse("api_v1:user:register") @@ -32,67 +31,11 @@ class RegisterViewTest(APITestCase): "last_name": fake.last_name(), } - def test_register_success(self): - """Test successful user registration""" + def test_register_endpoint_is_disabled(self): response = self.client.post(self.register_url, self.user_data, format="json") - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertIn("user", response.data) - self.assertIn("tokens", response.data) - self.assertIn("refresh", response.data["tokens"]) - self.assertIn("access", response.data["tokens"]) - - # Verify user was created - created_user = User.objects.get(email=self.user_data["email"]) - self.assertEqual(created_user.profile.first_name, self.user_data["first_name"]) - self.assertEqual(created_user.profile.last_name, self.user_data["last_name"]) - - def test_register_passwords_do_not_match(self): - """Test registration fails when passwords don't match""" - data = self.user_data.copy() - data["password_confirm"] = fake.password(length=12, special_chars=False) - - response = self.client.post(self.register_url, data, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("non_field_errors", response.data) - - def test_register_duplicate_email(self): - """Test registration fails with duplicate email""" - # Create existing user - existing_user = UserFactory.create_user() - - # Use the same email as existing user - data = self.user_data.copy() - data["email"] = existing_user.email - - response = self.client.post(self.register_url, data, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("email", response.data) - - def test_register_short_password(self): - """Test registration fails with short password""" - short_password = fake.pystr(min_chars=3, max_chars=5) - data = self.user_data.copy() - data["password"] = short_password - data["password_confirm"] = short_password - - response = self.client.post(self.register_url, data, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("password", response.data) - - def test_register_requires_first_and_last_name(self): - data = self.user_data.copy() - data.pop("first_name") - data.pop("last_name") - - response = self.client.post(self.register_url, data, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("first_name", response.data) - self.assertIn("last_name", response.data) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) + self.assertFalse(User.objects.filter(email=self.user_data["email"]).exists()) class LoginViewTest(APITestCase): @@ -211,7 +154,7 @@ class CurrentUserViewTest(APITestCase): class UserUpdateViewTest(APITestCase): - """Tests for UserUpdateView""" + """Tests for disabled self-service user update endpoint.""" def setUp(self): self.user = UserFactory.create_user() @@ -224,30 +167,12 @@ class UserUpdateViewTest(APITestCase): "phone": f"+7{fake.numerify('##########')}", } - def test_update_user_success(self): - """Test successful user update""" + def test_update_user_endpoint_is_disabled(self): response = self.client.patch(self.update_url, self.update_data, format="json") - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data["username"], self.update_data["username"]) - self.assertEqual(response.data["phone"], self.update_data["phone"]) - - # Verify in database + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) self.user.refresh_from_db() - self.assertEqual(self.user.username, self.update_data["username"]) - - def test_update_user_unauthenticated(self): - """Test user update fails when unauthenticated""" - self.client.credentials() # Remove auth header - response = self.client.patch(self.update_url, self.update_data, format="json") - - self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) - - def test_update_user_invalid_returns_400(self): - response = self.client.patch(self.update_url, {"username": ""}, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("username", response.data) + self.assertNotEqual(self.user.username, self.update_data["username"]) class AdminUserManagementViewTest(APITestCase): @@ -529,6 +454,27 @@ class AdminUserManagementViewTest(APITestCase): self.user.refresh_from_db() self.assertFalse(self.user.is_active) + def test_admin_deactivate_user_returns_managed_user_payload(self): + url = reverse("api_v1:user:admin-user-deactivate", args=[self.user.id]) + + response = self.client.post(url, {}, format="json") + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual( + set(response.data.keys()), + { + "id", + "username", + "email", + "phone", + "role", + "role_label", + "is_active", + }, + ) + self.assertEqual(response.data["id"], self.user.id) + self.assertFalse(response.data["is_active"]) + def test_admin_can_activate_user(self): self.user.is_active = False self.user.save(update_fields=["is_active"]) @@ -569,7 +515,7 @@ class AdminUserManagementViewTest(APITestCase): class ProfileDetailViewTest(APITestCase): - """Tests for ProfileDetailView""" + """Tests for disabled self-service profile endpoints.""" def setUp(self): self.user = UserFactory.create_user() @@ -585,57 +531,26 @@ class ProfileDetailViewTest(APITestCase): "bio": fake.text(max_nb_chars=200), } - def test_get_profile_success(self): - """Test successful profile retrieval""" + def test_profile_read_endpoint_is_disabled(self): response = self.client.get(self.profile_url) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data["first_name"], self.profile.first_name) - self.assertEqual(response.data["middle_name"], self.profile.middle_name) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) - def test_update_profile_success(self): - """Test successful profile update""" + def test_profile_update_endpoint_is_disabled(self): response = self.client.patch(self.profile_url, self.update_data, format="json") - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data["first_name"], self.update_data["first_name"]) - self.assertEqual(response.data["middle_name"], self.update_data["middle_name"]) - self.assertEqual(response.data["last_name"], self.update_data["last_name"]) - - # Verify in database + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) self.profile.refresh_from_db() - self.assertEqual(self.profile.first_name, self.update_data["first_name"]) + self.assertNotEqual(self.profile.first_name, self.update_data["first_name"]) - def test_profile_created_if_not_exists(self): - """Test profile is created if it doesn't exist""" - # Delete existing profile - self.profile.delete() - - response = self.client.get(self.profile_url) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - # Profile should be created automatically - self.assertTrue(Profile.objects.filter(user=self.user).exists()) - - def test_update_profile_invalid_returns_400(self): - response = self.client.patch( - self.profile_url, - {"date_of_birth": "not-a-date"}, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("date_of_birth", response.data) - - def test_get_full_profile_endpoint(self): + def test_profile_full_endpoint_is_disabled(self): response = self.client.get(reverse("api_v1:user:profile_full")) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data["id"], self.user.id) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) class PasswordChangeViewTest(APITestCase): - """Tests for PasswordChangeView""" + """Tests for disabled self-service password change endpoint.""" def setUp(self): self.old_password = fake.password(length=12, special_chars=False) @@ -651,38 +566,14 @@ class PasswordChangeViewTest(APITestCase): "new_password_confirm": self.new_password, } - def test_change_password_success(self): - """Test successful password change""" + def test_change_password_endpoint_is_disabled(self): response = self.client.post( self.password_change_url, self.password_data, format="json" ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertIn("message", response.data) - - # Verify password was changed + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) self.user.refresh_from_db() - self.assertTrue(self.user.check_password(self.new_password)) - - def test_change_password_wrong_old_password(self): - """Test password change fails with wrong old password""" - data = self.password_data.copy() - data["old_password"] = fake.password(length=12, special_chars=False) - - response = self.client.post(self.password_change_url, data, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("error", response.data) - - def test_change_password_passwords_do_not_match(self): - """Test password change fails when new passwords don't match""" - data = self.password_data.copy() - data["new_password_confirm"] = fake.password(length=12, special_chars=False) - - response = self.client.post(self.password_change_url, data, format="json") - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertIn("non_field_errors", response.data) + self.assertTrue(self.user.check_password(self.old_password)) class TokenRefreshViewTest(APITestCase): @@ -725,7 +616,7 @@ class TokenRefreshViewTest(APITestCase): class TokenVerifyViewTest(APITestCase): - def test_verify_access_token_success(self): + def test_verify_access_token_endpoint_is_disabled(self): user = UserFactory.create_user() tokens = UserService.get_tokens_for_user(user) @@ -735,11 +626,11 @@ class TokenVerifyViewTest(APITestCase): format="json", ) - self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) -class ApiJwtOnlyAuthenticationTest(APITestCase): - """Tests that API auth flow is JWT-only and not session-cookie based.""" +class DisabledSelfServiceAuthenticationTest(APITestCase): + """Tests disabled self-service endpoints do not mutate users.""" def setUp(self): self.user = UserFactory.create_user() @@ -752,17 +643,14 @@ class ApiJwtOnlyAuthenticationTest(APITestCase): self.client.cookies["sessionid"] = "fake-admin-session" self.client.cookies["csrftoken"] = "fake-csrf-token" - def test_patch_with_bearer_and_session_cookies_returns_200(self): - """Bearer JWT should authenticate even if session cookies are present.""" + def test_patch_with_bearer_and_session_cookies_returns_405(self): self.client.credentials(HTTP_AUTHORIZATION=f"Bearer {self.tokens['access']}") response = self.client.patch(self.update_url, self.patch_data, format="json") - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data["id"], self.user.id) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) - def test_patch_with_only_session_cookies_returns_401_not_403(self): - """Session cookies without JWT should not trigger CSRF 403 for API auth.""" + def test_patch_with_only_session_cookies_returns_405(self): response = self.client.patch(self.update_url, self.patch_data, format="json") - self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) + self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) diff --git a/tests/settings/test_dev_settings.py b/tests/settings/test_dev_settings.py new file mode 100644 index 0000000..ce59c46 --- /dev/null +++ b/tests/settings/test_dev_settings.py @@ -0,0 +1,17 @@ +from importlib import import_module, reload + + +def test_dev_settings_use_exchange_defaults_when_env_values_are_empty(monkeypatch): + expected_dev_token = "state-corp-dev-exchange-token-v1" # noqa: S105 + + monkeypatch.setenv("STATE_CORP_EXCHANGE_URL", "") + monkeypatch.setenv("STATE_CORP_EXCHANGE_TOKEN", "") + + dev_settings = reload(import_module("settings.dev")) + actual_dev_token = dev_settings.STATE_CORP_EXCHANGE_TOKEN + + assert ( + dev_settings.STATE_CORP_EXCHANGE_URL + == "http://127.0.0.1:8001/api/v1/exchange/packages/upload/" + ) + assert actual_dev_token == expected_dev_token diff --git a/tests/test_api_inventory_e2e.py b/tests/test_api_inventory_e2e.py index 495749f..49fe322 100644 --- a/tests/test_api_inventory_e2e.py +++ b/tests/test_api_inventory_e2e.py @@ -134,10 +134,9 @@ class UserApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): def setUp(self): self.admin = UserFactory.create_superuser() - def test_auth_and_profile_endpoints(self): + def test_auth_endpoints_and_disabled_self_service_endpoints(self): initial_password = fake.password(length=16, special_chars=False) - new_password = fake.password(length=18, special_chars=False) - register_payload = { + disabled_registration_payload = { "email": fake.unique.email(), "username": fake.unique.user_name(), "password": initial_password, @@ -147,19 +146,25 @@ class UserApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): "middle_name": "Ivanovich", "last_name": "Ivanov", } + user = UserFactory.create_user( + username=fake.unique.user_name(), + password=initial_password, + ) register_response = self.client.post( reverse("api_v1:user:register"), - register_payload, + disabled_registration_payload, format="json", ) - self.assertEqual(register_response.status_code, status.HTTP_201_CREATED) - self.assertIn("tokens", register_response.data) + self.assertEqual( + register_response.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, + ) login_response = self.client.post( reverse("api_v1:user:login"), { - "username": register_payload["username"], + "username": user.username, "password": initial_password, }, format="json", @@ -171,7 +176,10 @@ class UserApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): {"token": login_response.data["access"]}, format="json", ) - self.assertEqual(verify_response.status_code, status.HTTP_200_OK) + self.assertEqual( + verify_response.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, + ) refresh_response = self.client.post( reverse("api_v1:user:token_refresh"), @@ -185,17 +193,23 @@ class UserApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): me_response = self.client.get(reverse("api_v1:user:current_user")) self.assertEqual(me_response.status_code, status.HTTP_200_OK) - self.assertEqual(me_response.data["username"], register_payload["username"]) + self.assertEqual(me_response.data["username"], user.username) me_update_response = self.client.patch( reverse("api_v1:user:user_update"), {"phone": f"+7{fake.numerify('##########')}"}, format="json", ) - self.assertEqual(me_update_response.status_code, status.HTTP_200_OK) + self.assertEqual( + me_update_response.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, + ) profile_response = self.client.get(reverse("api_v1:user:profile_detail")) - self.assertEqual(profile_response.status_code, status.HTTP_200_OK) + self.assertEqual( + profile_response.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, + ) profile_patch_response = self.client.patch( reverse("api_v1:user:profile_detail"), @@ -206,38 +220,31 @@ class UserApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): }, format="json", ) - self.assertEqual(profile_patch_response.status_code, status.HTTP_200_OK) + self.assertEqual( + profile_patch_response.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, + ) profile_full_response = self.client.get(reverse("api_v1:user:profile_full")) - self.assertEqual(profile_full_response.status_code, status.HTTP_200_OK) self.assertEqual( - profile_full_response.data["username"], register_payload["username"] + profile_full_response.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, ) password_change_response = self.client.post( reverse("api_v1:user:password_change"), { "old_password": initial_password, - "new_password": new_password, - "new_password_confirm": new_password, + "new_password": f"{initial_password}1", + "new_password_confirm": f"{initial_password}1", }, format="json", ) - self.assertEqual(password_change_response.status_code, status.HTTP_200_OK) - - relogin_response = self.client.post( - reverse("api_v1:user:login"), - { - "username": register_payload["username"], - "password": new_password, - }, - format="json", + self.assertEqual( + password_change_response.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, ) - self.assertEqual(relogin_response.status_code, status.HTTP_200_OK) - self.client.credentials( - HTTP_AUTHORIZATION=f"Bearer {relogin_response.data['access']}" - ) logout_response = self.client.post( reverse("api_v1:user:logout"), {}, @@ -466,6 +473,11 @@ class ParsersApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): self.assertEqual(sources_statuses.status_code, status.HTTP_200_OK) self.assertEqual(source_detail.status_code, status.HTTP_200_OK) + main_dashboard = self.client.get(reverse("api_v1:stat:main-dashboard")) + self.assertEqual(main_dashboard.status_code, status.HTTP_200_OK) + self.assertIn("source_cards", main_dashboard.data["data"]) + self.assertIn("organization_stats", main_dashboard.data["data"]) + self.authenticate(self.admin) parsing_url = reverse("api_v1:parsing:parsing-settings") parsing_get = self.client.get(parsing_url) @@ -600,12 +612,8 @@ class ExchangeApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): @patch("apps.exchange.services.ExchangeConnectionService.prepare_target_structure") @patch("apps.exchange.services.ExchangeConnectionService.test_connection") @patch("apps.exchange.services.ExchangeConnectionService.test_connection_payload") - @patch("apps.exchange.views.copy_parsers_data_async") - @patch("apps.exchange.services.ExchangeConnectionService.get_active_connection") def test_exchange_endpoints( self, - get_active_connection_mock, - copy_task_mock, test_connection_payload_mock, _test_connection_mock, _prepare_mock, @@ -615,6 +623,7 @@ class ExchangeApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): connections_url = reverse("api_v1:exchange:connections") test_connection_url = reverse("api_v1:exchange:connections-test") copy_url = reverse("api_v1:exchange:copy") + tables_url = reverse("api_v1:exchange:tables") periodic_tasks_url = reverse("api_v1:exchange:periodic-tasks") connection_payload = { @@ -646,9 +655,8 @@ class ExchangeApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): active_connection = ExchangeConnection.objects.get( id=create_connection.data["id"] ) - get_active_connection_mock.return_value = active_connection - copy_task_mock.delay.return_value = SimpleNamespace(id="exchange-task-1") copy_response = self.client.post(copy_url, {"mode": "all"}, format="json") + tables_response = self.client.get(tables_url) list_periodic = self.client.get(periodic_tasks_url) create_periodic = self.client.post( @@ -682,10 +690,8 @@ class ExchangeApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): list_connections, create_connection, connection_test, - copy_response, list_periodic, create_periodic, - detail_periodic, patch_periodic, ): self.assertIn( @@ -693,10 +699,15 @@ class ExchangeApiInventoryE2ETest(AuthenticatedApiMixin, APITestCase): { status.HTTP_200_OK, status.HTTP_201_CREATED, - status.HTTP_202_ACCEPTED, }, ) + self.assertEqual(copy_response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) + self.assertEqual(tables_response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) + self.assertEqual( + detail_periodic.status_code, + status.HTTP_405_METHOD_NOT_ALLOWED, + ) self.assertTrue(PeriodicTask.objects.filter(id=periodic_id).exists()) self.assertTrue( ExchangeConnection.objects.filter(id=active_connection.id).exists()