feat(organizations): migrate source storage to polymorphic records

This commit is contained in:
2026-05-19 10:23:53 +02:00
parent 19a7d5a91c
commit 4ca2fa25d5
44 changed files with 7129 additions and 1551 deletions

View File

@@ -1,39 +1,46 @@
"""Views for organizations API v2."""
"""Views for organization-centric API v2."""
from __future__ import annotations
import hashlib
import json
from typing import Any
from apps.core.openapi import swagger_tag
from django.conf import settings
from django.core.cache import cache
from django.db.models import CharField, Q
from django.db.models.functions import Cast
from django_filters import rest_framework as filters
from drf_yasg import openapi
from drf_yasg.utils import swagger_auto_schema
from rest_framework.exceptions import ValidationError
from registers.models import RegistryMembershipPeriod
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.permissions import AllowAny, IsAuthenticated
from rest_framework.response import Response
from rest_framework.viewsets import ReadOnlyModelViewSet
from organizations.api_enrichment import (
API_DATA_SOURCE_KEY_SET,
OrganizationApiEnrichmentService,
to_api_data_source,
to_internal_data_source,
)
from organizations.cache import (
DEFAULT_ORGANIZATION_API_CACHE_TIMEOUT_SECONDS,
ORGANIZATION_API_CACHE_PREFIX,
get_organization_api_cache_version,
)
from organizations.filters import OrganizationFilter
from organizations.models import Organization
from organizations.serializers import OrganizationSerializer
from organizations.models import (
Organization,
OrganizationSourceExtension,
OrganizationSourceRecord,
SourceGroup,
)
from organizations.serializers import (
OrganizationSerializer,
OrganizationSourceExtensionSerializer,
OrganizationSourceRecordSerializer,
)
ORGANIZATIONS_TAG = swagger_tag("Организации", "Organizations")
ORGANIZATION_DATA_SOURCE_KEYS = ", ".join(sorted(API_DATA_SOURCE_KEY_SET))
FALSE_QUERY_VALUES = {"0", "false", "no", "off"}
def _query_parameter(
@@ -57,38 +64,12 @@ def _query_parameter(
)
ORGANIZATION_DATA_PARAMS = [
_query_parameter(
"data",
description=(
"Ограничить блок data одним или несколькими источниками. "
f"Допустимые значения: {ORGANIZATION_DATA_SOURCE_KEYS}. "
"Можно передать несколько параметров или CSV-строку. "
"На list endpoint блок data по умолчанию пустой; передайте этот "
"параметр, чтобы вернуть данные источников."
),
),
_query_parameter(
"data_sources",
description=(
"Alias параметра data. Оставлен для явного указания набора источников."
),
),
_query_parameter(
"exclude_data",
description=(
"Исключить один или несколько источников из блока data. "
f"Допустимые значения: {ORGANIZATION_DATA_SOURCE_KEYS}."
),
),
_query_parameter(
"exclude_data_sources",
description=(
"Alias параметра exclude_data. Можно передать несколько значений "
"или CSV-строку."
),
),
]
def _is_truthy_query_value(value: str) -> bool:
    """Interpret a raw query-string value as a boolean flag.

    The value is stripped and lower-cased; it counts as false only when the
    result is one of ``FALSE_QUERY_VALUES``. Everything else, including an
    empty string, counts as true.
    """
    normalized = value.strip().lower()
    return normalized not in FALSE_QUERY_VALUES
# The `.value` of every member yielded by iterating SourceGroup. Reused in this
# module as the allowed values of the `source_group` query parameters and to
# generate one `has_<group>` boolean filter per group.
SOURCE_GROUP_VALUES = [choice.value for choice in SourceGroup]
ORGANIZATION_LIST_PARAMS = [
_query_parameter(
"page",
@@ -104,12 +85,12 @@ ORGANIZATION_LIST_PARAMS = [
),
_query_parameter(
"search",
description="Полнотекстовый поиск по наименованию, ИНН, КПП, ОГРН и ОГРИП.",
description="Поиск по наименованию, ИНН, КПП, ОГРН, ОГРИП и основному идентификатору.",
),
_query_parameter(
"ordering",
description=(
"Сортировка по uid, name, inn, kpp, ogrn или ogrip. "
"Сортировка по uid, name, inn, kpp, ogrn, ogrip или identity_status. "
"Префикс '-' включает обратный порядок."
),
),
@@ -118,6 +99,11 @@ ORGANIZATION_LIST_PARAMS = [
_query_parameter("kpp", description="Точный фильтр по КПП."),
_query_parameter("ogrn", description="Точный фильтр по ОГРН."),
_query_parameter("ogrip", description="Точный фильтр по ОГРИП."),
_query_parameter(
"identity_status",
description="Фильтр полноты реквизитов организации.",
enum=[choice.value for choice in Organization.IdentityStatus],
),
_query_parameter(
"registry",
description="UUID реестра. Возвращает организации из активного участия.",
@@ -136,15 +122,19 @@ ORGANIZATION_LIST_PARAMS = [
param_type=openapi.TYPE_BOOLEAN,
default=True,
),
_query_parameter(
"source_group",
description="Фильтр по группе источников организации.",
enum=SOURCE_GROUP_VALUES,
),
*[
_query_parameter(
f"has_{source}",
description=f"Фильтр наличия данных источника {source}.",
f"has_{source_group}",
description=f"Фильтр наличия группы источников {source_group}.",
param_type=openapi.TYPE_BOOLEAN,
)
for source in sorted(API_DATA_SOURCE_KEY_SET)
for source_group in SOURCE_GROUP_VALUES
],
*ORGANIZATION_DATA_PARAMS,
]
ORGANIZATION_DETAIL_PARAMS = [
openapi.Parameter(
@@ -155,89 +145,54 @@ ORGANIZATION_DETAIL_PARAMS = [
required=True,
description="UID организации.",
),
*ORGANIZATION_DATA_PARAMS,
]
ORGANIZATION_SCHEMA = openapi.Schema(
type=openapi.TYPE_OBJECT,
required=["uid", "name", "inn", "data", "data_sources", "registries"],
properties={
"uid": openapi.Schema(type=openapi.TYPE_STRING, format=openapi.FORMAT_UUID),
"name": openapi.Schema(type=openapi.TYPE_STRING),
"normalized_name": openapi.Schema(type=openapi.TYPE_STRING),
"inn": openapi.Schema(type=openapi.TYPE_STRING),
"kpp": openapi.Schema(type=openapi.TYPE_STRING),
"ogrn": openapi.Schema(type=openapi.TYPE_STRING),
"ogrip": openapi.Schema(type=openapi.TYPE_STRING),
"data": openapi.Schema(
type=openapi.TYPE_OBJECT,
description=(
"Данные по источникам. Ключи управляются параметрами data/"
"exclude_data."
),
additional_properties=openapi.Schema(
type=openapi.TYPE_ARRAY,
items=openapi.Schema(type=openapi.TYPE_OBJECT),
),
),
"data_sources": openapi.Schema(
type=openapi.TYPE_ARRAY,
items=openapi.Schema(
type=openapi.TYPE_OBJECT,
properties={
"source": openapi.Schema(type=openapi.TYPE_STRING),
"count": openapi.Schema(type=openapi.TYPE_INTEGER),
},
),
),
"registries": openapi.Schema(
type=openapi.TYPE_ARRAY,
items=openapi.Schema(
type=openapi.TYPE_OBJECT,
properties={
"id": openapi.Schema(type=openapi.TYPE_STRING),
"name": openapi.Schema(type=openapi.TYPE_STRING),
},
),
),
},
)
ORGANIZATION_LIST_RESPONSE = openapi.Response(
description="Пагинированный список организаций v2.",
schema=openapi.Schema(
type=openapi.TYPE_OBJECT,
properties={
"success": openapi.Schema(type=openapi.TYPE_BOOLEAN),
"data": openapi.Schema(
type=openapi.TYPE_ARRAY,
items=ORGANIZATION_SCHEMA,
),
"errors": openapi.Schema(
type=openapi.TYPE_ARRAY,
items=openapi.Schema(type=openapi.TYPE_OBJECT),
description="Список ошибок; null при успешном ответе.",
),
"meta": openapi.Schema(
type=openapi.TYPE_OBJECT,
properties={
"pagination": openapi.Schema(
type=openapi.TYPE_OBJECT,
properties={
"page": openapi.Schema(type=openapi.TYPE_INTEGER),
"page_size": openapi.Schema(type=openapi.TYPE_INTEGER),
"total_count": openapi.Schema(type=openapi.TYPE_INTEGER),
"total_pages": openapi.Schema(type=openapi.TYPE_INTEGER),
"has_next": openapi.Schema(type=openapi.TYPE_BOOLEAN),
"has_previous": openapi.Schema(type=openapi.TYPE_BOOLEAN),
},
),
},
),
},
# Swagger path parameter describing the `uid` (UUID) of a source extension.
# Passed as `manual_parameters` to the `records` action of
# OrganizationSourceExtensionViewSet.
SOURCE_EXTENSION_PATH_PARAMS = [
openapi.Parameter(
name="uid",
in_=openapi.IN_PATH,
type=openapi.TYPE_STRING,
format=openapi.FORMAT_UUID,
required=True,
description="UID расширения источника.",
),
]
# Swagger query parameters documented for the flat source-record list endpoint
# (OrganizationSourceRecordViewSet.list). The source_group, source, record_type,
# has_registry, organization and search entries mirror the parameters read in
# that viewset's get_queryset(); page and page_size are documented here only.
SOURCE_RECORD_LIST_PARAMS = [
_query_parameter(
"source_group",
description="Фильтр по группе источников.",
enum=SOURCE_GROUP_VALUES,
),
_query_parameter("source", description="Фильтр по legacy source внутри группы."),
_query_parameter("record_type", description="Фильтр по типу записи."),
_query_parameter(
"has_registry",
description="Фильтр наличия активного участия организации записи в любом реестре.",
param_type=openapi.TYPE_BOOLEAN,
),
_query_parameter(
"organization",
description="UID организации.",
format_=openapi.FORMAT_UUID,
),
_query_parameter(
"search",
description=(
"Поиск по организации, реквизитам, заголовку, внешнему ID, "
"статусу, датам, URL и исходным данным записи."
),
),
_query_parameter("page", description="Номер страницы.", param_type=openapi.TYPE_INTEGER),
_query_parameter(
"page_size",
description="Размер страницы. Максимум 100.",
param_type=openapi.TYPE_INTEGER,
),
]
ORGANIZATION_LIST_RESPONSE = openapi.Response(
description="Пагинированный список организаций v2 с компактными источниками.",
)
ORGANIZATION_DETAIL_RESPONSE = openapi.Response(
description="Карточка организации v2.",
schema=ORGANIZATION_SCHEMA,
)
@@ -283,7 +238,7 @@ class CachedReadOnlyMixin:
class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
"""Read-only API for canonical organizations."""
"""Read-only API for canonical organizations and source summaries."""
queryset = Organization.objects.order_by("name", "uid")
serializer_class = OrganizationSerializer
@@ -295,8 +250,23 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
OrderingFilter,
]
filterset_class = OrganizationFilter
search_fields = ["name", "inn", "kpp", "ogrn", "ogrip"]
ordering_fields = ["name", "inn", "kpp", "ogrn", "ogrip", "uid"]
search_fields = [
"name",
"inn",
"kpp",
"ogrn",
"ogrip",
"primary_identity",
]
ordering_fields = [
"name",
"inn",
"kpp",
"ogrn",
"ogrip",
"identity_status",
"uid",
]
ordering = ["name", "uid"]
def get_permissions(self):
@@ -305,10 +275,7 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
return super().get_permissions()
def get_queryset(self):
queryset = super().get_queryset().select_related("data_snapshot")
if self._should_defer_snapshot_data():
queryset = queryset.defer("data_snapshot__data")
queryset = super().get_queryset().prefetch_related("source_extensions")
if self.action != "list" or "has_registry" in self.request.query_params:
return queryset
@@ -321,20 +288,6 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
return filterset.qs
return queryset
def _should_defer_snapshot_data(self) -> bool:
if getattr(self, "action", None) != "list":
return False
return not any(
name in self.request.query_params
for name in (
"data",
"data_sources",
"exclude_data",
"exclude_data_sources",
)
)
@swagger_auto_schema(
tags=[ORGANIZATIONS_TAG],
operation_id="v2_organizations_list",
@@ -343,10 +296,8 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
"Возвращает канонический справочник организаций API v2. "
"По умолчанию показывает только организации с активным участием "
"в реестрах; передайте has_registry=false, чтобы снять это ограничение. "
"Поддерживает пагинацию, поиск по наименованию и реквизитам, фильтры "
"по реестрам и наличию данных по источникам. Для list endpoint "
"тяжелый блок data по умолчанию пустой; передайте data/data_sources, "
"чтобы вернуть данные конкретных источников."
"Данные источников возвращаются компактным списком sources; детальные "
"записи доступны через endpoints расширений источников."
),
manual_parameters=ORGANIZATION_LIST_PARAMS,
responses={200: ORGANIZATION_LIST_RESPONSE},
@@ -354,7 +305,7 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
def list(self, request, *args: Any, **kwargs: Any) -> Response:
return self._cached_response(
request,
lambda: self._list_with_enrichment(request, *args, **kwargs),
lambda: super(OrganizationViewSet, self).list(request, *args, **kwargs),
)
@swagger_auto_schema(
@@ -362,9 +313,8 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
operation_id="v2_organizations_retrieve",
operation_summary="Карточка организации",
operation_description=(
"Возвращает одну организацию по UID с реестрами и данными источников. "
"Параметры data/data_sources и exclude_data/exclude_data_sources "
"позволяют запросить только нужные блоки данных."
"Возвращает одну организацию по UID с активными реестрами и компактными "
"группами источников."
),
manual_parameters=ORGANIZATION_DETAIL_PARAMS,
responses={200: ORGANIZATION_DETAIL_RESPONSE, 404: "Организация не найдена"},
@@ -372,123 +322,237 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
def retrieve(self, request, *args: Any, **kwargs: Any) -> Response:
return self._cached_response(
request,
lambda: self._retrieve_with_enrichment(request, *args, **kwargs),
lambda: super(OrganizationViewSet, self).retrieve(request, *args, **kwargs),
)
def _list_with_enrichment(self, request, *args: Any, **kwargs: Any) -> Response:
queryset = self.filter_queryset(self.get_queryset())
data_sources = self._parse_data_sources(request, default=set())
@swagger_auto_schema(
    tags=[ORGANIZATIONS_TAG],
    operation_id="v2_organizations_sources",
    operation_summary="Источники организации",
    operation_description="Возвращает source extensions одной организации.",
    responses={200: "Список source extensions", 404: "Организация не найдена"},
)
@action(detail=True, methods=["get"])
def sources(self, request, *args: Any, **kwargs: Any) -> Response:
    """Return every source extension of the organization addressed by the URL.

    The organization is resolved through ``get_object()``; its
    ``source_extensions`` are serialized as a plain list (no pagination).
    """
    extensions = self.get_object().source_extensions.all()
    payload = OrganizationSourceExtensionSerializer(extensions, many=True).data
    return Response(payload)
class OrganizationSourceExtensionViewSet(ReadOnlyModelViewSet):
"""Read-only API for source extensions and their records."""
# The owning organization is joined in the same query via select_related.
# NOTE(review): OrderingFilter is active and `ordering` below names a different
# default than this order_by — presumably `ordering` wins on list requests;
# confirm which default is intended.
queryset = OrganizationSourceExtension.objects.select_related("organization").order_by(
"organization__name",
"source_group",
)
serializer_class = OrganizationSourceExtensionSerializer
# Default permission; get_permissions() returns AllowAny instead when the
# ORGANIZATIONS_V2_ALLOW_ANONYMOUS setting is truthy.
permission_classes = [IsAuthenticated]
lookup_field = "uid"
filter_backends = [OrderingFilter]
ordering_fields = ["source_group", "records_count", "last_seen_at", "uid"]
ordering = ["source_group", "uid"]
def get_permissions(self):
    """Pick the permission instances for the current request.

    When the ``ORGANIZATIONS_V2_ALLOW_ANONYMOUS`` setting is truthy the
    endpoint is opened with ``AllowAny``; otherwise the permissions declared
    on the class are used.
    """
    allow_anonymous = getattr(settings, "ORGANIZATIONS_V2_ALLOW_ANONYMOUS", False)
    if not allow_anonymous:
        return super().get_permissions()
    return [AllowAny()]
@swagger_auto_schema(
tags=[ORGANIZATIONS_TAG],
operation_id="v2_organization_sources_records",
operation_summary="Записи источника организации",
operation_description="Возвращает записи под конкретным source extension.",
manual_parameters=SOURCE_EXTENSION_PATH_PARAMS,
responses={200: "Пагинированный список записей источника", 404: "Источник не найден"},
)
@action(detail=True, methods=["get"])
def records(self, request, *args: Any, **kwargs: Any) -> Response:
extension = self.get_object()
queryset = extension.records.prefetch_related("financial_lines").order_by(
"-created_at",
"-uid",
)
page = self.paginate_queryset(queryset)
if page is not None:
organizations = list(page)
enrichment = self._build_missing_snapshot_enrichment(
organizations,
data_sources,
)
serializer = self.get_serializer(
organizations,
many=True,
context={
**self.get_serializer_context(),
"data_sources": data_sources,
"enrichment": enrichment,
},
)
serializer = OrganizationSourceRecordSerializer(page, many=True)
return self.get_paginated_response(serializer.data)
organizations = list(queryset)
enrichment = self._build_missing_snapshot_enrichment(
organizations,
data_sources,
)
serializer = self.get_serializer(
organizations,
many=True,
context={
**self.get_serializer_context(),
"data_sources": data_sources,
"enrichment": enrichment,
},
)
serializer = OrganizationSourceRecordSerializer(queryset, many=True)
return Response(serializer.data)
def _retrieve_with_enrichment(
self,
request,
*args: Any,
**kwargs: Any,
) -> Response:
organization = self.get_object()
data_sources = self._parse_data_sources(request)
enrichment = self._build_missing_snapshot_enrichment(
[organization],
data_sources,
)
serializer = self.get_serializer(
organization,
context={
**self.get_serializer_context(),
"data_sources": data_sources,
"enrichment": enrichment,
},
)
return Response(serializer.data)
class OrganizationSourceRecordViewSet(ReadOnlyModelViewSet):
"""Read-only flat API for source records across source extensions."""
# Each record is loaded together with its extension and that extension's
# organization (select_related); financial_lines are prefetched.
queryset = OrganizationSourceRecord.objects.select_related(
"extension",
"extension__organization",
).prefetch_related("financial_lines").order_by("-created_at", "-uid")
serializer_class = OrganizationSourceRecordSerializer
# Default permission; get_permissions() returns AllowAny instead when the
# ORGANIZATIONS_V2_ALLOW_ANONYMOUS setting is truthy.
permission_classes = [IsAuthenticated]
lookup_field = "uid"
# Only ordering is delegated to a DRF backend. Search and the other query
# filters are applied by hand in get_queryset().
filter_backends = [OrderingFilter]
# Names matched with `icontains` by _source_record_search_query(). The three
# source_record_*_text entries are not model fields: they are CharField casts
# of amount, load_batch and payload annotated in _filter_search_queryset().
search_fields = [
"title",
"external_id",
"record_type",
"source",
"record_date",
"status",
"url",
"legacy_model",
"legacy_pk",
"source_record_amount_text",
"source_record_load_batch_text",
"source_record_payload_text",
"extension__title",
"extension__source_group",
"extension__organization__name",
"extension__organization__inn",
"extension__organization__kpp",
"extension__organization__ogrn",
"extension__organization__ogrip",
]
ordering_fields = [
"created_at",
"updated_at",
"record_date",
"title",
"uid",
"extension__organization__name",
"extension__organization__inn",
"extension__organization__ogrn",
]
ordering = ["-created_at", "-uid"]
def get_permissions(self):
    """Pick the permission instances for the current request.

    A truthy ``ORGANIZATIONS_V2_ALLOW_ANONYMOUS`` setting opens the endpoint
    with ``AllowAny``; otherwise the class-level permissions apply.
    """
    anonymous_allowed = getattr(settings, "ORGANIZATIONS_V2_ALLOW_ANONYMOUS", False)
    if not anonymous_allowed:
        return super().get_permissions()
    return [AllowAny()]
def get_queryset(self):
    """Narrow the base record queryset using the request's query parameters.

    Applied in order:

    * exact-match filters for ``source_group``, ``source``, ``record_type``
      and ``organization`` (each skipped when absent or empty);
    * ``has_registry``: when present, records are kept or excluded according
      to ``_registry_membership_query()``;
    * search terms extracted by ``SearchFilter.get_search_terms`` and applied
      through ``_filter_search_queryset``.
    """
    records = super().get_queryset()
    query = self.request.query_params

    # Query parameter name -> ORM lookup it is matched against.
    # NOTE(review): the `organization` value reaches the ORM unvalidated —
    # confirm a malformed UUID yields a 400 rather than an unhandled error.
    exact_lookups = (
        ("source_group", "extension__source_group"),
        ("source", "source"),
        ("record_type", "record_type"),
        ("organization", "extension__organization_id"),
    )
    for param_name, lookup in exact_lookups:
        raw = query.get(param_name)
        if raw:
            records = records.filter(**{lookup: raw})

    # An absent parameter means "no registry filtering at all"; a present one
    # is interpreted as a boolean and selects or excludes matching records.
    registry_flag = query.get("has_registry")
    if registry_flag is not None:
        membership = self._registry_membership_query()
        if _is_truthy_query_value(registry_flag):
            records = records.filter(membership)
        else:
            records = records.exclude(membership)

    terms = SearchFilter().get_search_terms(self.request)
    if terms:
        records = self._filter_search_queryset(records, terms)
    return records
@staticmethod
def _build_missing_snapshot_enrichment(
organizations: list[Organization],
data_sources: set[str] | None,
) -> dict:
missing = [
organization
for organization in organizations
if not hasattr(organization, "data_snapshot")
]
if not missing:
return {}
return OrganizationApiEnrichmentService.build_for(
missing,
data_sources=data_sources,
def _registry_membership_query():
inn_values, ogrn_values = OrganizationFilter._registry_identity_value_querysets()
return (
Q(extension__organization__inn__in=inn_values)
| Q(extension__organization__ogrn__in=ogrn_values)
| Q(extension__organization__ogrip__in=ogrn_values)
)
@staticmethod
def _parse_data_sources(
request,
*,
default: set[str] | None = None,
) -> set[str] | None:
included = _query_param_values(request, "data", "data_sources")
excluded = _query_param_values(request, "exclude_data", "exclude_data_sources")
@classmethod
def _filter_search_queryset(cls, queryset, search_terms: list[str]):
queryset = queryset.annotate(
source_record_amount_text=Cast("amount", output_field=CharField()),
source_record_load_batch_text=Cast(
"load_batch",
output_field=CharField(),
),
source_record_payload_text=Cast("payload", output_field=CharField()),
)
unknown = (included | excluded) - API_DATA_SOURCE_KEY_SET
if unknown:
raise ValidationError(
{
"data": (
"Unknown data source(s): "
+ ", ".join(sorted(unknown))
+ ". Available sources: "
+ ", ".join(sorted(API_DATA_SOURCE_KEY_SET))
for search_term in search_terms:
queryset = queryset.filter(cls._source_record_search_query(search_term))
return queryset
@classmethod
def _source_record_search_query(cls, search_term: str) -> Q:
query = Q()
for field_name in cls.search_fields:
query |= Q(**{f"{field_name}__icontains": search_term})
if field_name == "source_record_payload_text":
escaped_search_term = cls._json_escaped_search_term(search_term)
if escaped_search_term != search_term:
query |= Q(
**{f"{field_name}__icontains": escaped_search_term},
)
}
return query | cls._registry_search_query(search_term)
@staticmethod
def _json_escaped_search_term(search_term: str) -> str:
return json.dumps(search_term, ensure_ascii=True)[1:-1]
@staticmethod
def _registry_search_query(search_term: str) -> Q:
registry_membership = (
RegistryMembershipPeriod.objects.filter(
ended_at__isnull=True,
)
if included:
return {
to_api_data_source(to_internal_data_source(source))
for source in included - excluded
}
if excluded:
return API_DATA_SOURCE_KEY_SET - excluded
return default
def _query_param_values(request, *names: str) -> set[str]:
values: set[str] = set()
for name in names:
for raw_value in request.query_params.getlist(name):
values.update(
value.strip() for value in raw_value.split(",") if value.strip()
.order_by()
.annotate(
registry_inn_text=Cast(
"organization__mn_inn",
output_field=CharField(),
),
registry_kpp_text=Cast(
"organization__in_kpp",
output_field=CharField(),
),
registry_ogrn_text=Cast(
"organization__mn_ogrn",
output_field=CharField(),
),
)
return values
.filter(
Q(organization__pn_name__icontains=search_term)
| Q(registry_inn_text__icontains=search_term)
| Q(registry_kpp_text__icontains=search_term)
| Q(registry_ogrn_text__icontains=search_term),
)
)
inn_values = registry_membership.values_list("registry_inn_text", flat=True)
ogrn_values = registry_membership.values_list("registry_ogrn_text", flat=True)
return (
Q(extension__organization__inn__in=inn_values)
| Q(extension__organization__ogrn__in=ogrn_values)
| Q(extension__organization__ogrip__in=ogrn_values)
)
# Overridden only to attach the Swagger metadata below; the method body simply
# delegates to the parent class's list().
@swagger_auto_schema(
tags=[ORGANIZATIONS_TAG],
operation_id="v2_organization_source_records_list",
operation_summary="Записи источников организаций",
operation_description=(
"Возвращает плоский пагинированный список записей источников с "
"данными организации и финансовыми строками при наличии."
),
manual_parameters=SOURCE_RECORD_LIST_PARAMS,
responses={200: "Пагинированный список записей источников"},
)
def list(self, request, *args: Any, **kwargs: Any) -> Response:
return super().list(request, *args, **kwargs)