feat: migrate parser data to source records
This commit is contained in:
@@ -42,7 +42,9 @@ class OrganizationFilter(filters.FilterSet):
|
||||
kpp = filters.CharFilter(field_name="kpp", lookup_expr="exact")
|
||||
ogrn = filters.CharFilter(field_name="ogrn", lookup_expr="exact")
|
||||
ogrip = filters.CharFilter(field_name="ogrip", lookup_expr="exact")
|
||||
identity_status = filters.CharFilter(field_name="identity_status", lookup_expr="exact")
|
||||
identity_status = filters.CharFilter(
|
||||
field_name="identity_status", lookup_expr="exact"
|
||||
)
|
||||
registry = filters.UUIDFilter(method="filter_registry")
|
||||
registry_name = filters.CharFilter(method="filter_registry_name")
|
||||
has_registry = filters.BooleanFilter(method="filter_has_registry")
|
||||
|
||||
@@ -14,14 +14,23 @@ from organizations.name_normalization import normalize_organization_name
|
||||
class SourceGroup(models.TextChoices):
|
||||
"""Product-level organization source groups."""
|
||||
|
||||
FINANCIAL_INDICATORS = "financial_indicators", _("Финансово-экономические показатели")
|
||||
FINANCIAL_INDICATORS = (
|
||||
"financial_indicators",
|
||||
_("Финансово-экономические показатели"),
|
||||
)
|
||||
GOVERNMENT_PROCUREMENTS = "government_procurements", _("Государственные закупки")
|
||||
INDUSTRIAL_PRODUCTION = "industrial_production", _("Производители и продукция России")
|
||||
INDUSTRIAL_PRODUCTION = (
|
||||
"industrial_production",
|
||||
_("Производители и продукция России"),
|
||||
)
|
||||
PLANNED_INSPECTIONS = "planned_inspections", _("Плановые проверки")
|
||||
BANKRUPTCY = "bankruptcy", _("Сведения о процедурах банкротства")
|
||||
DEFENSE_SUPPLIERS = "defense_suppliers", _("Недобросовестные поставщики ГОЗ")
|
||||
ARBITRATION = "arbitration", _("Арбитражные дела")
|
||||
SECURITY_REGISTRIES = "security_registries", _("Реестры по информационной безопасности")
|
||||
SECURITY_REGISTRIES = (
|
||||
"security_registries",
|
||||
_("Реестры по информационной безопасности"),
|
||||
)
|
||||
VACANCIES = "vacancies", _("Вакансии")
|
||||
|
||||
|
||||
@@ -472,6 +481,100 @@ class OrganizationSourceRecord(models.Model):
|
||||
def __str__(self) -> str:
    """Return the first non-empty label: title, then external id, then uid."""
    title = self.title
    if title:
        return title
    external_id = self.external_id
    if external_id:
        return external_id
    # Neither human-readable label is set; fall back to the record uid.
    return str(self.uid)
|
||||
|
||||
@property
def id(self):
    """Expose the primary key as ``id`` for legacy parser services.

    Those services addressed records through an integer ``id`` attribute;
    this alias simply returns ``self.pk``.
    """
    primary_key = self.pk
    return primary_key
|
||||
|
||||
@property
def lines(self):
    """Expose ``financial_lines`` as ``lines``.

    Compatibility alias for code that handles financial reports stored as
    source records and still reads the ``lines`` attribute.
    """
    financial_lines = self.financial_lines
    return financial_lines
|
||||
|
||||
@property
def inn(self) -> str:
    """Return the organization's INN, falling back to the record payload.

    The value stored on the linked organization wins. When it is blank, the
    ``"inn"`` key of ``self.payload`` is used instead and converted to a
    string. An empty payload, or a payload that is not a mapping, yields an
    empty string instead of raising ``AttributeError``.
    """
    canonical = self.extension.organization.inn
    if canonical:
        return canonical
    payload = self.payload
    if not isinstance(payload, dict):
        # NOTE(review): payload is not guaranteed to be a mapping here; a
        # list or scalar has no keys to read, so treat it as "no value".
        return ""
    return str(payload.get("inn") or "")
|
||||
|
||||
@property
def kpp(self) -> str:
    """Return the organization's KPP, falling back to the record payload.

    The value stored on the linked organization wins. When it is blank, the
    ``"kpp"`` key of ``self.payload`` is used instead and converted to a
    string. An empty payload, or a payload that is not a mapping, yields an
    empty string instead of raising ``AttributeError``.
    """
    canonical = self.extension.organization.kpp
    if canonical:
        return canonical
    payload = self.payload
    if not isinstance(payload, dict):
        # NOTE(review): payload is not guaranteed to be a mapping here; a
        # list or scalar has no keys to read, so treat it as "no value".
        return ""
    return str(payload.get("kpp") or "")
|
||||
|
||||
@property
def ogrn(self) -> str:
    """Return the organization's OGRN, falling back to the record payload.

    The value stored on the linked organization wins. When it is blank, the
    ``"ogrn"`` key of ``self.payload`` is used instead and converted to a
    string. An empty payload, or a payload that is not a mapping, yields an
    empty string instead of raising ``AttributeError``.
    """
    canonical = self.extension.organization.ogrn
    if canonical:
        return canonical
    payload = self.payload
    if not isinstance(payload, dict):
        # NOTE(review): payload is not guaranteed to be a mapping here; a
        # list or scalar has no keys to read, so treat it as "no value".
        return ""
    return str(payload.get("ogrn") or "")
|
||||
|
||||
@property
def ogrip(self) -> str:
    """Return the organization's OGRIP, falling back to the record payload.

    The value stored on the linked organization wins. When it is blank, the
    ``"ogrip"`` key of ``self.payload`` is used instead and converted to a
    string. An empty payload, or a payload that is not a mapping, yields an
    empty string instead of raising ``AttributeError``.
    """
    canonical = self.extension.organization.ogrip
    if canonical:
        return canonical
    payload = self.payload
    if not isinstance(payload, dict):
        # NOTE(review): payload is not guaranteed to be a mapping here; a
        # list or scalar has no keys to read, so treat it as "no value".
        return ""
    return str(payload.get("ogrip") or "")
|
||||
|
||||
@property
def registry_organization(self):
    """Look up the registry organization matching this record's identity.

    The record's INN is compared with ``mn_inn`` and its OGRN / OGRIP with
    ``mn_ogrn``, both cast to text, trying each value as-is and with leading
    zeros removed. An organization reached through a membership period whose
    ``ended_at`` is null is preferred; otherwise the first matching registry
    organization is returned. Returns ``None`` when the record has no
    identity values or nothing matches.
    """
    from django.db.models import CharField
    from django.db.models.functions import Cast
    from registers.models import Organization as RegistryOrganization
    from registers.models import RegistryMembershipPeriod

    def _with_unpadded_variant(value: str) -> list[str]:
        # NOTE(review): presumably the registry columns are numeric and
        # lose leading zeros, hence the second spelling - confirm.
        unpadded = str(value or "").lstrip("0")
        if unpadded and unpadded != value:
            return [value, unpadded]
        return [value]

    def _as_text(column: str):
        return Cast(column, output_field=CharField())

    inn = self.inn
    ogrn = self.ogrn
    ogrip = self.ogrip

    # OGRIP is matched against the same registry column as OGRN.
    candidate_lookups = (
        ("registry_inn_text__in", inn),
        ("registry_ogrn_text__in", ogrn),
        ("registry_ogrn_text__in", ogrip),
    )
    identity_filter = Q()
    for lookup, value in candidate_lookups:
        if value:
            identity_filter |= Q(**{lookup: _with_unpadded_variant(value)})
    if not identity_filter:
        return None

    open_membership = (
        RegistryMembershipPeriod.objects.filter(ended_at__isnull=True)
        .select_related("organization")
        .annotate(
            registry_inn_text=_as_text("organization__mn_inn"),
            registry_ogrn_text=_as_text("organization__mn_ogrn"),
        )
        .filter(identity_filter)
        .order_by("organization__pn_name", "organization_id")
        .first()
    )
    if open_membership is not None:
        return open_membership.organization

    # No open membership matched: fall back to any registry organization.
    fallback_queryset = RegistryOrganization.objects.annotate(
        registry_inn_text=_as_text("mn_inn"),
        registry_ogrn_text=_as_text("mn_ogrn"),
    )
    return fallback_queryset.filter(identity_filter).order_by("pn_name", "id").first()
|
||||
|
||||
@property
def registry_organization_id(self):
    """Return the ``id`` of ``registry_organization``, or ``None`` without a match."""
    matched = self.registry_organization
    if matched is None:
        return None
    return matched.id
|
||||
|
||||
|
||||
class OrganizationSourceFinancialLine(models.Model):
|
||||
"""Structured financial report line under a source record."""
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from django.db.models import CharField, Q
|
||||
from django.db.models.functions import Cast
|
||||
from drf_yasg.utils import swagger_serializer_method
|
||||
from registers.models import RegistryMembershipPeriod
|
||||
from rest_framework import serializers
|
||||
|
||||
@@ -32,13 +33,28 @@ class OrganizationSourceFinancialLineSerializer(serializers.ModelSerializer):
|
||||
read_only_fields = fields
|
||||
|
||||
|
||||
class OrganizationSourceRecordOrganizationSerializer(serializers.Serializer):
    """Read-only organization summary nested inside a source record.

    Declares the organization's uid, name and its identity strings; every
    string field may be blank.
    """

    uid = serializers.UUIDField(read_only=True)
    name = serializers.CharField(allow_blank=True, read_only=True)
    # Identity attributes of the organization.
    inn = serializers.CharField(allow_blank=True, read_only=True)
    kpp = serializers.CharField(allow_blank=True, read_only=True)
    ogrn = serializers.CharField(allow_blank=True, read_only=True)
    ogrip = serializers.CharField(allow_blank=True, read_only=True)
|
||||
|
||||
|
||||
class OrganizationSourceRecordSerializer(serializers.ModelSerializer):
|
||||
"""Source record stored under one source extension."""
|
||||
|
||||
extension_uid = serializers.UUIDField(source="extension.uid", read_only=True)
|
||||
financial_lines = OrganizationSourceFinancialLineSerializer(many=True, read_only=True)
|
||||
financial_lines = OrganizationSourceFinancialLineSerializer(
|
||||
many=True, read_only=True
|
||||
)
|
||||
organization = serializers.SerializerMethodField()
|
||||
source_group = serializers.CharField(source="extension.source_group", read_only=True)
|
||||
source_group = serializers.CharField(
|
||||
source="extension.source_group", read_only=True
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = OrganizationSourceRecord
|
||||
@@ -65,6 +81,9 @@ class OrganizationSourceRecordSerializer(serializers.ModelSerializer):
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@swagger_serializer_method(
|
||||
serializer_or_field=OrganizationSourceRecordOrganizationSerializer,
|
||||
)
|
||||
def get_organization(self, obj) -> dict[str, str]:
|
||||
organization = obj.extension.organization
|
||||
registry_organization = self._get_registry_organization(organization)
|
||||
@@ -75,9 +94,9 @@ class OrganizationSourceRecordSerializer(serializers.ModelSerializer):
|
||||
ogrn = organization.ogrn
|
||||
if registry_organization is not None:
|
||||
name = registry_organization.pn_name or name
|
||||
inn = self._value_to_string(registry_organization.mn_inn) or inn
|
||||
kpp = self._value_to_string(registry_organization.in_kpp) or kpp
|
||||
ogrn = self._value_to_string(registry_organization.mn_ogrn) or ogrn
|
||||
inn = inn or self._value_to_inn_string(registry_organization.mn_inn)
|
||||
kpp = kpp or self._value_to_string(registry_organization.in_kpp)
|
||||
ogrn = ogrn or self._value_to_string(registry_organization.mn_ogrn)
|
||||
|
||||
return {
|
||||
"uid": str(organization.uid),
|
||||
@@ -185,6 +204,22 @@ class OrganizationSourceRecordSerializer(serializers.ModelSerializer):
|
||||
return ""
|
||||
return str(value)
|
||||
|
||||
@classmethod
def _value_to_inn_string(cls, value) -> str:
    """Convert *value* to an INN string, prefixing one zero when needed.

    The value is first converted with ``_value_to_string``. A result made
    only of digits and 9 or 11 characters long gets a single leading ``0``;
    anything else is returned unchanged.
    """
    text = cls._value_to_string(value)
    # NOTE(review): presumably restores a zero dropped by numeric storage
    # of 10- and 12-digit INNs - confirm against the registry schema.
    needs_leading_zero = text.isdigit() and len(text) in (9, 11)
    return f"0{text}" if needs_leading_zero else text
|
||||
|
||||
|
||||
class OrganizationSourceRecordListResponseSerializer(serializers.Serializer):
    """Envelope of a paginated source-record list in the unified API format.

    Consists of a ``success`` flag, the list of serialized records in
    ``data``, and nullable JSON ``errors`` and ``meta`` sections.
    """

    success = serializers.BooleanField(read_only=True)
    data = OrganizationSourceRecordSerializer(read_only=True, many=True)
    # Both JSON sections may be null.
    errors = serializers.JSONField(allow_null=True, read_only=True)
    meta = serializers.JSONField(allow_null=True, read_only=True)
|
||||
|
||||
|
||||
class OrganizationSourceExtensionSerializer(serializers.ModelSerializer):
|
||||
"""Compact source extension representation."""
|
||||
@@ -249,5 +284,7 @@ class OrganizationSerializer(serializers.ModelSerializer):
|
||||
"id": str(membership.registry_id),
|
||||
"name": membership.registry.name,
|
||||
}
|
||||
for membership in query.select_related("registry").order_by("registry__name")
|
||||
for membership in query.select_related("registry").order_by(
|
||||
"registry__name"
|
||||
)
|
||||
]
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from apps.parsers.models import ParserLoadLog
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.db.models.signals import post_delete, post_save
|
||||
from django.dispatch import receiver
|
||||
@@ -18,6 +21,8 @@ from registers.models import (
|
||||
from organizations.cache import invalidate_organization_api_cache
|
||||
from organizations.models import OrganizationDataSnapshot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SOURCE_UPDATE_STATUSES = {
|
||||
ParserLoadLog.Status.SUCCESS,
|
||||
ParserLoadLog.Status.SKIPPED,
|
||||
@@ -28,6 +33,21 @@ def _invalidate_on_commit() -> None:
|
||||
transaction.on_commit(invalidate_organization_api_cache)
|
||||
|
||||
|
||||
def _warm_main_dashboard_cache_on_commit() -> None:
    """Register an on-commit hook that starts the dashboard cache warm task.

    The hook calls the task's ``apply()`` when ``CELERY_TASK_ALWAYS_EAGER``
    is set in settings and ``delay()`` otherwise. Any exception raised while
    importing or dispatching the task is logged and not re-raised.
    """

    def _enqueue() -> None:
        try:
            # Imported inside the hook, so an import failure is also logged.
            from registers.tasks import warm_main_dashboard_cache_task

            run_inline = getattr(settings, "CELERY_TASK_ALWAYS_EAGER", False)
            dispatch = (
                warm_main_dashboard_cache_task.apply
                if run_inline
                else warm_main_dashboard_cache_task.delay
            )
            dispatch()
        except Exception:
            logger.exception("Failed to enqueue main dashboard cache warm task")

    transaction.on_commit(_enqueue)
|
||||
|
||||
|
||||
@receiver(
|
||||
post_save, sender=ParserLoadLog, dispatch_uid="organizations_parser_load_save"
|
||||
)
|
||||
@@ -35,6 +55,7 @@ def invalidate_for_parser_load(sender, instance: ParserLoadLog, **kwargs) -> Non
|
||||
"""Invalidate when a parser source reaches a visible terminal state."""
|
||||
if instance.status in SOURCE_UPDATE_STATUSES:
|
||||
_invalidate_on_commit()
|
||||
_warm_main_dashboard_cache_on_commit()
|
||||
|
||||
|
||||
@receiver(post_save, sender=Register, dispatch_uid="organizations_register_save")
|
||||
|
||||
@@ -52,16 +52,21 @@ class OrganizationSourceBackfillResult:
|
||||
updated_financial_lines: int = 0
|
||||
unresolved: int = 0
|
||||
|
||||
def plus(self, other: OrganizationSourceBackfillResult) -> OrganizationSourceBackfillResult:
|
||||
def plus(
|
||||
self, other: OrganizationSourceBackfillResult
|
||||
) -> OrganizationSourceBackfillResult:
|
||||
return OrganizationSourceBackfillResult(
|
||||
scanned=self.scanned + other.scanned,
|
||||
created_organizations=self.created_organizations + other.created_organizations,
|
||||
created_organizations=self.created_organizations
|
||||
+ other.created_organizations,
|
||||
created_extensions=self.created_extensions + other.created_extensions,
|
||||
updated_extensions=self.updated_extensions + other.updated_extensions,
|
||||
created_records=self.created_records + other.created_records,
|
||||
updated_records=self.updated_records + other.updated_records,
|
||||
created_financial_lines=self.created_financial_lines + other.created_financial_lines,
|
||||
updated_financial_lines=self.updated_financial_lines + other.updated_financial_lines,
|
||||
created_financial_lines=self.created_financial_lines
|
||||
+ other.created_financial_lines,
|
||||
updated_financial_lines=self.updated_financial_lines
|
||||
+ other.updated_financial_lines,
|
||||
unresolved=self.unresolved + other.unresolved,
|
||||
)
|
||||
|
||||
@@ -131,14 +136,20 @@ class OrganizationSourceBackfillService:
|
||||
with transaction.atomic():
|
||||
for adapter in adapters:
|
||||
scanned += 1
|
||||
organization, organization_created = cls._resolve_or_create_organization(adapter)
|
||||
(
|
||||
organization,
|
||||
organization_created,
|
||||
) = cls._resolve_or_create_organization(adapter)
|
||||
if organization is None:
|
||||
unresolved += 1
|
||||
continue
|
||||
if organization_created:
|
||||
created_organizations += 1
|
||||
|
||||
extension, extension_created = descriptor.extension_model.objects.get_or_create(
|
||||
(
|
||||
extension,
|
||||
extension_created,
|
||||
) = descriptor.extension_model.objects.get_or_create(
|
||||
organization=organization,
|
||||
defaults={
|
||||
"source_group": descriptor.source_group,
|
||||
@@ -149,9 +160,14 @@ class OrganizationSourceBackfillService:
|
||||
if extension_created:
|
||||
created_extensions += 1
|
||||
else:
|
||||
updated_extensions += cls._update_extension(extension, descriptor, adapter)
|
||||
updated_extensions += cls._update_extension(
|
||||
extension, descriptor, adapter
|
||||
)
|
||||
|
||||
source_record, record_created = OrganizationSourceRecord.objects.update_or_create(
|
||||
(
|
||||
source_record,
|
||||
record_created,
|
||||
) = OrganizationSourceRecord.objects.update_or_create(
|
||||
legacy_model=adapter.legacy_model,
|
||||
legacy_pk=adapter.legacy_pk,
|
||||
defaults={
|
||||
@@ -174,7 +190,9 @@ class OrganizationSourceBackfillService:
|
||||
updated_records += 1
|
||||
|
||||
if adapter.source == ParserLoadLog.Source.FNS_REPORTS:
|
||||
line_result = cls._backfill_financial_lines(source_record, adapter.legacy_pk)
|
||||
line_result = cls._backfill_financial_lines(
|
||||
source_record, adapter.legacy_pk
|
||||
)
|
||||
created_financial_lines += line_result[0]
|
||||
updated_financial_lines += line_result[1]
|
||||
|
||||
@@ -204,7 +222,10 @@ class OrganizationSourceBackfillService:
|
||||
if extension.title != descriptor.title:
|
||||
extension.title = descriptor.title
|
||||
changed = True
|
||||
if adapter.load_batch is not None and extension.last_load_batch != adapter.load_batch:
|
||||
if (
|
||||
adapter.load_batch is not None
|
||||
and extension.last_load_batch != adapter.load_batch
|
||||
):
|
||||
extension.last_load_batch = adapter.load_batch
|
||||
changed = True
|
||||
if changed:
|
||||
@@ -371,7 +392,9 @@ class OrganizationSourceBackfillService:
|
||||
|
||||
@staticmethod
|
||||
def _refresh_extension_counters(extension_ids: set[str]) -> None:
|
||||
for extension in OrganizationSourceExtension.objects.filter(uid__in=extension_ids):
|
||||
for extension in OrganizationSourceExtension.objects.filter(
|
||||
uid__in=extension_ids
|
||||
):
|
||||
aggregate = extension.records.aggregate(
|
||||
records_count=Count("uid"),
|
||||
first_seen_at=Min("created_at"),
|
||||
@@ -540,13 +563,16 @@ class OrganizationSourceBackfillService:
|
||||
url: str = "",
|
||||
payload: dict[str, Any] | None = None,
|
||||
) -> LegacyRecordAdapter:
|
||||
normalized_inn, normalized_kpp, normalized_ogrn, normalized_ogrip = (
|
||||
normalize_identity_fields(
|
||||
inn=inn,
|
||||
kpp=kpp,
|
||||
ogrn=ogrn,
|
||||
ogrip=ogrip,
|
||||
)
|
||||
(
|
||||
normalized_inn,
|
||||
normalized_kpp,
|
||||
normalized_ogrn,
|
||||
normalized_ogrip,
|
||||
) = normalize_identity_fields(
|
||||
inn=inn,
|
||||
kpp=kpp,
|
||||
ogrn=ogrn,
|
||||
ogrip=ogrip,
|
||||
)
|
||||
return LegacyRecordAdapter(
|
||||
source=str(descriptor.source),
|
||||
|
||||
24
src/organizations/source_cache.py
Normal file
24
src/organizations/source_cache.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Cache versioning for organization source-derived aggregates."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from django.core.cache import cache
|
||||
|
||||
SOURCE_DATA_CACHE_VERSION_KEY = "organizations:source_data:version"
|
||||
|
||||
|
||||
def get_source_data_cache_version() -> str:
    """Return the cache version of aggregates built from source records.

    Reads the version stored under ``SOURCE_DATA_CACHE_VERSION_KEY``; when
    nothing (or an empty value) is stored, the version is ``"initial"``.
    """
    stored_version = cache.get(SOURCE_DATA_CACHE_VERSION_KEY)
    if not stored_version:
        return "initial"
    return str(stored_version)
|
||||
|
||||
|
||||
def invalidate_source_data_cache() -> None:
    """Store a fresh random cache version for source-derived aggregates.

    The new version is written under ``SOURCE_DATA_CACHE_VERSION_KEY`` with
    ``timeout=None``.
    """
    fresh = uuid4()
    try:
        token = fresh.hex
    except AttributeError:
        # Objects without a ``hex`` attribute are stored as their string form.
        token = str(fresh)
    cache.set(SOURCE_DATA_CACHE_VERSION_KEY, token, timeout=None)
|
||||
@@ -20,6 +20,7 @@ from organizations.models import (
|
||||
OrganizationSourceRecord,
|
||||
)
|
||||
from organizations.name_normalization import normalize_organization_name
|
||||
from organizations.source_cache import invalidate_source_data_cache
|
||||
from organizations.source_groups import (
|
||||
SourceGroupDescriptor,
|
||||
get_source_group_descriptor,
|
||||
@@ -128,18 +129,21 @@ class OrganizationSourceIngestionService:
|
||||
|
||||
with transaction.atomic():
|
||||
normalized_records = cls._normalize_records(records)
|
||||
organizations_by_index, created_organizations = (
|
||||
cls._resolve_or_create_organizations(normalized_records)
|
||||
)
|
||||
(
|
||||
organizations_by_index,
|
||||
created_organizations,
|
||||
) = cls._resolve_or_create_organizations(normalized_records)
|
||||
del normalized_records
|
||||
unresolved = scanned - len(organizations_by_index)
|
||||
|
||||
extensions_by_organization_id, created_extensions, updated_extensions = (
|
||||
cls._resolve_or_create_extensions(
|
||||
descriptor=descriptor,
|
||||
load_batch=load_batch,
|
||||
organizations=organizations_by_index.values(),
|
||||
)
|
||||
(
|
||||
extensions_by_organization_id,
|
||||
created_extensions,
|
||||
updated_extensions,
|
||||
) = cls._resolve_or_create_extensions(
|
||||
descriptor=descriptor,
|
||||
load_batch=load_batch,
|
||||
organizations=organizations_by_index.values(),
|
||||
)
|
||||
|
||||
touched_extension_ids: set[str] = set()
|
||||
@@ -166,6 +170,8 @@ class OrganizationSourceIngestionService:
|
||||
|
||||
cls._refresh_extension_counters(touched_extension_ids)
|
||||
|
||||
invalidate_source_data_cache()
|
||||
|
||||
return OrganizationSourceIngestionResult(
|
||||
scanned=scanned,
|
||||
created_organizations=created_organizations,
|
||||
@@ -256,10 +262,18 @@ class OrganizationSourceIngestionService:
|
||||
organizations_by_index,
|
||||
)
|
||||
|
||||
return cls._create_missing_organizations(
|
||||
(
|
||||
organizations_by_index,
|
||||
created_organizations,
|
||||
) = cls._create_missing_organizations(
|
||||
normalized_records,
|
||||
organizations_by_index,
|
||||
)
|
||||
cls._update_resolved_organization_identities(
|
||||
normalized_records,
|
||||
organizations_by_index,
|
||||
)
|
||||
return organizations_by_index, created_organizations
|
||||
|
||||
@classmethod
|
||||
def _resolve_organizations_by_inn_kpp(
|
||||
@@ -285,7 +299,9 @@ class OrganizationSourceIngestionService:
|
||||
for inn, kpp in chunk:
|
||||
query |= Q(inn=inn, kpp=kpp)
|
||||
for organization in Organization.objects.filter(query):
|
||||
organizations_by_key[(organization.inn, organization.kpp)] = organization
|
||||
organizations_by_key[
|
||||
(organization.inn, organization.kpp)
|
||||
] = organization
|
||||
|
||||
for record in normalized_records:
|
||||
if record.index in organizations_by_index:
|
||||
@@ -384,6 +400,7 @@ class OrganizationSourceIngestionService:
|
||||
record.organization_name.strip()
|
||||
for record in normalized_records
|
||||
if record.index not in organizations_by_index
|
||||
and not cls._record_has_identity(record)
|
||||
and normalize_organization_name(record.organization_name)
|
||||
}
|
||||
)
|
||||
@@ -406,10 +423,136 @@ class OrganizationSourceIngestionService:
|
||||
for record in normalized_records:
|
||||
if record.index in organizations_by_index:
|
||||
continue
|
||||
if cls._record_has_identity(record):
|
||||
continue
|
||||
organization = unique_by_name.get(record.organization_name.strip().lower())
|
||||
if organization is not None:
|
||||
organizations_by_index[record.index] = organization
|
||||
|
||||
@staticmethod
def _record_has_identity(record: _NormalizedRecordInput) -> bool:
    """Tell whether the record carries an INN, OGRN or OGRIP value."""
    identity_values = (record.inn, record.ogrn, record.ogrip)
    return any(identity_values)
|
||||
|
||||
@classmethod
def _update_resolved_organization_identities(
    cls,
    normalized_records: list[_NormalizedRecordInput],
    organizations_by_index: dict[int, Organization],
) -> None:
    """Fill blank identity fields of resolved organizations from the records.

    Each record whose index has a resolved organization is passed to
    ``_apply_missing_identity_fields`` together with the INN assignments
    approved by ``_safe_missing_inn_updates``. Organizations reported as
    changed are collected once per uid and written with one ``bulk_update``.
    """
    approved_inns = cls._safe_missing_inn_updates(
        normalized_records,
        organizations_by_index,
    )

    touched: dict[str, Organization] = {}
    for record in normalized_records:
        organization = organizations_by_index.get(record.index)
        if organization is not None and cls._apply_missing_identity_fields(
            organization,
            record,
            safe_inn_by_organization_id=approved_inns,
        ):
            # Keyed by uid so an organization is written once even when
            # several records changed it.
            touched[str(organization.uid)] = organization

    if not touched:
        return

    persisted_fields = [
        "name",
        "inn",
        "kpp",
        "ogrn",
        "ogrip",
        "identity_status",
        "primary_identity",
    ]
    Organization.objects.bulk_update(
        list(touched.values()),
        fields=persisted_fields,
        batch_size=cls.chunk_size,
    )
|
||||
|
||||
@classmethod
def _safe_missing_inn_updates(
    cls,
    normalized_records: list[_NormalizedRecordInput],
    organizations_by_index: dict[int, Organization],
) -> dict[str, str]:
    """Return the INN assignments that can be applied without a clash.

    Only resolved organizations with a blank INN and records that carry an
    INN take part; the first INN seen for an organization is the one it
    wants. An assignment is kept when exactly one organization in this batch
    claims that INN and no organization outside the wanting set already
    stores it.

    Returns a mapping of organization uid (as a string) to the INN to set.
    """
    wanted_inn_by_org: dict[str, str] = {}
    claimants_by_inn: dict[str, set[str]] = defaultdict(set)
    for record in normalized_records:
        organization = organizations_by_index.get(record.index)
        if organization is None:
            continue
        if organization.inn or not record.inn:
            continue
        org_key = str(organization.uid)
        wanted_inn_by_org.setdefault(org_key, record.inn)
        claimants_by_inn[record.inn].add(org_key)

    if not wanted_inn_by_org:
        # Nothing to assign, so there is nothing to check in the database.
        return {}

    holders = Organization.objects.filter(
        inn__in=set(wanted_inn_by_org.values())
    ).exclude(uid__in=list(wanted_inn_by_org))
    taken_elsewhere = set(holders.values_list("inn", flat=True))

    return {
        org_key: inn
        for org_key, inn in wanted_inn_by_org.items()
        if inn not in taken_elsewhere and len(claimants_by_inn[inn]) == 1
    }
|
||||
|
||||
@classmethod
def _apply_missing_identity_fields(
    cls,
    organization: Organization,
    record: _NormalizedRecordInput,
    *,
    safe_inn_by_organization_id: dict[str, str],
) -> bool:
    """Copy identity values from *record* into blank organization fields.

    Args:
        organization: The resolved organization; it is mutated in place and
            not saved here.
        record: The normalized input record supplying candidate values.
        safe_inn_by_organization_id: Approved INN per organization uid
            (string). The INN is filled only when the organization has an
            approved INN and the record carries that same value.

    Returns:
        ``True`` when at least one field was changed. In that case
        ``identity_status`` and ``primary_identity`` are recomputed from the
        organization's own resolver methods.
    """
    changed = False
    approved_inn = safe_inn_by_organization_id.get(str(organization.uid))
    # Require a real approved INN. Without this check, an organization that
    # has no approved entry (``None``) paired with a record whose INN is also
    # ``None`` would compare equal and overwrite the blank INN with ``None``.
    if not organization.inn and approved_inn and approved_inn == record.inn:
        organization.inn = record.inn
        changed = True

    # The remaining identifiers are filled whenever the organization lacks
    # them and the record provides a value.
    for field_name in ("kpp", "ogrn", "ogrip"):
        if not getattr(organization, field_name) and getattr(record, field_name):
            setattr(organization, field_name, getattr(record, field_name))
            changed = True

    if cls._should_replace_placeholder_name(organization, record.organization_name):
        organization.name = record.organization_name.strip()
        changed = True

    if changed:
        organization.identity_status = organization._resolve_identity_status()
        organization.primary_identity = organization._resolve_primary_identity()
    return changed
|
||||
|
||||
@staticmethod
def _should_replace_placeholder_name(
    organization: Organization,
    candidate_name: str,
) -> bool:
    """Decide whether *candidate_name* should replace the organization name.

    A candidate that normalizes to nothing never replaces anything.
    Otherwise the current name is replaced when it normalizes to nothing
    itself, or when, after stripping, it equals the organization's INN, OGRN
    or OGRIP.
    """
    if not normalize_organization_name(candidate_name):
        return False

    existing_name = organization.name.strip()
    if not normalize_organization_name(existing_name):
        return True

    # A name identical to one of the identifiers is treated as a placeholder.
    identifier_values = (organization.inn, organization.ogrn, organization.ogrip)
    return existing_name in identifier_values
|
||||
|
||||
@classmethod
|
||||
def _create_missing_organizations(
|
||||
cls,
|
||||
@@ -510,7 +653,9 @@ class OrganizationSourceIngestionService:
|
||||
load_batch: int | None,
|
||||
organizations: Iterable[Organization],
|
||||
) -> tuple[dict[Any, OrganizationSourceExtension], int, int]:
|
||||
unique_organizations = {organization.uid: organization for organization in organizations}
|
||||
unique_organizations = {
|
||||
organization.uid: organization for organization in organizations
|
||||
}
|
||||
if not unique_organizations:
|
||||
return {}, 0, 0
|
||||
|
||||
@@ -526,13 +671,13 @@ class OrganizationSourceIngestionService:
|
||||
for organization_id, organization in unique_organizations.items():
|
||||
if organization_id in extensions_by_organization_id:
|
||||
continue
|
||||
extensions_by_organization_id[organization_id] = (
|
||||
descriptor.extension_model.objects.create(
|
||||
organization=organization,
|
||||
source_group=descriptor.source_group,
|
||||
title=descriptor.title,
|
||||
last_load_batch=load_batch,
|
||||
)
|
||||
extensions_by_organization_id[
|
||||
organization_id
|
||||
] = descriptor.extension_model.objects.create(
|
||||
organization=organization,
|
||||
source_group=descriptor.source_group,
|
||||
title=descriptor.title,
|
||||
last_load_batch=load_batch,
|
||||
)
|
||||
created_extensions += 1
|
||||
|
||||
@@ -579,12 +724,14 @@ class OrganizationSourceIngestionService:
|
||||
updated_financial_lines = 0
|
||||
|
||||
for chunk in cls._iter_chunks(record_inputs_with_extensions, cls.chunk_size):
|
||||
source_records_by_index, chunk_created, chunk_updated = (
|
||||
cls._bulk_upsert_source_records_chunk(
|
||||
descriptor=descriptor,
|
||||
load_batch=load_batch,
|
||||
record_inputs_with_extensions=chunk,
|
||||
)
|
||||
(
|
||||
source_records_by_index,
|
||||
chunk_created,
|
||||
chunk_updated,
|
||||
) = cls._bulk_upsert_source_records_chunk(
|
||||
descriptor=descriptor,
|
||||
load_batch=load_batch,
|
||||
record_inputs_with_extensions=chunk,
|
||||
)
|
||||
created_records += chunk_created
|
||||
updated_records += chunk_updated
|
||||
|
||||
@@ -36,6 +36,7 @@ from organizations.models import (
|
||||
from organizations.serializers import (
|
||||
OrganizationSerializer,
|
||||
OrganizationSourceExtensionSerializer,
|
||||
OrganizationSourceRecordListResponseSerializer,
|
||||
OrganizationSourceRecordSerializer,
|
||||
)
|
||||
|
||||
@@ -181,19 +182,15 @@ SOURCE_RECORD_LIST_PARAMS = [
|
||||
"статусу, датам, URL и исходным данным записи."
|
||||
),
|
||||
),
|
||||
_query_parameter("page", description="Номер страницы.", param_type=openapi.TYPE_INTEGER),
|
||||
_query_parameter(
|
||||
"page", description="Номер страницы.", param_type=openapi.TYPE_INTEGER
|
||||
),
|
||||
_query_parameter(
|
||||
"page_size",
|
||||
description="Размер страницы. Максимум 100.",
|
||||
param_type=openapi.TYPE_INTEGER,
|
||||
),
|
||||
]
|
||||
ORGANIZATION_LIST_RESPONSE = openapi.Response(
|
||||
description="Пагинированный список организаций v2 с компактными источниками.",
|
||||
)
|
||||
ORGANIZATION_DETAIL_RESPONSE = openapi.Response(
|
||||
description="Карточка организации v2.",
|
||||
)
|
||||
|
||||
|
||||
class CachedReadOnlyMixin:
|
||||
@@ -300,7 +297,12 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
|
||||
"записи доступны через endpoints расширений источников."
|
||||
),
|
||||
manual_parameters=ORGANIZATION_LIST_PARAMS,
|
||||
responses={200: ORGANIZATION_LIST_RESPONSE},
|
||||
responses={
|
||||
200: openapi.Response(
|
||||
"Пагинированный список организаций.",
|
||||
OrganizationSerializer(many=True),
|
||||
)
|
||||
},
|
||||
)
|
||||
def list(self, request, *args: Any, **kwargs: Any) -> Response:
|
||||
return self._cached_response(
|
||||
@@ -317,7 +319,13 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
|
||||
"группами источников."
|
||||
),
|
||||
manual_parameters=ORGANIZATION_DETAIL_PARAMS,
|
||||
responses={200: ORGANIZATION_DETAIL_RESPONSE, 404: "Организация не найдена"},
|
||||
responses={
|
||||
200: openapi.Response(
|
||||
"Карточка организации.",
|
||||
OrganizationSerializer,
|
||||
),
|
||||
404: "Организация не найдена",
|
||||
},
|
||||
)
|
||||
def retrieve(self, request, *args: Any, **kwargs: Any) -> Response:
|
||||
return self._cached_response(
|
||||
@@ -330,7 +338,10 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
|
||||
operation_id="v2_organizations_sources",
|
||||
operation_summary="Источники организации",
|
||||
operation_description="Возвращает source extensions одной организации.",
|
||||
responses={200: "Список source extensions", 404: "Организация не найдена"},
|
||||
responses={
|
||||
200: OrganizationSourceExtensionSerializer(many=True),
|
||||
404: "Организация не найдена",
|
||||
},
|
||||
)
|
||||
@action(detail=True, methods=["get"])
|
||||
def sources(self, request, *args: Any, **kwargs: Any) -> Response:
|
||||
@@ -345,7 +356,9 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
|
||||
class OrganizationSourceExtensionViewSet(ReadOnlyModelViewSet):
|
||||
"""Read-only API for source extensions and their records."""
|
||||
|
||||
queryset = OrganizationSourceExtension.objects.select_related("organization").order_by(
|
||||
queryset = OrganizationSourceExtension.objects.select_related(
|
||||
"organization"
|
||||
).order_by(
|
||||
"organization__name",
|
||||
"source_group",
|
||||
)
|
||||
@@ -367,7 +380,10 @@ class OrganizationSourceExtensionViewSet(ReadOnlyModelViewSet):
|
||||
operation_summary="Записи источника организации",
|
||||
operation_description="Возвращает записи под конкретным source extension.",
|
||||
manual_parameters=SOURCE_EXTENSION_PATH_PARAMS,
|
||||
responses={200: "Пагинированный список записей источника", 404: "Источник не найден"},
|
||||
responses={
|
||||
200: OrganizationSourceRecordListResponseSerializer,
|
||||
404: "Источник не найден",
|
||||
},
|
||||
)
|
||||
@action(detail=True, methods=["get"])
|
||||
def records(self, request, *args: Any, **kwargs: Any) -> Response:
|
||||
@@ -388,10 +404,14 @@ class OrganizationSourceExtensionViewSet(ReadOnlyModelViewSet):
|
||||
class OrganizationSourceRecordViewSet(ReadOnlyModelViewSet):
|
||||
"""Read-only flat API for source records across source extensions."""
|
||||
|
||||
queryset = OrganizationSourceRecord.objects.select_related(
|
||||
"extension",
|
||||
"extension__organization",
|
||||
).prefetch_related("financial_lines").order_by("-created_at", "-uid")
|
||||
queryset = (
|
||||
OrganizationSourceRecord.objects.select_related(
|
||||
"extension",
|
||||
"extension__organization",
|
||||
)
|
||||
.prefetch_related("financial_lines")
|
||||
.order_by("-created_at", "-uid")
|
||||
)
|
||||
serializer_class = OrganizationSourceRecordSerializer
|
||||
permission_classes = [IsAuthenticated]
|
||||
lookup_field = "uid"
|
||||
@@ -465,7 +485,10 @@ class OrganizationSourceRecordViewSet(ReadOnlyModelViewSet):
|
||||
|
||||
@staticmethod
|
||||
def _registry_membership_query():
|
||||
inn_values, ogrn_values = OrganizationFilter._registry_identity_value_querysets()
|
||||
(
|
||||
inn_values,
|
||||
ogrn_values,
|
||||
) = OrganizationFilter._registry_identity_value_querysets()
|
||||
|
||||
return (
|
||||
Q(extension__organization__inn__in=inn_values)
|
||||
@@ -552,7 +575,7 @@ class OrganizationSourceRecordViewSet(ReadOnlyModelViewSet):
|
||||
"данными организации и финансовыми строками при наличии."
|
||||
),
|
||||
manual_parameters=SOURCE_RECORD_LIST_PARAMS,
|
||||
responses={200: "Пагинированный список записей источников"},
|
||||
responses={200: OrganizationSourceRecordListResponseSerializer},
|
||||
)
|
||||
def list(self, request, *args: Any, **kwargs: Any) -> Response:
|
||||
return super().list(request, *args, **kwargs)
|
||||
|
||||
Reference in New Issue
Block a user