Add organizations v2 API and registry enrichment
This commit is contained in:
703
src/organizations/services.py
Normal file
703
src/organizations/services.py
Normal file
@@ -0,0 +1,703 @@
|
||||
"""Services for building the canonical organizations directory."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
|
||||
from apps.parsers.models import (
|
||||
FinancialReport,
|
||||
GenericParserRecord,
|
||||
IndustrialCertificateRecord,
|
||||
IndustrialProductRecord,
|
||||
InspectionRecord,
|
||||
ManufacturerRecord,
|
||||
ParserLoadLog,
|
||||
ProcurementRecord,
|
||||
)
|
||||
from django.db import transaction
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
from registers.models import Organization as RegisterOrganization
|
||||
|
||||
from organizations.api_enrichment import OrganizationApiEnrichmentService
|
||||
from organizations.models import Organization, OrganizationDataSnapshot
|
||||
|
||||
_QUOTE_CHARS = "\"'«»„“”"
|
||||
_LEGAL_FORM_PATTERNS = (
|
||||
r"\bобщество\s+с\s+ограниченной\s+ответственностью\b",
|
||||
r"\bооо\b",
|
||||
r"\booo\b",
|
||||
r"\bакционерное\s+общество\b",
|
||||
r"\bао\b",
|
||||
r"\bao\b",
|
||||
r"\bпубличное\s+акционерное\s+общество\b",
|
||||
r"\bпао\b",
|
||||
r"\bpao\b",
|
||||
r"\bзакрытое\s+акционерное\s+общество\b",
|
||||
r"\bзао\b",
|
||||
r"\bzao\b",
|
||||
r"\bиндивидуальный\s+предприниматель\b",
|
||||
r"\bип\b",
|
||||
)
|
||||
_ABBREVIATED_PREFIXES = (
|
||||
"ооо ",
|
||||
"ooo ",
|
||||
"ао ",
|
||||
"ao ",
|
||||
"пао ",
|
||||
"pao ",
|
||||
"зао ",
|
||||
"zao ",
|
||||
"ип ",
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class OrganizationCandidate:
|
||||
"""Organization data extracted from an existing source table."""
|
||||
|
||||
name: str
|
||||
inn: str = ""
|
||||
kpp: str = ""
|
||||
ogrn: str = ""
|
||||
ogrip: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PopulateOrganizationsResult:
|
||||
"""Result counters for organization population."""
|
||||
|
||||
scanned: int
|
||||
created: int
|
||||
updated: int
|
||||
skipped: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RefreshOrganizationDataSnapshotsResult:
|
||||
"""Result counters for precomputed organization API data snapshots."""
|
||||
|
||||
processed: int
|
||||
created: int
|
||||
updated: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class OrganizationLookup:
|
||||
"""In-memory indexes for matching organization candidates."""
|
||||
|
||||
by_inn_kpp: dict[tuple[str, str], Organization]
|
||||
by_ogrn_kpp: dict[tuple[str, str], Organization]
|
||||
by_inn: dict[str, list[Organization]]
|
||||
by_ogrn: dict[str, list[Organization]]
|
||||
by_ogrip: dict[str, Organization]
|
||||
by_normalized_name: dict[str, Organization]
|
||||
|
||||
|
||||
class OrganizationDataSnapshotRefreshService:
|
||||
"""Refreshes precomputed v2 data JSON for canonical organizations."""
|
||||
|
||||
@classmethod
|
||||
def refresh(
|
||||
cls,
|
||||
*,
|
||||
organization_uids: Iterable[str] | None = None,
|
||||
batch_size: int = 100,
|
||||
) -> RefreshOrganizationDataSnapshotsResult:
|
||||
queryset = Organization.objects.all().order_by("uid")
|
||||
if organization_uids is not None:
|
||||
queryset = queryset.filter(uid__in=list(organization_uids))
|
||||
|
||||
processed = 0
|
||||
created = 0
|
||||
updated = 0
|
||||
|
||||
for organizations in cls._iter_batches(queryset, batch_size):
|
||||
enrichment = OrganizationApiEnrichmentService.build_for(organizations)
|
||||
existing_snapshots = {
|
||||
str(snapshot.organization_id): snapshot
|
||||
for snapshot in OrganizationDataSnapshot.objects.filter(
|
||||
organization_id__in=[
|
||||
organization.uid for organization in organizations
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
create_instances: list[OrganizationDataSnapshot] = []
|
||||
update_instances: list[OrganizationDataSnapshot] = []
|
||||
for organization in organizations:
|
||||
processed += 1
|
||||
item = enrichment[str(organization.uid)]
|
||||
data = item.data_presence
|
||||
registries = [
|
||||
{
|
||||
"id": registry.id,
|
||||
"name": registry.name,
|
||||
}
|
||||
for registry in item.registries
|
||||
]
|
||||
|
||||
snapshot = existing_snapshots.get(str(organization.uid))
|
||||
if snapshot is None:
|
||||
create_instances.append(
|
||||
OrganizationDataSnapshot(
|
||||
organization=organization,
|
||||
data=data,
|
||||
registries=registries,
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
snapshot.data = data
|
||||
snapshot.registries = registries
|
||||
snapshot.updated_at = timezone.now()
|
||||
update_instances.append(snapshot)
|
||||
|
||||
if create_instances:
|
||||
OrganizationDataSnapshot.objects.bulk_create(
|
||||
create_instances,
|
||||
batch_size=batch_size,
|
||||
)
|
||||
created += len(create_instances)
|
||||
if update_instances:
|
||||
OrganizationDataSnapshot.objects.bulk_update(
|
||||
update_instances,
|
||||
fields=["data", "registries", "updated_at"],
|
||||
batch_size=batch_size,
|
||||
)
|
||||
updated += len(update_instances)
|
||||
|
||||
return RefreshOrganizationDataSnapshotsResult(
|
||||
processed=processed,
|
||||
created=created,
|
||||
updated=updated,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def refresh_for_parser_batch(
|
||||
cls,
|
||||
*,
|
||||
source: str,
|
||||
batch_id: int,
|
||||
batch_size: int = 100,
|
||||
) -> RefreshOrganizationDataSnapshotsResult:
|
||||
organization_uids = cls.organization_uids_for_parser_batch(
|
||||
source=source,
|
||||
batch_id=batch_id,
|
||||
)
|
||||
return cls.refresh(
|
||||
organization_uids=organization_uids,
|
||||
batch_size=batch_size,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def organization_uids_for_parser_batch(
|
||||
cls,
|
||||
*,
|
||||
source: str,
|
||||
batch_id: int,
|
||||
) -> list[str]:
|
||||
inn_values, ogrn_values = cls._parser_batch_identities(
|
||||
source=source,
|
||||
batch_id=batch_id,
|
||||
)
|
||||
if not inn_values and not ogrn_values:
|
||||
return []
|
||||
|
||||
query = Q()
|
||||
if inn_values:
|
||||
query |= Q(inn__in=inn_values)
|
||||
if ogrn_values:
|
||||
query |= Q(ogrn__in=ogrn_values) | Q(ogrip__in=ogrn_values)
|
||||
return [
|
||||
str(uid)
|
||||
for uid in Organization.objects.filter(query).values_list("uid", flat=True)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _parser_batch_identities(
|
||||
*,
|
||||
source: str,
|
||||
batch_id: int,
|
||||
) -> tuple[set[str], set[str]]:
|
||||
if source == ParserLoadLog.Source.INDUSTRIAL:
|
||||
return _identity_values(
|
||||
IndustrialCertificateRecord.objects.filter(load_batch=batch_id),
|
||||
inn_field="inn",
|
||||
ogrn_field="ogrn",
|
||||
)
|
||||
if source == ParserLoadLog.Source.INDUSTRIAL_PRODUCTS:
|
||||
return _identity_values(
|
||||
IndustrialProductRecord.objects.filter(load_batch=batch_id),
|
||||
inn_field="inn",
|
||||
ogrn_field="ogrn",
|
||||
)
|
||||
if source == ParserLoadLog.Source.MANUFACTURES:
|
||||
return _identity_values(
|
||||
ManufacturerRecord.objects.filter(load_batch=batch_id),
|
||||
inn_field="inn",
|
||||
ogrn_field="ogrn",
|
||||
)
|
||||
if source == ParserLoadLog.Source.INSPECTIONS:
|
||||
return _identity_values(
|
||||
InspectionRecord.objects.filter(load_batch=batch_id),
|
||||
inn_field="inn",
|
||||
ogrn_field="ogrn",
|
||||
)
|
||||
if source == ParserLoadLog.Source.PROCUREMENTS:
|
||||
return _identity_values(
|
||||
ProcurementRecord.objects.filter(load_batch=batch_id),
|
||||
inn_field="customer_inn",
|
||||
ogrn_field="customer_ogrn",
|
||||
)
|
||||
if source == ParserLoadLog.Source.FNS_REPORTS:
|
||||
return (
|
||||
set(),
|
||||
set(
|
||||
FinancialReport.objects.filter(load_batch=batch_id)
|
||||
.exclude(ogrn="")
|
||||
.values_list("ogrn", flat=True)
|
||||
.distinct()
|
||||
),
|
||||
)
|
||||
if source in {
|
||||
ParserLoadLog.Source.PROCUREMENTS_44FZ,
|
||||
ParserLoadLog.Source.PROCUREMENTS_223FZ,
|
||||
ParserLoadLog.Source.CONTRACTS,
|
||||
ParserLoadLog.Source.UNFAIR_SUPPLIERS,
|
||||
ParserLoadLog.Source.FAS_GOZ,
|
||||
ParserLoadLog.Source.ARBITRATION,
|
||||
ParserLoadLog.Source.FEDRESURS_BANKRUPTCY,
|
||||
ParserLoadLog.Source.FSTEC,
|
||||
ParserLoadLog.Source.TRUDVSEM,
|
||||
}:
|
||||
return _identity_values(
|
||||
GenericParserRecord.objects.filter(source=source, load_batch=batch_id),
|
||||
inn_field="inn",
|
||||
ogrn_field="ogrn",
|
||||
)
|
||||
|
||||
return set(), set()
|
||||
|
||||
@staticmethod
|
||||
def _iter_batches(queryset, batch_size: int) -> Iterable[list[Organization]]:
|
||||
batch: list[Organization] = []
|
||||
for organization in queryset.iterator(chunk_size=batch_size):
|
||||
batch.append(organization)
|
||||
if len(batch) >= batch_size:
|
||||
yield batch
|
||||
batch = []
|
||||
if batch:
|
||||
yield batch
|
||||
|
||||
|
||||
def _identity_values(
|
||||
queryset, *, inn_field: str, ogrn_field: str
|
||||
) -> tuple[set[str], set[str]]:
|
||||
inn_values = set(
|
||||
queryset.exclude(**{inn_field: ""}).values_list(inn_field, flat=True).distinct()
|
||||
)
|
||||
ogrn_values = set(
|
||||
queryset.exclude(**{ogrn_field: ""})
|
||||
.values_list(ogrn_field, flat=True)
|
||||
.distinct()
|
||||
)
|
||||
return inn_values, ogrn_values
|
||||
|
||||
|
||||
def normalize_identifier(value: str | int | None, *, max_length: int) -> str:
|
||||
"""Return digits-only identifier bounded by the target field length."""
|
||||
if value is None:
|
||||
return ""
|
||||
|
||||
normalized = re.sub(r"\D+", "", str(value))
|
||||
if not normalized or len(normalized) > max_length:
|
||||
return ""
|
||||
return normalized
|
||||
|
||||
|
||||
def normalize_organization_name(value: str | None) -> str:
|
||||
"""Normalize organization names for matching spelling variants."""
|
||||
if value is None:
|
||||
return ""
|
||||
|
||||
normalized = str(value).strip().lower().replace("ё", "е")
|
||||
normalized = normalized.translate(
|
||||
str.maketrans({char: " " for char in _QUOTE_CHARS})
|
||||
)
|
||||
normalized = re.sub(r"[^\w\s-]+", " ", normalized, flags=re.UNICODE)
|
||||
normalized = normalized.replace("-", " ")
|
||||
normalized = re.sub(r"\s+", " ", normalized).strip()
|
||||
|
||||
for pattern in _LEGAL_FORM_PATTERNS:
|
||||
normalized = re.sub(pattern, " ", normalized, flags=re.IGNORECASE)
|
||||
|
||||
return re.sub(r"\s+", " ", normalized).strip()
|
||||
|
||||
|
||||
class OrganizationPopulationService:
|
||||
"""Builds organizations from currently available source tables."""
|
||||
|
||||
@classmethod
|
||||
def populate(cls) -> PopulateOrganizationsResult:
|
||||
scanned = 0
|
||||
created = 0
|
||||
updated = 0
|
||||
skipped = 0
|
||||
|
||||
with transaction.atomic():
|
||||
existing = list(Organization.objects.all())
|
||||
lookup = cls._build_lookup(existing)
|
||||
create_instances: list[Organization] = []
|
||||
update_instances_by_uid: dict[str, Organization] = {}
|
||||
|
||||
for candidate in cls.iter_candidates():
|
||||
scanned += 1
|
||||
if not normalize_organization_name(candidate.name):
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
organization = cls._find_existing(lookup, candidate)
|
||||
if organization is None:
|
||||
organization = Organization(
|
||||
name=candidate.name.strip(),
|
||||
inn=candidate.inn,
|
||||
kpp=candidate.kpp,
|
||||
ogrn=candidate.ogrn,
|
||||
ogrip=candidate.ogrip,
|
||||
)
|
||||
existing.append(organization)
|
||||
create_instances.append(organization)
|
||||
cls._index_organization(lookup, organization)
|
||||
created += 1
|
||||
continue
|
||||
|
||||
if cls._assign_existing_fields(organization, candidate):
|
||||
cls._index_organization(lookup, organization)
|
||||
update_instances_by_uid[str(organization.uid)] = organization
|
||||
updated += 1
|
||||
|
||||
if create_instances:
|
||||
Organization.objects.bulk_create(create_instances, batch_size=1000)
|
||||
update_instances = list(update_instances_by_uid.values())
|
||||
if update_instances:
|
||||
Organization.objects.bulk_update(
|
||||
update_instances,
|
||||
fields=["name", "inn", "kpp", "ogrn", "ogrip"],
|
||||
batch_size=1000,
|
||||
)
|
||||
|
||||
return PopulateOrganizationsResult(
|
||||
scanned=scanned,
|
||||
created=created,
|
||||
updated=updated,
|
||||
skipped=skipped,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def iter_candidates(cls) -> Iterable[OrganizationCandidate]:
|
||||
"""Yield organization candidates from all current source tables."""
|
||||
for row in RegisterOrganization.objects.iterator():
|
||||
yield cls._candidate(
|
||||
name=row.pn_name,
|
||||
inn=row.mn_inn,
|
||||
kpp=row.in_kpp,
|
||||
ogrn=row.mn_ogrn,
|
||||
)
|
||||
|
||||
for row in IndustrialCertificateRecord.objects.iterator():
|
||||
yield cls._candidate(
|
||||
name=row.organisation_name,
|
||||
inn=row.inn,
|
||||
ogrn=row.ogrn,
|
||||
)
|
||||
|
||||
for row in ManufacturerRecord.objects.iterator():
|
||||
yield cls._candidate(
|
||||
name=row.full_legal_name,
|
||||
inn=row.inn,
|
||||
ogrn=row.ogrn,
|
||||
)
|
||||
|
||||
for row in IndustrialProductRecord.objects.iterator():
|
||||
yield cls._candidate(
|
||||
name=row.full_organisation_name,
|
||||
inn=row.inn,
|
||||
ogrn=row.ogrn,
|
||||
)
|
||||
|
||||
for row in GenericParserRecord.objects.iterator():
|
||||
yield cls._candidate(
|
||||
name=row.organisation_name or row.title,
|
||||
inn=row.inn,
|
||||
kpp=cls._payload_kpp(row.payload),
|
||||
ogrn=row.ogrn,
|
||||
)
|
||||
|
||||
for row in InspectionRecord.objects.iterator():
|
||||
yield cls._candidate(
|
||||
name=row.organisation_name,
|
||||
inn=row.inn,
|
||||
ogrn=row.ogrn,
|
||||
)
|
||||
|
||||
for row in ProcurementRecord.objects.iterator():
|
||||
yield cls._candidate(
|
||||
name=row.customer_name,
|
||||
inn=row.customer_inn,
|
||||
kpp=row.customer_kpp,
|
||||
ogrn=row.customer_ogrn,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _candidate(
|
||||
*,
|
||||
name: str | None,
|
||||
inn: str | int | None = None,
|
||||
kpp: str | int | None = None,
|
||||
ogrn: str | int | None = None,
|
||||
) -> OrganizationCandidate:
|
||||
normalized_inn = normalize_identifier(inn, max_length=12)
|
||||
normalized_ogrn = normalize_identifier(ogrn, max_length=15)
|
||||
ogrip = (
|
||||
normalized_ogrn
|
||||
if len(normalized_ogrn) == 15 and len(normalized_inn) == 12
|
||||
else ""
|
||||
)
|
||||
legal_ogrn = normalized_ogrn if len(normalized_ogrn) == 13 else ""
|
||||
|
||||
return OrganizationCandidate(
|
||||
name=(name or "").strip(),
|
||||
inn=normalized_inn,
|
||||
kpp="" if ogrip else normalize_identifier(kpp, max_length=9),
|
||||
ogrn=legal_ogrn,
|
||||
ogrip=ogrip,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _payload_kpp(cls, payload: object) -> str:
|
||||
if not isinstance(payload, dict):
|
||||
return ""
|
||||
|
||||
company = payload.get("company")
|
||||
if isinstance(company, dict):
|
||||
company_kpp = normalize_identifier(company.get("kpp"), max_length=9)
|
||||
if company_kpp:
|
||||
return company_kpp
|
||||
|
||||
return cls._find_payload_identifier(payload, {"kpp", "кпп"}, max_length=9)
|
||||
|
||||
@classmethod
|
||||
def _find_payload_identifier(
|
||||
cls,
|
||||
value: object,
|
||||
keys: set[str],
|
||||
*,
|
||||
max_length: int,
|
||||
) -> str:
|
||||
if isinstance(value, dict):
|
||||
for key, item in value.items():
|
||||
if str(key).strip().lower() in keys:
|
||||
identifier = normalize_identifier(item, max_length=max_length)
|
||||
if identifier:
|
||||
return identifier
|
||||
nested = cls._find_payload_identifier(item, keys, max_length=max_length)
|
||||
if nested:
|
||||
return nested
|
||||
elif isinstance(value, list):
|
||||
for item in value:
|
||||
nested = cls._find_payload_identifier(item, keys, max_length=max_length)
|
||||
if nested:
|
||||
return nested
|
||||
return ""
|
||||
|
||||
@classmethod
|
||||
def _build_lookup(cls, organizations: list[Organization]) -> OrganizationLookup:
|
||||
lookup = OrganizationLookup(
|
||||
by_inn_kpp={},
|
||||
by_ogrn_kpp={},
|
||||
by_inn={},
|
||||
by_ogrn={},
|
||||
by_ogrip={},
|
||||
by_normalized_name={},
|
||||
)
|
||||
for organization in organizations:
|
||||
cls._index_organization(lookup, organization)
|
||||
return lookup
|
||||
|
||||
@staticmethod
|
||||
def _index_organization(
|
||||
lookup: OrganizationLookup,
|
||||
organization: Organization,
|
||||
) -> None:
|
||||
if organization.inn and organization.kpp:
|
||||
lookup.by_inn_kpp.setdefault(
|
||||
(organization.inn, organization.kpp), organization
|
||||
)
|
||||
if organization.ogrn and organization.kpp:
|
||||
lookup.by_ogrn_kpp.setdefault(
|
||||
(organization.ogrn, organization.kpp), organization
|
||||
)
|
||||
if organization.inn:
|
||||
lookup.by_inn.setdefault(organization.inn, [])
|
||||
if organization not in lookup.by_inn[organization.inn]:
|
||||
lookup.by_inn[organization.inn].append(organization)
|
||||
if organization.ogrn:
|
||||
lookup.by_ogrn.setdefault(organization.ogrn, [])
|
||||
if organization not in lookup.by_ogrn[organization.ogrn]:
|
||||
lookup.by_ogrn[organization.ogrn].append(organization)
|
||||
if organization.ogrip:
|
||||
lookup.by_ogrip.setdefault(organization.ogrip, organization)
|
||||
|
||||
normalized_name = normalize_organization_name(organization.name)
|
||||
if normalized_name:
|
||||
lookup.by_normalized_name.setdefault(normalized_name, organization)
|
||||
|
||||
@staticmethod
|
||||
def _find_exact_identifier_match(
|
||||
lookup: OrganizationLookup,
|
||||
candidate: OrganizationCandidate,
|
||||
) -> Organization | None:
|
||||
if candidate.inn and candidate.kpp:
|
||||
organization = lookup.by_inn_kpp.get((candidate.inn, candidate.kpp))
|
||||
if organization is not None:
|
||||
return organization
|
||||
if candidate.ogrn and candidate.kpp:
|
||||
organization = lookup.by_ogrn_kpp.get((candidate.ogrn, candidate.kpp))
|
||||
if organization is not None:
|
||||
return organization
|
||||
if candidate.ogrip and candidate.ogrip in lookup.by_ogrip:
|
||||
return lookup.by_ogrip[candidate.ogrip]
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _find_blank_kpp_match(
|
||||
lookup: OrganizationLookup,
|
||||
candidate: OrganizationCandidate,
|
||||
) -> Organization | None:
|
||||
if candidate.inn and candidate.kpp:
|
||||
blank_kpp_matches = [
|
||||
organization
|
||||
for organization in lookup.by_inn.get(candidate.inn, [])
|
||||
if not organization.kpp
|
||||
]
|
||||
if len(blank_kpp_matches) == 1:
|
||||
return blank_kpp_matches[0]
|
||||
if candidate.ogrn and candidate.kpp:
|
||||
blank_kpp_matches = [
|
||||
organization
|
||||
for organization in lookup.by_ogrn.get(candidate.ogrn, [])
|
||||
if not organization.kpp
|
||||
]
|
||||
if len(blank_kpp_matches) == 1:
|
||||
return blank_kpp_matches[0]
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _find_single_identifier_match(
|
||||
lookup: OrganizationLookup,
|
||||
candidate: OrganizationCandidate,
|
||||
) -> Organization | None:
|
||||
if candidate.inn and not candidate.kpp:
|
||||
organizations = lookup.by_inn.get(candidate.inn, [])
|
||||
if len(organizations) == 1:
|
||||
return organizations[0]
|
||||
if candidate.ogrn and not candidate.kpp:
|
||||
organizations = lookup.by_ogrn.get(candidate.ogrn, [])
|
||||
if len(organizations) == 1:
|
||||
return organizations[0]
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _find_name_match(
|
||||
lookup: OrganizationLookup,
|
||||
candidate: OrganizationCandidate,
|
||||
) -> Organization | None:
|
||||
candidate_name = normalize_organization_name(candidate.name)
|
||||
if not candidate_name:
|
||||
return None
|
||||
return lookup.by_normalized_name.get(candidate_name)
|
||||
|
||||
@classmethod
|
||||
def _find_existing(
|
||||
cls,
|
||||
lookup: OrganizationLookup,
|
||||
candidate: OrganizationCandidate,
|
||||
) -> Organization | None:
|
||||
organization = cls._find_exact_identifier_match(lookup, candidate)
|
||||
if organization is not None:
|
||||
return organization
|
||||
|
||||
organization = cls._find_blank_kpp_match(lookup, candidate)
|
||||
if organization is not None:
|
||||
return organization
|
||||
|
||||
organization = cls._find_single_identifier_match(lookup, candidate)
|
||||
if organization is not None:
|
||||
return organization
|
||||
|
||||
if candidate.kpp and (candidate.inn or candidate.ogrn):
|
||||
return None
|
||||
return cls._find_name_match(lookup, candidate)
|
||||
|
||||
@classmethod
|
||||
def _update_existing(
|
||||
cls,
|
||||
organization: Organization,
|
||||
candidate: OrganizationCandidate,
|
||||
) -> bool:
|
||||
if not cls._assign_existing_fields(organization, candidate):
|
||||
return False
|
||||
|
||||
organization.save(update_fields=["name", "inn", "kpp", "ogrn", "ogrip"])
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def _assign_existing_fields(
|
||||
cls,
|
||||
organization: Organization,
|
||||
candidate: OrganizationCandidate,
|
||||
) -> bool:
|
||||
changed = False
|
||||
|
||||
selected_name = cls._select_name(organization.name, candidate.name)
|
||||
if selected_name != organization.name:
|
||||
organization.name = selected_name
|
||||
changed = True
|
||||
|
||||
for field_name in ("inn", "kpp", "ogrn", "ogrip"):
|
||||
if getattr(organization, field_name):
|
||||
continue
|
||||
|
||||
if field_name == "ogrip" and (organization.kpp or organization.ogrn):
|
||||
continue
|
||||
if field_name in {"kpp", "ogrn"} and organization.ogrip:
|
||||
continue
|
||||
|
||||
candidate_value = getattr(candidate, field_name)
|
||||
if candidate_value:
|
||||
setattr(organization, field_name, candidate_value)
|
||||
changed = True
|
||||
|
||||
return changed
|
||||
|
||||
@staticmethod
|
||||
def _select_name(current: str, candidate: str) -> str:
|
||||
current_clean = current.strip()
|
||||
candidate_clean = candidate.strip()
|
||||
if not candidate_clean:
|
||||
return current_clean
|
||||
if not current_clean:
|
||||
return candidate_clean
|
||||
|
||||
current_is_abbreviated = current_clean.lower().startswith(_ABBREVIATED_PREFIXES)
|
||||
candidate_is_abbreviated = candidate_clean.lower().startswith(
|
||||
_ABBREVIATED_PREFIXES
|
||||
)
|
||||
if current_is_abbreviated and not candidate_is_abbreviated:
|
||||
return candidate_clean
|
||||
if len(candidate_clean) > len(current_clean) and not candidate_is_abbreviated:
|
||||
return candidate_clean
|
||||
return current_clean
|
||||
Reference in New Issue
Block a user