717 lines
24 KiB
Python
717 lines
24 KiB
Python
"""Services for building the canonical organizations directory."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
from collections.abc import Iterable
|
||
from dataclasses import dataclass
|
||
|
||
from apps.parsers.models import (
|
||
VACANCY_RECORD_SOURCES,
|
||
FinancialReport,
|
||
GenericParserRecord,
|
||
IndustrialCertificateRecord,
|
||
IndustrialProductRecord,
|
||
InspectionRecord,
|
||
ManufacturerRecord,
|
||
ParserLoadLog,
|
||
ProcurementRecord,
|
||
)
|
||
from django.db import transaction
|
||
from django.db.models import Q
|
||
from django.utils import timezone
|
||
from registers.models import Organization as RegisterOrganization
|
||
|
||
from organizations.api_enrichment import OrganizationApiEnrichmentService
|
||
from organizations.data_sources import data_source_summary
|
||
from organizations.models import Organization, OrganizationDataSnapshot
|
||
|
||
# Quote characters replaced with spaces before organization names are compared.
_QUOTE_CHARS = "\"'«»„“”"

# Legal-form fragments removed from lower-cased names by
# normalize_organization_name(). The patterns are applied one after another,
# so the longer "публичное/закрытое акционерное общество" forms must come
# before the bare "акционерное общество"; otherwise the short form is stripped
# first and a stray "публичное"/"закрытое" is left in the normalized name.
_LEGAL_FORM_PATTERNS = (
    r"\bобщество\s+с\s+ограниченной\s+ответственностью\b",
    r"\bооо\b",
    r"\booo\b",
    r"\bпубличное\s+акционерное\s+общество\b",
    r"\bпао\b",
    r"\bpao\b",
    r"\bзакрытое\s+акционерное\s+общество\b",
    r"\bзао\b",
    r"\bzao\b",
    r"\bакционерное\s+общество\b",
    r"\bао\b",
    r"\bao\b",
    r"\bиндивидуальный\s+предприниматель\b",
    r"\bип\b",
)

# Lower-case prefixes (Cyrillic and Latin look-alike spellings) that mark a
# name as starting with an abbreviated legal form; each keeps its trailing
# space so only whole-word prefixes match with str.startswith().
_ABBREVIATED_PREFIXES = (
    "ооо ",
    "ooo ",
    "ао ",
    "ao ",
    "пао ",
    "pao ",
    "зао ",
    "zao ",
    "ип ",
)
|
||
|
||
|
||
@dataclass(frozen=True)
class OrganizationCandidate:
    """Immutable identity record for one organization read from a source table.

    Only ``name`` is required; every identifier defaults to an empty string,
    which the matching code treats as "not provided".
    """

    # Display name as found in the source row.
    name: str
    # Identifier strings; "" means the source row did not supply the value.
    inn: str = ""
    kpp: str = ""
    ogrn: str = ""
    ogrip: str = ""
|
||
|
||
|
||
@dataclass(frozen=True)
class PopulateOrganizationsResult:
    """Immutable counters reported by an organization population run."""

    # Number of candidates read from the source tables.
    scanned: int
    # Number of new organizations instantiated for insertion.
    created: int
    # Number of times an already known organization received new field values.
    updated: int
    # Number of candidates ignored because their normalized name was empty.
    skipped: int
|
||
|
||
|
||
@dataclass(frozen=True)
class RefreshOrganizationDataSnapshotsResult:
    """Immutable counters reported by an organization snapshot refresh run."""

    # Number of organizations visited.
    processed: int
    # Number of snapshots that did not exist before and were inserted.
    created: int
    # Number of existing snapshots that were rewritten.
    updated: int
|
||
|
||
|
||
@dataclass
class OrganizationLookup:
    """Mutable in-memory indexes used to match candidates to organizations."""

    # Exact pairs: (inn, kpp) and (ogrn, kpp) each map to one organization.
    by_inn_kpp: dict[tuple[str, str], Organization]
    by_ogrn_kpp: dict[tuple[str, str], Organization]
    # A single inn or ogrn may be shared by several organizations.
    by_inn: dict[str, list[Organization]]
    by_ogrn: dict[str, list[Organization]]
    # ogrip and the normalized name each map to one organization.
    by_ogrip: dict[str, Organization]
    by_normalized_name: dict[str, Organization]
|
||
|
||
|
||
class OrganizationDataSnapshotRefreshService:
    """Recomputes the stored ``OrganizationDataSnapshot`` rows.

    Every snapshot holds the ``data_presence`` payload produced by
    ``OrganizationApiEnrichmentService``, its ``data_source_summary`` and the
    id/name pairs of the registries returned for the organization.
    """

    @classmethod
    def refresh(
        cls,
        *,
        organization_uids: Iterable[str] | None = None,
        batch_size: int = 100,
    ) -> RefreshOrganizationDataSnapshotsResult:
        """Create or update one snapshot per organization.

        Args:
            organization_uids: Restrict the run to these organization uids;
                ``None`` processes every organization.
            batch_size: Number of organizations passed to each enrichment call
                and the batch size of the bulk writes.

        Returns:
            Counters of visited organizations and created/updated snapshots.
        """
        organizations_qs = Organization.objects.all().order_by("uid")
        if organization_uids is not None:
            organizations_qs = organizations_qs.filter(
                uid__in=list(organization_uids)
            )

        processed_total = 0
        created_total = 0
        updated_total = 0

        for chunk in cls._iter_batches(organizations_qs, batch_size):
            enrichment = OrganizationApiEnrichmentService.build_for(chunk)
            chunk_uids = [organization.uid for organization in chunk]
            stored = OrganizationDataSnapshot.objects.filter(
                organization_id__in=chunk_uids
            )
            # Keyed by the stringified organization id for lookup below.
            snapshots_by_uid = {
                str(snapshot.organization_id): snapshot for snapshot in stored
            }

            to_create: list[OrganizationDataSnapshot] = []
            to_update: list[OrganizationDataSnapshot] = []
            for organization in chunk:
                processed_total += 1
                uid_key = str(organization.uid)
                item = enrichment[uid_key]
                data = item.data_presence
                data_source_counts = data_source_summary(data)
                registries = [
                    {"id": registry.id, "name": registry.name}
                    for registry in item.registries
                ]

                snapshot = snapshots_by_uid.get(uid_key)
                if snapshot is not None:
                    snapshot.data = data
                    snapshot.data_source_counts = data_source_counts
                    snapshot.registries = registries
                    # Set explicitly because the row is written via bulk_update.
                    snapshot.updated_at = timezone.now()
                    to_update.append(snapshot)
                else:
                    to_create.append(
                        OrganizationDataSnapshot(
                            organization=organization,
                            data=data,
                            data_source_counts=data_source_counts,
                            registries=registries,
                        )
                    )

            if to_create:
                OrganizationDataSnapshot.objects.bulk_create(
                    to_create,
                    batch_size=batch_size,
                )
                created_total += len(to_create)
            if to_update:
                OrganizationDataSnapshot.objects.bulk_update(
                    to_update,
                    fields=["data", "data_source_counts", "registries", "updated_at"],
                    batch_size=batch_size,
                )
                updated_total += len(to_update)

        return RefreshOrganizationDataSnapshotsResult(
            processed=processed_total,
            created=created_total,
            updated=updated_total,
        )

    @classmethod
    def refresh_for_parser_batch(
        cls,
        *,
        source: str,
        batch_id: int,
        batch_size: int = 100,
    ) -> RefreshOrganizationDataSnapshotsResult:
        """Refresh snapshots of the organizations referenced by one load batch.

        Args:
            source: Parser source identifier, compared with
                ``ParserLoadLog.Source`` values.
            batch_id: Load batch whose rows are inspected.
            batch_size: Forwarded to :meth:`refresh`.
        """
        return cls.refresh(
            organization_uids=cls.organization_uids_for_parser_batch(
                source=source,
                batch_id=batch_id,
            ),
            batch_size=batch_size,
        )

    @classmethod
    def organization_uids_for_parser_batch(
        cls,
        *,
        source: str,
        batch_id: int,
    ) -> list[str]:
        """Return uids of organizations whose identifiers occur in the batch.

        An organization is selected when its ``inn`` is among the batch INN
        values, or its ``ogrn`` or ``ogrip`` is among the batch OGRN values.
        An empty list is returned when the batch carries no identifiers.
        """
        inn_values, ogrn_values = cls._parser_batch_identities(
            source=source,
            batch_id=batch_id,
        )
        if not (inn_values or ogrn_values):
            return []

        condition = Q()
        if inn_values:
            condition |= Q(inn__in=inn_values)
        if ogrn_values:
            # Batch OGRN values are matched against both ogrn and ogrip.
            condition |= Q(ogrn__in=ogrn_values) | Q(ogrip__in=ogrn_values)

        matching_uids = Organization.objects.filter(condition).values_list(
            "uid", flat=True
        )
        return [str(uid) for uid in matching_uids]

    @staticmethod
    def _parser_batch_identities(
        *,
        source: str,
        batch_id: int,
    ) -> tuple[set[str], set[str]]:
        """Return ``(inn_values, ogrn_values)`` found in one load batch.

        Sources that are not recognised yield two empty sets.
        """
        log_source = ParserLoadLog.Source

        # Sources stored in their own tables: (source, model, inn, ogrn).
        dedicated_tables = (
            (log_source.INDUSTRIAL, IndustrialCertificateRecord, "inn", "ogrn"),
            (log_source.INDUSTRIAL_PRODUCTS, IndustrialProductRecord, "inn", "ogrn"),
            (log_source.MANUFACTURES, ManufacturerRecord, "inn", "ogrn"),
            (log_source.INSPECTIONS, InspectionRecord, "inn", "ogrn"),
            (
                log_source.PROCUREMENTS,
                ProcurementRecord,
                "customer_inn",
                "customer_ogrn",
            ),
        )
        for known_source, model, inn_field, ogrn_field in dedicated_tables:
            if source == known_source:
                return _identity_values(
                    model.objects.filter(load_batch=batch_id),
                    inn_field=inn_field,
                    ogrn_field=ogrn_field,
                )

        if source == log_source.FNS_REPORTS:
            # Only OGRN values are collected for financial reports.
            report_ogrns = (
                FinancialReport.objects.filter(load_batch=batch_id)
                .exclude(ogrn="")
                .values_list("ogrn", flat=True)
                .distinct()
            )
            return set(), set(report_ogrns)

        generic_sources = {
            log_source.PROCUREMENTS_44FZ,
            log_source.PROCUREMENTS_223FZ,
            log_source.CONTRACTS,
            log_source.UNFAIR_SUPPLIERS,
            log_source.FAS_GOZ,
            log_source.ARBITRATION,
            log_source.FEDRESURS_BANKRUPTCY,
            log_source.FSTEC,
            log_source.TRUDVSEM,
        }
        if source in generic_sources:
            if source == log_source.TRUDVSEM:
                # TRUDVSEM rows are looked up under VACANCY_RECORD_SOURCES.
                record_sources = VACANCY_RECORD_SOURCES
            else:
                record_sources = (source,)
            return _identity_values(
                GenericParserRecord.objects.filter(
                    source__in=record_sources,
                    load_batch=batch_id,
                ),
                inn_field="inn",
                ogrn_field="ogrn",
            )

        return set(), set()

    @staticmethod
    def _iter_batches(queryset, batch_size: int) -> Iterable[list[Organization]]:
        """Yield lists of at most ``batch_size`` rows read via ``iterator()``."""
        pending: list[Organization] = []
        for organization in queryset.iterator(chunk_size=batch_size):
            pending.append(organization)
            if len(pending) < batch_size:
                continue
            yield pending
            pending = []
        # Flush the final, possibly shorter, batch.
        if pending:
            yield pending
|
||
|
||
|
||
def _identity_values(
    queryset, *, inn_field: str, ogrn_field: str
) -> tuple[set[str], set[str]]:
    """Collect the distinct non-empty INN and OGRN values of a queryset.

    Args:
        queryset: Queryset whose rows carry both identifier columns.
        inn_field: Name of the column holding INN values.
        ogrn_field: Name of the column holding OGRN values.

    Returns:
        ``(inn_values, ogrn_values)``; rows whose column equals ``""`` are
        excluded from the corresponding set.
    """
    collected = []
    for field_name in (inn_field, ogrn_field):
        non_blank = queryset.exclude(**{field_name: ""})
        distinct_values = non_blank.values_list(field_name, flat=True).distinct()
        collected.append(set(distinct_values))
    inn_values, ogrn_values = collected
    return inn_values, ogrn_values
|
||
|
||
|
||
def normalize_identifier(value: str | int | None, *, max_length: int) -> str:
    """Return the ASCII digits of ``value``, bounded by the target field length.

    Args:
        value: Raw identifier; ``None`` is treated as missing.
        max_length: Maximum number of digits the target field accepts.

    Returns:
        The digits of ``value`` with every other character removed, or ``""``
        when ``value`` is ``None``, contains no digits, or has more than
        ``max_length`` digits (over-long values are rejected, not truncated).
    """
    if value is None:
        return ""

    # An explicit 0-9 class is used instead of \D: in str patterns \d also
    # matches non-ASCII decimal digits (e.g. Arabic-Indic or full-width), which
    # must not end up inside a digits-only identifier.
    digits = re.sub(r"[^0-9]+", "", str(value))
    if not digits or len(digits) > max_length:
        return ""
    return digits
|
||
|
||
|
||
def normalize_organization_name(value: str | None) -> str:
    """Reduce an organization name to a comparison key.

    The name is stripped and lower-cased, "ё" is folded to "е", quotes,
    punctuation and hyphens become spaces, every ``_LEGAL_FORM_PATTERNS`` match
    is removed in turn, and whitespace runs are collapsed. ``None`` yields
    ``""``.
    """
    if value is None:
        return ""

    quote_table = str.maketrans(dict.fromkeys(_QUOTE_CHARS, " "))
    text = str(value).strip().lower().replace("ё", "е").translate(quote_table)
    # Everything except word characters, whitespace and hyphens becomes a
    # space; hyphens are turned into spaces in a separate step.
    text = re.sub(r"[^\w\s-]+", " ", text, flags=re.UNICODE).replace("-", " ")
    text = re.sub(r"\s+", " ", text).strip()

    for legal_form in _LEGAL_FORM_PATTERNS:
        text = re.sub(legal_form, " ", text, flags=re.IGNORECASE)

    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
|
||
|
||
|
||
class OrganizationPopulationService:
    """Creates and completes ``Organization`` rows from the source tables."""

    @classmethod
    def populate(cls) -> PopulateOrganizationsResult:
        """Match every candidate against known organizations and persist.

        Runs inside a single transaction: all organizations are loaded and
        indexed in memory, each candidate either fills blank fields of a
        matched organization or becomes a new one, and the results are written
        with one ``bulk_create`` and one ``bulk_update``.

        Returns:
            Counters of scanned, created, updated and skipped candidates.
        """
        scanned = created = updated = skipped = 0

        with transaction.atomic():
            known = list(Organization.objects.all())
            lookup = cls._build_lookup(known)
            pending_creates: list[Organization] = []
            # Keyed by uid so an organization touched twice is written once.
            pending_updates: dict[str, Organization] = {}

            for candidate in cls.iter_candidates():
                scanned += 1
                if not normalize_organization_name(candidate.name):
                    skipped += 1
                    continue

                match = cls._find_existing(lookup, candidate)
                if match is not None:
                    if cls._assign_existing_fields(match, candidate):
                        # Re-index so newly filled identifiers become matchable.
                        cls._index_organization(lookup, match)
                        pending_updates[str(match.uid)] = match
                        updated += 1
                    continue

                fresh = Organization(
                    name=candidate.name.strip(),
                    inn=candidate.inn,
                    kpp=candidate.kpp,
                    ogrn=candidate.ogrn,
                    ogrip=candidate.ogrip,
                )
                known.append(fresh)
                pending_creates.append(fresh)
                cls._index_organization(lookup, fresh)
                created += 1

            if pending_creates:
                Organization.objects.bulk_create(pending_creates, batch_size=1000)
            if pending_updates:
                Organization.objects.bulk_update(
                    list(pending_updates.values()),
                    fields=["name", "inn", "kpp", "ogrn", "ogrip"],
                    batch_size=1000,
                )

        return PopulateOrganizationsResult(
            scanned=scanned,
            created=created,
            updated=updated,
            skipped=skipped,
        )

    @classmethod
    def iter_candidates(cls) -> Iterable[OrganizationCandidate]:
        """Yield one candidate per row of every source table, table by table."""
        for record in RegisterOrganization.objects.iterator():
            yield cls._candidate(
                name=record.pn_name,
                inn=record.mn_inn,
                kpp=record.in_kpp,
                ogrn=record.mn_ogrn,
            )

        # These tables share the inn/ogrn columns and differ only in the
        # attribute that holds the organization name.
        name_only_sources = (
            (IndustrialCertificateRecord, "organisation_name"),
            (ManufacturerRecord, "full_legal_name"),
            (IndustrialProductRecord, "full_organisation_name"),
        )
        for model, name_attr in name_only_sources:
            for record in model.objects.iterator():
                yield cls._candidate(
                    name=getattr(record, name_attr),
                    inn=record.inn,
                    ogrn=record.ogrn,
                )

        for record in GenericParserRecord.objects.iterator():
            yield cls._candidate(
                name=record.organisation_name or record.title,
                inn=record.inn,
                kpp=cls._payload_kpp(record.payload),
                ogrn=record.ogrn,
            )

        for record in InspectionRecord.objects.iterator():
            yield cls._candidate(
                name=record.organisation_name,
                inn=record.inn,
                ogrn=record.ogrn,
            )

        for record in ProcurementRecord.objects.iterator():
            yield cls._candidate(
                name=record.customer_name,
                inn=record.customer_inn,
                kpp=record.customer_kpp,
                ogrn=record.customer_ogrn,
            )

    @staticmethod
    def _candidate(
        *,
        name: str | None,
        inn: str | int | None = None,
        kpp: str | int | None = None,
        ogrn: str | int | None = None,
    ) -> OrganizationCandidate:
        """Build a candidate from raw column values.

        A 15-digit ``ogrn`` paired with a 12-digit ``inn`` is stored as
        ``ogrip`` and the ``kpp`` is blanked; a 13-digit ``ogrn`` is kept as
        ``ogrn``; an ``ogrn`` of any other length is dropped.
        """
        inn_digits = normalize_identifier(inn, max_length=12)
        ogrn_digits = normalize_identifier(ogrn, max_length=15)

        has_ogrip_shape = len(ogrn_digits) == 15 and len(inn_digits) == 12
        ogrip = ogrn_digits if has_ogrip_shape else ""
        legal_ogrn = ogrn_digits if len(ogrn_digits) == 13 else ""

        if ogrip:
            kpp_digits = ""
        else:
            kpp_digits = normalize_identifier(kpp, max_length=9)

        return OrganizationCandidate(
            name=(name or "").strip(),
            inn=inn_digits,
            kpp=kpp_digits,
            ogrn=legal_ogrn,
            ogrip=ogrip,
        )

    @classmethod
    def _payload_kpp(cls, payload: object) -> str:
        """Extract a KPP from a parser payload, or ``""`` when none is found.

        ``payload["company"]["kpp"]`` is preferred; otherwise the payload is
        searched recursively for a "kpp"/"кпп" key.
        """
        if not isinstance(payload, dict):
            return ""

        company = payload.get("company")
        if isinstance(company, dict):
            from_company = normalize_identifier(company.get("kpp"), max_length=9)
            if from_company:
                return from_company

        return cls._find_payload_identifier(payload, {"kpp", "кпп"}, max_length=9)

    @classmethod
    def _find_payload_identifier(
        cls,
        value: object,
        keys: set[str],
        *,
        max_length: int,
    ) -> str:
        """Depth-first search of dicts/lists for the first usable identifier.

        A dict key matches when its stripped, lower-cased string form is in
        ``keys``. Returns ``""`` when nothing normalizes to a non-empty value.
        """
        if isinstance(value, dict):
            for raw_key, child in value.items():
                if str(raw_key).strip().lower() in keys:
                    direct = normalize_identifier(child, max_length=max_length)
                    if direct:
                        return direct
                # Descend even under a matching key whose own value was unusable.
                found = cls._find_payload_identifier(
                    child, keys, max_length=max_length
                )
                if found:
                    return found
            return ""

        if isinstance(value, list):
            for child in value:
                found = cls._find_payload_identifier(
                    child, keys, max_length=max_length
                )
                if found:
                    return found

        return ""

    @classmethod
    def _build_lookup(cls, organizations: list[Organization]) -> OrganizationLookup:
        """Index the given organizations into a fresh ``OrganizationLookup``."""
        lookup = OrganizationLookup(
            by_inn_kpp={},
            by_ogrn_kpp={},
            by_inn={},
            by_ogrn={},
            by_ogrip={},
            by_normalized_name={},
        )
        for organization in organizations:
            cls._index_organization(lookup, organization)
        return lookup

    @staticmethod
    def _index_organization(
        lookup: OrganizationLookup,
        organization: Organization,
    ) -> None:
        """Register an organization under every key it currently provides.

        Single-valued indexes keep the first organization stored for a key;
        the ``by_inn``/``by_ogrn`` lists never hold the same organization twice.
        """
        inn = organization.inn
        kpp = organization.kpp
        ogrn = organization.ogrn

        if inn and kpp:
            lookup.by_inn_kpp.setdefault((inn, kpp), organization)
        if ogrn and kpp:
            lookup.by_ogrn_kpp.setdefault((ogrn, kpp), organization)
        if inn:
            inn_bucket = lookup.by_inn.setdefault(inn, [])
            if organization not in inn_bucket:
                inn_bucket.append(organization)
        if ogrn:
            ogrn_bucket = lookup.by_ogrn.setdefault(ogrn, [])
            if organization not in ogrn_bucket:
                ogrn_bucket.append(organization)
        if organization.ogrip:
            lookup.by_ogrip.setdefault(organization.ogrip, organization)

        name_key = normalize_organization_name(organization.name)
        if name_key:
            lookup.by_normalized_name.setdefault(name_key, organization)

    @staticmethod
    def _find_exact_identifier_match(
        lookup: OrganizationLookup,
        candidate: OrganizationCandidate,
    ) -> Organization | None:
        """Match on (inn, kpp), then (ogrn, kpp), then ogrip."""
        if candidate.kpp:
            paired_indexes = (
                (candidate.inn, lookup.by_inn_kpp),
                (candidate.ogrn, lookup.by_ogrn_kpp),
            )
            for identifier, index in paired_indexes:
                if not identifier:
                    continue
                found = index.get((identifier, candidate.kpp))
                if found is not None:
                    return found

        if candidate.ogrip:
            return lookup.by_ogrip.get(candidate.ogrip)
        return None

    @staticmethod
    def _find_blank_kpp_match(
        lookup: OrganizationLookup,
        candidate: OrganizationCandidate,
    ) -> Organization | None:
        """Match a candidate that has a kpp to the only kpp-less organization.

        Tried by inn first, then by ogrn; a match is returned only when exactly
        one organization sharing the identifier has an empty kpp.
        """
        if not candidate.kpp:
            return None

        shared_indexes = (
            (candidate.inn, lookup.by_inn),
            (candidate.ogrn, lookup.by_ogrn),
        )
        for identifier, index in shared_indexes:
            if not identifier:
                continue
            without_kpp = [
                organization
                for organization in index.get(identifier, [])
                if not organization.kpp
            ]
            if len(without_kpp) == 1:
                return without_kpp[0]
        return None

    @staticmethod
    def _find_single_identifier_match(
        lookup: OrganizationLookup,
        candidate: OrganizationCandidate,
    ) -> Organization | None:
        """Match a kpp-less candidate when its inn or ogrn is unambiguous."""
        if candidate.kpp:
            return None

        shared_indexes = (
            (candidate.inn, lookup.by_inn),
            (candidate.ogrn, lookup.by_ogrn),
        )
        for identifier, index in shared_indexes:
            if not identifier:
                continue
            sharing = index.get(identifier, [])
            if len(sharing) == 1:
                return sharing[0]
        return None

    @staticmethod
    def _find_name_match(
        lookup: OrganizationLookup,
        candidate: OrganizationCandidate,
    ) -> Organization | None:
        """Match on the normalized name; empty normalized names never match."""
        name_key = normalize_organization_name(candidate.name)
        if name_key:
            return lookup.by_normalized_name.get(name_key)
        return None

    @classmethod
    def _find_existing(
        cls,
        lookup: OrganizationLookup,
        candidate: OrganizationCandidate,
    ) -> Organization | None:
        """Return the organization a candidate refers to, if any.

        Identifier-based strategies are tried from strictest to loosest. The
        name is used as a last resort, and never for a candidate that carries
        a kpp together with an inn or ogrn.
        """
        identifier_finders = (
            cls._find_exact_identifier_match,
            cls._find_blank_kpp_match,
            cls._find_single_identifier_match,
        )
        for finder in identifier_finders:
            found = finder(lookup, candidate)
            if found is not None:
                return found

        if candidate.kpp and (candidate.inn or candidate.ogrn):
            return None
        return cls._find_name_match(lookup, candidate)

    @classmethod
    def _update_existing(
        cls,
        organization: Organization,
        candidate: OrganizationCandidate,
    ) -> bool:
        """Fill blank fields from the candidate and save when anything changed.

        Returns:
            ``True`` when the organization was modified and saved.
        """
        if cls._assign_existing_fields(organization, candidate):
            organization.save(update_fields=["name", "inn", "kpp", "ogrn", "ogrip"])
            return True
        return False

    @classmethod
    def _assign_existing_fields(
        cls,
        organization: Organization,
        candidate: OrganizationCandidate,
    ) -> bool:
        """Copy the preferred name and missing identifiers onto an organization.

        Identifiers already set are never overwritten. ``ogrip`` is not filled
        when the organization has a kpp or ogrn, and kpp/ogrn are not filled
        when it has an ogrip. Nothing is saved here.

        Returns:
            ``True`` when at least one attribute was changed.
        """
        changed = False

        best_name = cls._select_name(organization.name, candidate.name)
        if best_name != organization.name:
            organization.name = best_name
            changed = True

        # Order matters: a value assigned earlier in this loop can block a
        # later field (e.g. a freshly set kpp prevents setting ogrip).
        for field_name in ("inn", "kpp", "ogrn", "ogrip"):
            if getattr(organization, field_name):
                continue

            if field_name == "ogrip":
                blocked = bool(organization.kpp or organization.ogrn)
            elif field_name in ("kpp", "ogrn"):
                blocked = bool(organization.ogrip)
            else:
                blocked = False
            if blocked:
                continue

            incoming = getattr(candidate, field_name)
            if incoming:
                setattr(organization, field_name, incoming)
                changed = True

        return changed

    @staticmethod
    def _select_name(current: str, candidate: str) -> str:
        """Pick the better of two names, both returned stripped.

        A candidate that starts with an abbreviated legal form never replaces
        the current name. Otherwise the candidate wins when the current name
        is abbreviated or shorter than the candidate.
        """
        current_clean = current.strip()
        candidate_clean = candidate.strip()
        if not candidate_clean:
            return current_clean
        if not current_clean:
            return candidate_clean

        if candidate_clean.lower().startswith(_ABBREVIATED_PREFIXES):
            return current_clean

        current_is_abbreviated = current_clean.lower().startswith(
            _ABBREVIATED_PREFIXES
        )
        if current_is_abbreviated or len(candidate_clean) > len(current_clean):
            return candidate_clean
        return current_clean
|