fix(parsers): align vacancy sources and procurement counters
All checks were successful
CI/CD Pipeline / Quality Gate (push) Successful in 29s
CI/CD Pipeline / Build and Push Images (push) Successful in 10s
CI/CD Pipeline / Internal Notify (push) Successful in 0s
CI/CD Pipeline / Deploy Dev in Dokploy (push) Successful in 1s

This commit is contained in:
2026-05-14 14:45:58 +02:00
parent 89607356b7
commit 6d1ec2e55c
12 changed files with 340 additions and 57 deletions

View File

@@ -7,6 +7,7 @@ from datetime import date, datetime
from typing import Any
from apps.parsers.models import (
VACANCY_RECORD_SOURCES,
FinancialReport,
FinancialReportLine,
GenericParserRecord,
@@ -148,6 +149,12 @@ def _source_matches(source: str) -> dict[str, set[str]]:
FinancialReport.objects.values_list("ogrn", flat=True).distinct()
),
}
if source == ParserLoadLog.Source.TRUDVSEM:
return OrganizationApiEnrichmentService._matching_identifiers_for_all(
GenericParserRecord.objects.filter(source__in=VACANCY_RECORD_SOURCES),
inn_field="inn",
ogrn_field="ogrn",
)
if source in GENERIC_SOURCES:
return OrganizationApiEnrichmentService._matching_identifiers_for_all(
GenericParserRecord.objects.filter(source=source),
@@ -395,6 +402,24 @@ class OrganizationApiEnrichmentService:
items.append(item)
presence[str(organization.uid)][to_api_data_source(source)] = items
@staticmethod
def _generic_query_sources(
    selected_sources: list[str],
) -> tuple[list[str], dict[str, str]]:
    """Expand the selected sources into the record sources to query.

    The TRUDVSEM source stands for every entry of ``VACANCY_RECORD_SOURCES``;
    any other selected source maps only to itself.

    Returns a pair ``(record_sources, bucket_by_record_source)``: the flat
    list of record-source strings to query, in selection order, and a mapping
    from each record source back to the string form of the selected source it
    was expanded from.
    """
    record_sources: list[str] = []
    bucket_by_record_source: dict[str, str] = {}
    for source in selected_sources:
        bucket = str(source)
        if source == ParserLoadLog.Source.TRUDVSEM:
            candidates = VACANCY_RECORD_SOURCES
        else:
            candidates = (bucket,)
        for record_source in map(str, candidates):
            record_sources.append(record_source)
            # A record source reached from several selected sources keeps
            # the bucket of the last one processed.
            bucket_by_record_source[record_source] = bucket
    return record_sources, bucket_by_record_source
@classmethod
def _attach_generic_records(
cls,
@@ -411,6 +436,10 @@ class OrganizationApiEnrichmentService:
if identity_filter is None:
return
query_sources, source_bucket_by_record_source = cls._generic_query_sources(
selected_sources
)
records_by_source_and_inn: dict[str, dict[str, list[dict[str, Any]]]] = {
str(source): {} for source in selected_sources
}
@@ -419,13 +448,13 @@ class OrganizationApiEnrichmentService:
}
records = (
GenericParserRecord.objects.filter(source__in=selected_sources)
GenericParserRecord.objects.filter(source__in=query_sources)
.filter(identity_filter)
.order_by("source", "-created_at", "-id")
)
for record in records:
item = cls._serialize_generic_record(record)
source = str(record.source)
source = source_bucket_by_record_source[str(record.source)]
if record.inn:
records_by_source_and_inn[source].setdefault(record.inn, []).append(
item

View File

@@ -7,6 +7,7 @@ from collections.abc import Iterable
from dataclasses import dataclass
from apps.parsers.models import (
VACANCY_RECORD_SOURCES,
FinancialReport,
GenericParserRecord,
IndustrialCertificateRecord,
@@ -273,8 +274,16 @@ class OrganizationDataSnapshotRefreshService:
ParserLoadLog.Source.FSTEC,
ParserLoadLog.Source.TRUDVSEM,
}:
sources = (
VACANCY_RECORD_SOURCES
if source == ParserLoadLog.Source.TRUDVSEM
else (source,)
)
return _identity_values(
GenericParserRecord.objects.filter(source=source, load_batch=batch_id),
GenericParserRecord.objects.filter(
source__in=sources,
load_batch=batch_id,
),
inn_field="inn",
ogrn_field="ogrn",
)