fix(parsers): align vacancy sources and procurement counters

2026-05-14 14:45:58 +02:00
parent 89607356b7
commit 6d1ec2e55c
12 changed files with 340 additions and 57 deletions
--- a/src/organizations/api_enrichment.py
+++ b/src/organizations/api_enrichment.py
@@ -7,6 +7,7 @@ from datetime import date, datetime
 from typing import Any

 from apps.parsers.models import (
+    VACANCY_RECORD_SOURCES,
    FinancialReport,
    FinancialReportLine,
    GenericParserRecord,
@@ -148,6 +149,12 @@ def _source_matches(source: str) -> dict[str, set[str]]:
                FinancialReport.objects.values_list("ogrn", flat=True).distinct()
            ),
        }
+    if source == ParserLoadLog.Source.TRUDVSEM:
+        return OrganizationApiEnrichmentService._matching_identifiers_for_all(
+            GenericParserRecord.objects.filter(source__in=VACANCY_RECORD_SOURCES),
+            inn_field="inn",
+            ogrn_field="ogrn",
+        )
    if source in GENERIC_SOURCES:
        return OrganizationApiEnrichmentService._matching_identifiers_for_all(
            GenericParserRecord.objects.filter(source=source),
@@ -395,6 +402,24 @@ class OrganizationApiEnrichmentService:
                items.append(item)
            presence[str(organization.uid)][to_api_data_source(source)] = items

+    @staticmethod
+    def _generic_query_sources(
+        selected_sources: list[str],
+    ) -> tuple[list[str], dict[str, str]]:  # -> (record sources to query, record source -> selected-source bucket)
+        query_sources: list[str] = []  # appended without de-duplication, so overlapping expansions repeat
+        source_bucket_by_record_source: dict[str, str] = {}  # on overlap, the later selected source wins
+        for source in selected_sources:
+            source_key = str(source)  # bucket key: the selected source as a plain string
+            expanded_sources = (
+                VACANCY_RECORD_SOURCES  # TRUDVSEM fans out to every vacancy record source
+                if source == ParserLoadLog.Source.TRUDVSEM
+                else (source_key,)  # any other selected source maps only to itself
+            )
+            for expanded_source in expanded_sources:
+                query_sources.append(str(expanded_source))
+                source_bucket_by_record_source[str(expanded_source)] = source_key
+        return query_sources, source_bucket_by_record_source
+
    @classmethod
    def _attach_generic_records(
        cls,
@@ -411,6 +436,10 @@ class OrganizationApiEnrichmentService:
        if identity_filter is None:
            return

+        query_sources, source_bucket_by_record_source = cls._generic_query_sources(
+            selected_sources
+        )
+
        records_by_source_and_inn: dict[str, dict[str, list[dict[str, Any]]]] = {
            str(source): {} for source in selected_sources
        }
@@ -419,13 +448,13 @@ class OrganizationApiEnrichmentService:
        }

        records = (
-            GenericParserRecord.objects.filter(source__in=selected_sources)
+            GenericParserRecord.objects.filter(source__in=query_sources)
            .filter(identity_filter)
            .order_by("source", "-created_at", "-id")
        )
        for record in records:
            item = cls._serialize_generic_record(record)
-            source = str(record.source)
+            source = source_bucket_by_record_source[str(record.source)]
            if record.inn:
                records_by_source_and_inn[source].setdefault(record.inn, []).append(
                    item
--- a/src/organizations/services.py
+++ b/src/organizations/services.py
@@ -7,6 +7,7 @@ from collections.abc import Iterable
 from dataclasses import dataclass

 from apps.parsers.models import (
+    VACANCY_RECORD_SOURCES,
    FinancialReport,
    GenericParserRecord,
    IndustrialCertificateRecord,
@@ -273,8 +274,16 @@ class OrganizationDataSnapshotRefreshService:
            ParserLoadLog.Source.FSTEC,
            ParserLoadLog.Source.TRUDVSEM,
        }:
+            sources = (
+                VACANCY_RECORD_SOURCES
+                if source == ParserLoadLog.Source.TRUDVSEM
+                else (source,)
+            )
            return _identity_values(
-                GenericParserRecord.objects.filter(source=source, load_batch=batch_id),
+                GenericParserRecord.objects.filter(
+                    source__in=sources,
+                    load_batch=batch_id,
+                ),
                inn_field="inn",
                ogrn_field="ogrn",
            )