From 052389d9217e0ad344600417b795fe50cee40ed9 Mon Sep 17 00:00:00 2001 From: Aleksandr Meshchriakov Date: Wed, 4 Mar 2026 15:35:50 +0100 Subject: [PATCH] =?UTF-8?q?refactor(parsers):=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=BD=D0=B5=D1=81=D1=82=D0=B8=20=D1=82=D0=B5=D1=81=D1=82=D1=8B?= =?UTF-8?q?=20=D0=B2=20ROOT=5FDIR/tests=20=D0=B8=20=D1=81=D0=B8=D0=BD?= =?UTF-8?q?=D1=85=D1=80=D0=BE=D0=BD=D0=B8=D0=B7=D0=B8=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=B0=D1=82=D1=8C=20=D0=BA=D0=BE=D0=BD=D1=82=D1=80=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D1=8B=20=D0=B7=D0=B0=D0=B4=D0=B0=D1=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - перенесены тесты parsers из src/apps/parsers/tests в tests/apps/parsers - обновлены тесты задач под текущее поведение Celery (ошибки пробрасываются исключениями) - убрана зависимость тестов от внешнего брокера через локальные eager-вызовы - добавлены/уточнены фабрики и импорты для единой структуры тестов - обновлены README и CHANGELOG с новым правилом размещения тестов и запуском --- CHANGELOG.md | 12 + README.md | 24 +- src/apps/parsers/models.py | 96 +++++++ src/apps/parsers/serializers.py | 14 +- src/apps/parsers/services.py | 263 +++++++++++++++++- src/apps/parsers/tasks.py | 66 ++--- src/apps/parsers/tests/__init__.py | 8 - src/apps/parsers/tests/factories.py | 173 ------------ src/apps/parsers/views.py | 26 +- tests/apps/parsers/factories.py | 40 ++- .../apps/parsers}/run_checko_e2e.py | 2 +- .../apps/parsers}/test_checko_e2e.py | 0 .../tests => tests/apps/parsers}/test_e2e.py | 3 +- .../apps/parsers}/test_fns_parser.py | 29 ++ .../apps/parsers}/test_procurement_service.py | 32 ++- .../apps/parsers}/test_proverki_client.py | 0 .../apps/parsers}/test_tasks.py | 153 +++++++--- .../apps/parsers}/test_zakupki_client.py | 0 18 files changed, 657 insertions(+), 284 deletions(-) delete mode 100644 src/apps/parsers/tests/__init__.py delete mode 100644 src/apps/parsers/tests/factories.py rename {src/apps/parsers/tests => tests/apps/parsers}/run_checko_e2e.py (98%) rename {src/apps/parsers/tests => tests/apps/parsers}/test_checko_e2e.py (100%) rename {src/apps/parsers/tests => tests/apps/parsers}/test_e2e.py (99%) rename {src/apps/parsers/tests => tests/apps/parsers}/test_fns_parser.py (92%) rename {src/apps/parsers/tests => tests/apps/parsers}/test_procurement_service.py (92%) rename {src/apps/parsers/tests => tests/apps/parsers}/test_proverki_client.py (100%) rename {src/apps/parsers/tests => tests/apps/parsers}/test_tasks.py (90%) rename {src/apps/parsers/tests => tests/apps/parsers}/test_zakupki_client.py (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36941d6..784bed1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ --- +## [Unreleased] + +### Изменено + +- Тесты парсеров перенесены из `src/apps/parsers/tests` в `tests/apps/parsers` для единого правила хранения тестов в `ROOT_DIR/tests`. +- Обновлены импорты и вспомогательный e2e-скрипт `run_checko_e2e.py` после переноса тестов. +- Актуализированы проверки в `tests/apps/parsers/test_tasks.py` под текущее поведение Celery-задач (ошибки пробрасываются исключениями), а также убрана зависимость от внешнего брокера в тестовых сценариях через локальные eager-вызовы. + +### Документация + +- README дополнен явным правилом по размещению тестов и примерами корректного запуска тестов через `scripts/run-tests.sh`. + ## [0.5.0] - 2026-02-18 ### Добавлено diff --git a/README.md b/README.md index db34cd1..d35785a 100644 --- a/README.md +++ b/README.md @@ -227,12 +227,35 @@ Celery-задачи (основные): - успешные: `input/fns/processed/` - ошибки: `input/fns/failed/` +## Тесты + +- Все тесты проекта хранятся только в `ROOT_DIR/tests`. +- Размещение тестов внутри `src/**/tests` не используется. +- Базовый запуск всех тестов: + +```bash +./scripts/run-tests.sh +``` + +- Запуск отдельных тестов через скрипт (с путями относительно `src/`): + +```bash +./scripts/run-tests.sh ../tests/apps/parsers/test_tasks.py +``` + +- Прогон в режиме, близком к production (PostgreSQL + миграции): + +```bash +make test-prod +``` + ## Команды разработки ```bash make install make setup-dev make test +make test-prod make test-cov make test-fast make lint @@ -274,4 +297,3 @@ docker compose -f docker-compose.dev.yml logs -f web docker compose -f docker-compose.dev.yml logs -f celery_worker docker compose -f docker-compose.dev.yml logs -f celery_beat ``` - diff --git a/src/apps/parsers/models.py b/src/apps/parsers/models.py index 6c42b3b..fd1991c 100644 --- a/src/apps/parsers/models.py +++ b/src/apps/parsers/models.py @@ -89,6 +89,13 @@ class IndustrialCertificateRecord(TimestampMixin, models.Model): blank=True, help_text=_("Дата выдачи сертификата"), ) + issue_date_normalized = models.DateField( + _("дата выдачи (нормализованная)"), + null=True, + blank=True, + db_index=True, + help_text=_("Нормализованная дата выдачи сертификата"), + ) certificate_number = models.CharField( _("номер сертификата"), max_length=100, @@ -101,6 +108,13 @@ class IndustrialCertificateRecord(TimestampMixin, models.Model): blank=True, help_text=_("Дата окончания действия"), ) + expiry_date_normalized = models.DateField( + _("дата окончания (нормализованная)"), + null=True, + blank=True, + db_index=True, + help_text=_("Нормализованная дата окончания действия"), + ) certificate_file_url = models.TextField( _("URL файла"), blank=True, @@ -122,6 +136,15 @@ class IndustrialCertificateRecord(TimestampMixin, models.Model): db_index=True, help_text=_("ОГРН организации"), ) + registry_organization = models.ForeignKey( + "registers.Organization", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="industrial_certificate_records", + verbose_name=_("организация из реестров"), + help_text=_("Связь с организацией из приложения реестров"), + ) class Meta: db_table = "parsers_industrial_certificate" @@ -176,6 +199,15 @@ class ManufacturerRecord(TimestampMixin, models.Model): blank=True, help_text=_("Юридический адрес организации"), ) + registry_organization = models.ForeignKey( + "registers.Organization", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="manufacturer_records", + verbose_name=_("организация из реестров"), + help_text=_("Связь с организацией из приложения реестров"), + ) class Meta: db_table = "parsers_manufacturer" @@ -305,12 +337,26 @@ class InspectionRecord(TimestampMixin, models.Model): blank=True, help_text=_("Дата начала проверки"), ) + start_date_normalized = models.DateField( + _("дата начала (нормализованная)"), + null=True, + blank=True, + db_index=True, + help_text=_("Нормализованная дата начала проверки"), + ) end_date = models.CharField( _("дата окончания"), max_length=20, blank=True, help_text=_("Дата окончания проверки"), ) + end_date_normalized = models.DateField( + _("дата окончания (нормализованная)"), + null=True, + blank=True, + db_index=True, + help_text=_("Нормализованная дата окончания проверки"), + ) status = models.CharField( _("статус"), max_length=100, @@ -348,6 +394,15 @@ class InspectionRecord(TimestampMixin, models.Model): blank=True, help_text=_("Месяц, за который загружены данные"), ) + registry_organization = models.ForeignKey( + "registers.Organization", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="inspection_records", + verbose_name=_("организация из реестров"), + help_text=_("Связь с организацией из приложения реестров"), + ) class Meta: db_table = "parsers_inspection" @@ -423,6 +478,15 @@ class ProcurementRecord(TimestampMixin, models.Model): blank=True, help_text=_("Начальная (максимальная) цена контракта"), ) + max_price_amount = models.DecimalField( + _("НМЦ (число)"), + max_digits=20, + decimal_places=2, + null=True, + blank=True, + db_index=True, + help_text=_("Нормализованная числовая НМЦ"), + ) currency_code = models.CharField( _("валюта"), max_length=10, @@ -441,12 +505,26 @@ class ProcurementRecord(TimestampMixin, models.Model): blank=True, help_text=_("Дата публикации извещения"), ) + publish_date_normalized = models.DateField( + _("дата публикации (нормализованная)"), + null=True, + blank=True, + db_index=True, + help_text=_("Нормализованная дата публикации извещения"), + ) end_date = models.CharField( _("дата окончания"), max_length=30, blank=True, help_text=_("Дата окончания подачи заявок"), ) + end_date_normalized = models.DateField( + _("дата окончания (нормализованная)"), + null=True, + blank=True, + db_index=True, + help_text=_("Нормализованная дата окончания подачи заявок"), + ) status = models.CharField( _("статус"), max_length=100, @@ -492,6 +570,15 @@ class ProcurementRecord(TimestampMixin, models.Model): blank=True, help_text=_("Месяц, за который загружены данные"), ) + registry_organization = models.ForeignKey( + "registers.Organization", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="procurement_records", + verbose_name=_("организация из реестров"), + help_text=_("Связь с организацией из приложения реестров"), + ) class Meta: db_table = "parsers_procurement" @@ -546,6 +633,15 @@ class FinancialReport(TimestampMixin, models.Model): db_index=True, help_text=_("ОГРН организации"), ) + registry_organization = models.ForeignKey( + "registers.Organization", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="financial_reports", + verbose_name=_("организация из реестров"), + help_text=_("Связь с организацией из приложения реестров"), + ) file_name = models.CharField( _("имя файла"), max_length=255, diff --git a/src/apps/parsers/serializers.py b/src/apps/parsers/serializers.py index c914d1f..70abb19 100644 --- a/src/apps/parsers/serializers.py +++ b/src/apps/parsers/serializers.py @@ -34,12 +34,15 @@ class IndustrialCertificateSerializer(serializers.ModelSerializer): "id", "load_batch", "issue_date", + "issue_date_normalized", "certificate_number", "expiry_date", + "expiry_date_normalized", "certificate_file_url", "organisation_name", "inn", "ogrn", + "registry_organization", "created_at", "updated_at", ] @@ -67,6 +70,7 @@ class ManufacturerSerializer(serializers.ModelSerializer): "inn", "ogrn", "address", + "registry_organization", "created_at", "updated_at", ] @@ -98,13 +102,16 @@ class InspectionSerializer(serializers.ModelSerializer): "inspection_type", "inspection_form", "start_date", + "start_date_normalized", "end_date", + "end_date_normalized", "status", "legal_basis", "result", "is_federal_law_248", "data_year", "data_month", + "registry_organization", "created_at", "updated_at", ] @@ -135,10 +142,13 @@ class ProcurementSerializer(serializers.ModelSerializer): "customer_ogrn", "customer_name", "max_price", + "max_price_amount", "currency_code", "placement_method", "publish_date", + "publish_date_normalized", "end_date", + "end_date_normalized", "status", "law_type", "purchase_object_info", @@ -146,6 +156,7 @@ class ProcurementSerializer(serializers.ModelSerializer): "region_code", "data_year", "data_month", + "registry_organization", "created_at", "updated_at", ] @@ -188,6 +199,7 @@ class FinancialReportSerializer(serializers.ModelSerializer): "id", "external_id", "ogrn", + "registry_organization", "file_name", "file_hash", "load_batch", @@ -201,7 +213,7 @@ class FinancialReportSerializer(serializers.ModelSerializer): read_only_fields = fields def get_lines_count(self, obj) -> int: - return obj.lines.count() + return getattr(obj, "lines_count", obj.lines.count()) class FinancialReportDetailSerializer(FinancialReportSerializer): diff --git a/src/apps/parsers/services.py b/src/apps/parsers/services.py index 0c3ff04..f584988 100644 --- a/src/apps/parsers/services.py +++ b/src/apps/parsers/services.py @@ -5,6 +5,12 @@ """ import logging +import re +from collections import defaultdict +from contextlib import suppress +from dataclasses import dataclass +from datetime import date, datetime +from decimal import Decimal, InvalidOperation from apps.core.services import BaseService, BulkOperationsMixin from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer @@ -20,11 +26,195 @@ from apps.parsers.models import ( ProcurementRecord, Proxy, ) +from apps.registers.models import Organization from django.db import IntegrityError, transaction +from django.db.models import Q from django.utils import timezone logger = logging.getLogger(__name__) +_DATE_FORMATS = ( + "%Y-%m-%d", + "%d.%m.%Y", + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M", + "%d.%m.%Y %H:%M:%S", + "%d.%m.%Y %H:%M", +) + + +def normalize_to_date(value: str | None) -> date | None: + """Нормализовать строку с датой в date.""" + if value is None: + return None + + raw = str(value).strip() + if not raw: + return None + + candidate = raw.replace("T", " ").replace("Z", "") + for date_format in _DATE_FORMATS: + try: + return datetime.strptime(candidate, date_format).date() + except ValueError: + continue + + with suppress(ValueError): + return datetime.fromisoformat(candidate).date() + + match_iso = re.search(r"\b\d{4}-\d{2}-\d{2}\b", candidate) + if match_iso: + with suppress(ValueError): + return datetime.strptime(match_iso.group(0), "%Y-%m-%d").date() + + match_dot = re.search(r"\b\d{2}\.\d{2}\.\d{4}\b", candidate) + if match_dot: + with suppress(ValueError): + return datetime.strptime(match_dot.group(0), "%d.%m.%Y").date() + + return None + + +def normalize_to_decimal(value: str | None) -> Decimal | None: + """Нормализовать строку с суммой в Decimal.""" + if value is None: + return None + + raw = str(value).strip() + if not raw: + return None + + normalized = ( + raw.replace("\u00a0", "") + .replace(" ", "") + .replace("₽", "") + .replace("руб.", "") + .replace("руб", "") + ) + normalized = re.sub(r"[^0-9,.\-]", "", normalized) + if not normalized: + return None + + if "," in normalized and "." in normalized: + if normalized.rfind(",") > normalized.rfind("."): + normalized = normalized.replace(".", "").replace(",", ".") + else: + normalized = normalized.replace(",", "") + elif "," in normalized: + normalized = normalized.replace(",", ".") + + try: + return Decimal(normalized) + except (InvalidOperation, ValueError): + return None + + +@dataclass(frozen=True) +class RegistryOrganizationLookup: + """Набор индексов для быстрого сопоставления parser-записей с организацией.""" + + by_pair: dict[tuple[int, int], int] + by_inn: dict[int, int] + by_ogrn: dict[int, int] + + +class RegistryOrganizationResolver: + """Разрешает связь parser-записей с канонической организацией из registers.""" + + @staticmethod + def normalize_identifier(value: str | int | None) -> int | None: + if value is None: + return None + + as_str = str(value).strip() + if not as_str or not as_str.isdigit(): + return None + + return int(as_str) + + @classmethod + def build_lookup( + cls, + identifiers: list[tuple[str | int | None, str | int | None]], + ) -> RegistryOrganizationLookup: + inn_values: set[int] = set() + ogrn_values: set[int] = set() + + for inn_raw, ogrn_raw in identifiers: + inn = cls.normalize_identifier(inn_raw) + ogrn = cls.normalize_identifier(ogrn_raw) + if inn is not None: + inn_values.add(inn) + if ogrn is not None: + ogrn_values.add(ogrn) + + if not inn_values and not ogrn_values: + return RegistryOrganizationLookup(by_pair={}, by_inn={}, by_ogrn={}) + + filters = Q() + if inn_values: + filters |= Q(mn_inn__in=inn_values) + if ogrn_values: + filters |= Q(mn_ogrn__in=ogrn_values) + + organizations = Organization.objects.filter(filters).values( + "id", + "mn_inn", + "mn_ogrn", + ) + + by_pair: dict[tuple[int, int], int] = {} + by_inn_candidates: dict[int, list[int]] = defaultdict(list) + by_ogrn_candidates: dict[int, list[int]] = defaultdict(list) + + for organization in organizations: + organization_id = organization["id"] + inn = organization["mn_inn"] + ogrn = organization["mn_ogrn"] + + by_pair[(inn, ogrn)] = organization_id + by_inn_candidates[inn].append(organization_id) + by_ogrn_candidates[ogrn].append(organization_id) + + by_inn = { + inn: ids[0] for inn, ids in by_inn_candidates.items() if len(set(ids)) == 1 + } + by_ogrn = { + ogrn: ids[0] + for ogrn, ids in by_ogrn_candidates.items() + if len(set(ids)) == 1 + } + + return RegistryOrganizationLookup( + by_pair=by_pair, + by_inn=by_inn, + by_ogrn=by_ogrn, + ) + + @classmethod + def resolve_organization_id( + cls, + *, + lookup: RegistryOrganizationLookup, + inn: str | int | None, + ogrn: str | int | None, + ) -> int | None: + normalized_inn = cls.normalize_identifier(inn) + normalized_ogrn = cls.normalize_identifier(ogrn) + + if normalized_inn is not None and normalized_ogrn is not None: + by_pair = lookup.by_pair.get((normalized_inn, normalized_ogrn)) + if by_pair is not None: + return by_pair + + if normalized_inn is not None and normalized_inn in lookup.by_inn: + return lookup.by_inn[normalized_inn] + + if normalized_ogrn is not None and normalized_ogrn in lookup.by_ogrn: + return lookup.by_ogrn[normalized_ogrn] + + return None + class ParserLoadLogService(BaseService[ParserLoadLog]): """ @@ -174,25 +364,39 @@ class IndustrialCertificateService( logger.info("Saving %d certificates (batch_id=%d)", len(certificates), batch_id) + registry_lookup = RegistryOrganizationResolver.build_lookup( + [(cert.inn, cert.ogrn) for cert in certificates] + ) + instances = [ cls.model( load_batch=batch_id, issue_date=cert.issue_date, + issue_date_normalized=normalize_to_date(cert.issue_date), certificate_number=cert.certificate_number, expiry_date=cert.expiry_date, + expiry_date_normalized=normalize_to_date(cert.expiry_date), certificate_file_url=cert.certificate_file_url, organisation_name=cert.organisation_name, inn=cert.inn, ogrn=cert.ogrn, + registry_organization_id=RegistryOrganizationResolver.resolve_organization_id( + lookup=registry_lookup, + inn=cert.inn, + ogrn=cert.ogrn, + ), ) for cert in certificates ] - saved_count = cls.bulk_create_chunked( + before_count = cls.model.objects.filter(load_batch=batch_id).count() + cls.bulk_create_chunked( instances, chunk_size=chunk_size, ignore_conflicts=True, # Skip duplicates by certificate_number ) + after_count = cls.model.objects.filter(load_batch=batch_id).count() + saved_count = max(0, after_count - before_count) logger.info("Saved %d certificates", saved_count) return saved_count @@ -259,6 +463,10 @@ class ManufacturerService(BulkOperationsMixin, BaseService[ManufacturerRecord]): "Saving %d manufacturers (batch_id=%d)", len(manufacturers), batch_id ) + registry_lookup = RegistryOrganizationResolver.build_lookup( + [(manufacturer.inn, manufacturer.ogrn) for manufacturer in manufacturers] + ) + instances = [ cls.model( load_batch=batch_id, @@ -266,15 +474,23 @@ class ManufacturerService(BulkOperationsMixin, BaseService[ManufacturerRecord]): inn=m.inn, ogrn=m.ogrn, address=m.address, + registry_organization_id=RegistryOrganizationResolver.resolve_organization_id( + lookup=registry_lookup, + inn=m.inn, + ogrn=m.ogrn, + ), ) for m in manufacturers ] - saved_count = cls.bulk_create_chunked( + before_count = cls.model.objects.filter(load_batch=batch_id).count() + cls.bulk_create_chunked( instances, chunk_size=chunk_size, ignore_conflicts=True, # Skip duplicates by INN ) + after_count = cls.model.objects.filter(load_batch=batch_id).count() + saved_count = max(0, after_count - before_count) logger.info("Saved %d manufacturers", saved_count) return saved_count @@ -463,6 +679,10 @@ class InspectionService(BulkOperationsMixin, BaseService[InspectionRecord]): data_month, ) + registry_lookup = RegistryOrganizationResolver.build_lookup( + [(inspection.inn, inspection.ogrn) for inspection in inspections] + ) + instances = [ cls.model( load_batch=batch_id, @@ -474,22 +694,32 @@ class InspectionService(BulkOperationsMixin, BaseService[InspectionRecord]): inspection_type=insp.inspection_type, inspection_form=insp.inspection_form, start_date=insp.start_date, + start_date_normalized=normalize_to_date(insp.start_date), end_date=insp.end_date, + end_date_normalized=normalize_to_date(insp.end_date), status=insp.status, legal_basis=insp.legal_basis, result=insp.result, is_federal_law_248=is_federal_law_248, data_year=data_year, data_month=data_month, + registry_organization_id=RegistryOrganizationResolver.resolve_organization_id( + lookup=registry_lookup, + inn=insp.inn, + ogrn=insp.ogrn, + ), ) for insp in inspections ] - saved_count = cls.bulk_create_chunked( + before_count = cls.model.objects.filter(load_batch=batch_id).count() + cls.bulk_create_chunked( instances, chunk_size=chunk_size, ignore_conflicts=True, # Skip duplicates by registration_number ) + after_count = cls.model.objects.filter(load_batch=batch_id).count() + saved_count = max(0, after_count - before_count) logger.info("Saved %d inspections", saved_count) return saved_count @@ -630,6 +860,13 @@ class ProcurementService(BulkOperationsMixin, BaseService[ProcurementRecord]): data_month, ) + registry_lookup = RegistryOrganizationResolver.build_lookup( + [ + (procurement.customer_inn, procurement.customer_ogrn) + for procurement in procurements + ] + ) + instances = [ cls.model( load_batch=batch_id, @@ -640,10 +877,13 @@ class ProcurementService(BulkOperationsMixin, BaseService[ProcurementRecord]): customer_ogrn=proc.customer_ogrn, customer_name=proc.customer_name, max_price=proc.max_price, + max_price_amount=normalize_to_decimal(proc.max_price), currency_code=proc.currency_code, placement_method=proc.placement_method, publish_date=proc.publish_date, + publish_date_normalized=normalize_to_date(proc.publish_date), end_date=proc.end_date, + end_date_normalized=normalize_to_date(proc.end_date), status=proc.status, law_type=proc.law_type, purchase_object_info=proc.purchase_object_info, @@ -651,15 +891,23 @@ class ProcurementService(BulkOperationsMixin, BaseService[ProcurementRecord]): region_code=region_code or "", data_year=data_year, data_month=data_month, + registry_organization_id=RegistryOrganizationResolver.resolve_organization_id( + lookup=registry_lookup, + inn=proc.customer_inn, + ogrn=proc.customer_ogrn, + ), ) for proc in procurements ] - saved_count = cls.bulk_create_chunked( + before_count = cls.model.objects.filter(load_batch=batch_id).count() + cls.bulk_create_chunked( instances, chunk_size=chunk_size, ignore_conflicts=True, # Skip duplicates by purchase_number ) + after_count = cls.model.objects.filter(load_batch=batch_id).count() + saved_count = max(0, after_count - before_count) logger.info("Saved %d procurements", saved_count) return saved_count @@ -819,9 +1067,16 @@ class FNSReportService(BulkOperationsMixin, BaseService[FinancialReport]): len(lines_data), ) + registry_lookup = RegistryOrganizationResolver.build_lookup([(None, ogrn)]) + report = cls.create( external_id=external_id, ogrn=ogrn, + registry_organization_id=RegistryOrganizationResolver.resolve_organization_id( + lookup=registry_lookup, + inn=None, + ogrn=ogrn, + ), file_name=file_name, file_hash=file_hash, source=source, diff --git a/src/apps/parsers/tasks.py b/src/apps/parsers/tasks.py index 9a30570..dd683b4 100644 --- a/src/apps/parsers/tasks.py +++ b/src/apps/parsers/tasks.py @@ -90,7 +90,12 @@ def _move_to_dir( return target -def _process_fns_file_sync(file_path: str | Path, *, task_id: str) -> dict: +def _process_fns_file_sync( + file_path: str | Path, + *, + task_id: str, + raise_on_error: bool = False, +) -> dict: import hashlib from dataclasses import asdict @@ -212,6 +217,8 @@ def _process_fns_file_sync(file_path: str | Path, *, task_id: str) -> dict: ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) + if raise_on_error: + raise return {"status": "failed", "error": str(e)} except Exception as e: @@ -225,6 +232,8 @@ def _process_fns_file_sync(file_path: str | Path, *, task_id: str) -> dict: ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) + if raise_on_error: + raise return {"status": "failed", "error": str(e)} finally: if lock_path.exists(): @@ -319,13 +328,7 @@ def parse_industrial_production( logger.error("Industrial production parsing failed: %s", e, exc_info=True) ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) - - return { - "batch_id": batch_id, - "saved": 0, - "status": "failed", - "error": str(e), - } + raise @shared_task(bind=True) @@ -416,13 +419,7 @@ def parse_manufactures( logger.error("Manufactures parsing failed: %s", e, exc_info=True) ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) - - return { - "batch_id": batch_id, - "saved": 0, - "status": "failed", - "error": str(e), - } + raise @shared_task @@ -578,13 +575,7 @@ def parse_inspections( logger.error("Inspections parsing failed: %s", e, exc_info=True) ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) - - return { - "batch_id": batch_id, - "saved": 0, - "status": "failed", - "error": str(e), - } + raise @shared_task @@ -858,13 +849,7 @@ def sync_inspections( # noqa: C901 logger.error("Inspections sync failed: %s", e, exc_info=True) ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) - - return { - "batch_id": batch_id, - "total_saved": total_saved, - "status": "failed", - "error": str(e), - } + raise @shared_task(bind=True) @@ -997,13 +982,7 @@ def parse_procurements( logger.error("Procurements parsing failed: %s", e, exc_info=True) ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) - - return { - "batch_id": batch_id, - "saved": 0, - "status": "failed", - "error": str(e), - } + raise @shared_task(bind=True) @@ -1205,13 +1184,7 @@ def sync_procurements( # noqa: C901 logger.error("Procurements sync failed: %s", e, exc_info=True) ParserLoadLogService.mark_failed(load_log, str(e)) job.fail(error=str(e)) - - return { - "batch_id": batch_id, - "total_saved": total_saved, - "status": "failed", - "error": str(e), - } + raise # ============================================================================= @@ -1301,7 +1274,11 @@ def scan_fns_directory(self) -> dict: @shared_task(bind=True) def process_fns_file(self, file_path: str) -> dict: """Обработка одного файла FNS.""" - return _process_fns_file_sync(file_path, task_id=self.request.id) + return _process_fns_file_sync( + file_path, + task_id=self.request.id, + raise_on_error=True, + ) @shared_task(bind=True) @@ -1330,6 +1307,7 @@ def process_fns_files_batch(self, file_paths: list[str]) -> dict: result = _process_fns_file_sync( file_path, task_id=f"{self.request.id}:{success_count + failed_count}", + raise_on_error=False, ) results.append({"file": file_path, **result}) diff --git a/src/apps/parsers/tests/__init__.py b/src/apps/parsers/tests/__init__.py deleted file mode 100644 index c3bfa03..0000000 --- a/src/apps/parsers/tests/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Тесты для приложения parsers. - -Содержит: -- Unit-тесты клиентов (ZakupkiClient и др.) -- Unit-тесты сервисов (ProcurementService и др.) -- E2E тесты с реальной загрузкой данных -""" diff --git a/src/apps/parsers/tests/factories.py b/src/apps/parsers/tests/factories.py deleted file mode 100644 index 1209b53..0000000 --- a/src/apps/parsers/tests/factories.py +++ /dev/null @@ -1,173 +0,0 @@ -""" -Фабрики для тестов приложения parsers. - -Использует factory_boy + Faker для генерации тестовых данных. -""" - -import factory -from apps.parsers.models import ( - IndustrialCertificateRecord, - InspectionRecord, - ManufacturerRecord, - ParserLoadLog, - ProcurementRecord, - Proxy, -) -from faker import Faker - -fake = Faker("ru_RU") - - -class ProxyFactory(factory.django.DjangoModelFactory): - """Фабрика для модели Proxy.""" - - class Meta: - model = Proxy - - address = factory.LazyAttribute( - lambda _: f"http://{fake.ipv4()}:{fake.port_number()}" - ) - is_active = True - fail_count = 0 - description = factory.LazyAttribute(lambda _: fake.sentence(nb_words=3)) - - -class ParserLoadLogFactory(factory.django.DjangoModelFactory): - """Фабрика для модели ParserLoadLog.""" - - class Meta: - model = ParserLoadLog - - batch_id = factory.Sequence(lambda n: n + 1) - source = ParserLoadLog.Source.PROCUREMENTS - records_count = factory.LazyAttribute(lambda _: fake.random_int(min=0, max=1000)) - status = "success" - error_message = "" - - -class ProcurementRecordFactory(factory.django.DjangoModelFactory): - """Фабрика для модели ProcurementRecord.""" - - class Meta: - model = ProcurementRecord - - load_batch = factory.Sequence(lambda n: n + 1) - purchase_number = factory.LazyAttribute( - lambda _: f"{fake.random_number(digits=19, fix_len=True)}" - ) - purchase_name = factory.LazyAttribute(lambda _: fake.sentence(nb_words=6)) - customer_inn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=10, fix_len=True)) - ) - customer_kpp = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=9, fix_len=True)) - ) - customer_ogrn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=13, fix_len=True)) - ) - customer_name = factory.LazyAttribute(lambda _: fake.company()) - max_price = factory.LazyAttribute( - lambda _: str(fake.random_int(min=10000, max=10000000)) - ) - currency_code = "RUB" - placement_method = factory.LazyAttribute( - lambda _: fake.random_element( - ["Электронный аукцион", "Открытый конкурс", "Запрос котировок"] - ) - ) - publish_date = factory.LazyAttribute(lambda _: fake.date()) - end_date = factory.LazyAttribute(lambda _: fake.date()) - status = factory.LazyAttribute( - lambda _: fake.random_element( - ["Подача заявок", "Работа комиссии", "Завершена", "Отменена"] - ) - ) - law_type = factory.LazyAttribute(lambda _: fake.random_element(["44-FZ", "223-FZ"])) - purchase_object_info = factory.LazyAttribute(lambda _: fake.text(max_nb_chars=200)) - href = factory.LazyAttribute(lambda _: fake.url()) - region_code = factory.LazyAttribute( - lambda _: str(fake.random_int(min=1, max=99)).zfill(2) - ) - data_year = factory.LazyAttribute(lambda _: fake.random_int(min=2020, max=2025)) - data_month = factory.LazyAttribute(lambda _: fake.random_int(min=1, max=12)) - - -class IndustrialCertificateRecordFactory(factory.django.DjangoModelFactory): - """Фабрика для модели IndustrialCertificateRecord.""" - - class Meta: - model = IndustrialCertificateRecord - - load_batch = factory.Sequence(lambda n: n + 1) - issue_date = factory.LazyAttribute(lambda _: fake.date()) - certificate_number = factory.LazyAttribute( - lambda _: f"CERT-{fake.random_number(digits=10, fix_len=True)}" - ) - expiry_date = factory.LazyAttribute(lambda _: fake.date()) - certificate_file_url = factory.LazyAttribute(lambda _: fake.url()) - organisation_name = factory.LazyAttribute(lambda _: fake.company()) - inn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=10, fix_len=True)) - ) - ogrn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=13, fix_len=True)) - ) - - -class ManufacturerRecordFactory(factory.django.DjangoModelFactory): - """Фабрика для модели ManufacturerRecord.""" - - class Meta: - model = ManufacturerRecord - - load_batch = factory.Sequence(lambda n: n + 1) - full_legal_name = factory.LazyAttribute(lambda _: fake.company()) - inn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=10, fix_len=True)) - ) - ogrn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=13, fix_len=True)) - ) - address = factory.LazyAttribute(lambda _: fake.address()) - - -class InspectionRecordFactory(factory.django.DjangoModelFactory): - """Фабрика для модели InspectionRecord.""" - - class Meta: - model = InspectionRecord - - load_batch = factory.Sequence(lambda n: n + 1) - registration_number = factory.LazyAttribute( - lambda _: f"REG-{fake.random_number(digits=15, fix_len=True)}" - ) - inn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=10, fix_len=True)) - ) - ogrn = factory.LazyAttribute( - lambda _: str(fake.random_number(digits=13, fix_len=True)) - ) - organisation_name = factory.LazyAttribute(lambda _: fake.company()) - control_authority = factory.LazyAttribute( - lambda _: fake.random_element( - ["Роспотребнадзор", "Ростехнадзор", "МЧС России", "Росприроднадзор"] - ) - ) - inspection_type = factory.LazyAttribute( - lambda _: fake.random_element(["Плановая", "Внеплановая"]) - ) - inspection_form = factory.LazyAttribute( - lambda _: fake.random_element(["Документарная", "Выездная"]) - ) - start_date = factory.LazyAttribute(lambda _: fake.date()) - end_date = factory.LazyAttribute(lambda _: fake.date()) - status = factory.LazyAttribute( - lambda _: fake.random_element(["Завершена", "В процессе", "Отменена"]) - ) - legal_basis = factory.LazyAttribute( - lambda _: fake.random_element(["ФЗ-294", "ФЗ-248"]) - ) - result = factory.LazyAttribute(lambda _: fake.text(max_nb_chars=100)) - is_federal_law_248 = False - data_year = factory.LazyAttribute(lambda _: fake.random_int(min=2020, max=2025)) - data_month = factory.LazyAttribute(lambda _: fake.random_int(min=1, max=12)) diff --git a/src/apps/parsers/views.py b/src/apps/parsers/views.py index 72becd6..a3fdf68 100644 --- a/src/apps/parsers/views.py +++ b/src/apps/parsers/views.py @@ -32,6 +32,7 @@ from apps.parsers.serializers import ( ) from apps.parsers.tasks import process_fns_file from django.conf import settings +from django.db.models import Count from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status @@ -68,7 +69,13 @@ class IndustrialCertificateViewSet(ReadOnlyModelViewSet): queryset = IndustrialCertificateRecord.objects.all().order_by("-created_at") serializer_class = IndustrialCertificateSerializer permission_classes = [IsAuthenticated] - filterset_fields = ["inn", "ogrn", "certificate_number", "load_batch"] + filterset_fields = [ + "inn", + "ogrn", + "certificate_number", + "load_batch", + "registry_organization", + ] search_fields = ["organisation_name", "certificate_number", "inn", "ogrn"] @swagger_auto_schema( @@ -116,7 +123,7 @@ class ManufacturerViewSet(ReadOnlyModelViewSet): queryset = ManufacturerRecord.objects.all().order_by("-created_at") serializer_class = ManufacturerSerializer permission_classes = [IsAuthenticated] - filterset_fields = ["inn", "ogrn", "load_batch"] + filterset_fields = ["inn", "ogrn", "load_batch", "registry_organization"] search_fields = ["full_legal_name", "inn", "ogrn", "address"] @swagger_auto_schema( @@ -173,6 +180,7 @@ class InspectionViewSet(ReadOnlyModelViewSet): "data_year", "data_month", "load_batch", + "registry_organization", ] search_fields = [ "organisation_name", @@ -240,6 +248,7 @@ class ProcurementViewSet(ReadOnlyModelViewSet): "data_year", "data_month", "load_batch", + "registry_organization", ] search_fields = [ "purchase_name", @@ -294,9 +303,18 @@ class FinancialReportViewSet(ReadOnlyModelViewSet): Только чтение - добавление через загрузку файлов. """ - queryset = FinancialReport.objects.all().order_by("-created_at") + queryset = FinancialReport.objects.annotate(lines_count=Count("lines")).order_by( + "-created_at" + ) permission_classes = [IsAuthenticated] - filterset_fields = ["ogrn", "external_id", "status", "source", "load_batch"] + filterset_fields = [ + "ogrn", + "external_id", + "status", + "source", + "load_batch", + "registry_organization", + ] search_fields = ["ogrn", "external_id", "file_name"] def get_serializer_class(self): diff --git a/tests/apps/parsers/factories.py b/tests/apps/parsers/factories.py index 7c525f0..16e3336 100644 --- a/tests/apps/parsers/factories.py +++ b/tests/apps/parsers/factories.py @@ -5,22 +5,23 @@ from __future__ import annotations from datetime import timedelta import factory -from faker import Faker - from apps.parsers.models import ( IndustrialCertificateRecord, InspectionRecord, ManufacturerRecord, ParserLoadLog, + ProcurementRecord, Proxy, ) from django.utils import timezone +from faker import Faker fake = Faker("ru_RU") # === Хелперы для генерации реалистичных данных === + def _digits(length: int) -> str: return "".join(str(fake.random_int(0, 9)) for _ in range(length)) @@ -98,6 +99,41 @@ class ParserLoadLogFactory(factory.django.DjangoModelFactory): error_message = "" +class ProcurementRecordFactory(factory.django.DjangoModelFactory): + """Factory for ProcurementRecord model.""" + + class Meta: + model = ProcurementRecord + + load_batch = factory.Sequence(lambda n: n + 1) + purchase_number = factory.LazyAttribute( + lambda _: str(fake.random_number(digits=19, fix_len=True)) + ) + purchase_name = factory.LazyAttribute(lambda _: fake.sentence(nb_words=6)) + customer_inn = factory.LazyFunction(generate_inn_legal) + customer_kpp = factory.LazyAttribute( + lambda _: str(fake.random_number(digits=9, fix_len=True)) + ) + customer_ogrn = factory.LazyFunction(generate_ogrn) + customer_name = factory.LazyFunction(generate_company_name) + max_price = factory.LazyAttribute( + lambda _: str(fake.random_int(min=10000, max=10000000)) + ) + currency_code = "RUB" + placement_method = factory.LazyAttribute(lambda _: fake.word()) + publish_date = factory.LazyAttribute(lambda _: str(fake.date())) + end_date = factory.LazyAttribute(lambda _: str(fake.date())) + status = factory.LazyAttribute(lambda _: fake.word()) + law_type = factory.LazyAttribute(lambda _: fake.random_element(["44-FZ", "223-FZ"])) + purchase_object_info = factory.LazyAttribute(lambda _: fake.sentence(nb_words=8)) + href = factory.LazyAttribute(lambda _: fake.url()) + region_code = factory.LazyAttribute( + lambda _: str(fake.random_int(min=1, max=99)).zfill(2) + ) + data_year = factory.LazyAttribute(lambda _: fake.random_int(min=2020, max=2026)) + data_month = factory.LazyAttribute(lambda _: fake.random_int(min=1, max=12)) + + class IndustrialCertificateRecordFactory(factory.django.DjangoModelFactory): """Factory for IndustrialCertificateRecord model.""" diff --git a/src/apps/parsers/tests/run_checko_e2e.py b/tests/apps/parsers/run_checko_e2e.py similarity index 98% rename from src/apps/parsers/tests/run_checko_e2e.py rename to tests/apps/parsers/run_checko_e2e.py index 32a778a..5265f3f 100644 --- a/src/apps/parsers/tests/run_checko_e2e.py +++ b/tests/apps/parsers/run_checko_e2e.py @@ -3,7 +3,7 @@ Standalone E2E test for Checko API client. Usage: - PYTHONPATH=src python src/apps/parsers/tests/run_checko_e2e.py + PYTHONPATH=src python tests/apps/parsers/run_checko_e2e.py """ import os diff --git a/src/apps/parsers/tests/test_checko_e2e.py b/tests/apps/parsers/test_checko_e2e.py similarity index 100% rename from src/apps/parsers/tests/test_checko_e2e.py rename to tests/apps/parsers/test_checko_e2e.py diff --git a/src/apps/parsers/tests/test_e2e.py b/tests/apps/parsers/test_e2e.py similarity index 99% rename from src/apps/parsers/tests/test_e2e.py rename to tests/apps/parsers/test_e2e.py index 5b3557a..5bb2708 100644 --- a/src/apps/parsers/tests/test_e2e.py +++ b/tests/apps/parsers/test_e2e.py @@ -319,7 +319,8 @@ class ProxyIntegrationTestCase(TestCase): def test_proxy_service_integration(self): """Интеграция с ProxyService.""" from apps.parsers.services import ProxyService - from apps.parsers.tests.factories import ProxyFactory + + from tests.apps.parsers.factories import ProxyFactory ProxyFactory.create_batch(3, is_active=True) ProxyFactory.create_batch(2, is_active=False) diff --git a/src/apps/parsers/tests/test_fns_parser.py b/tests/apps/parsers/test_fns_parser.py similarity index 92% rename from src/apps/parsers/tests/test_fns_parser.py rename to tests/apps/parsers/test_fns_parser.py index 1dbe07c..cb02838 100644 --- a/src/apps/parsers/tests/test_fns_parser.py +++ b/tests/apps/parsers/test_fns_parser.py @@ -6,6 +6,7 @@ from apps.parsers.clients.fns.parser import FNSExcelParser, FNSParserError from apps.parsers.clients.fns.schemas import ParsedReport, ReportLine from apps.parsers.models import FinancialReport from apps.parsers.services import FNSReportService +from apps.registers.models import Organization from django.test import TestCase from tests.utils.fixtures import fake @@ -27,6 +28,16 @@ def _form_code() -> str: return str(fake.random_int(min=1, max=6)) +def _create_registry_organization(*, inn: str, ogrn: str) -> Organization: + return Organization.objects.create( + pn_name=fake.company(), + mn_ogrn=int(ogrn), + mn_inn=int(inn), + in_kpp=int(_digits(9)), + mn_okpo=_digits(8), + ) + + class TestFNSExcelParserFilename(TestCase): """Тесты парсинга имени файла.""" @@ -302,6 +313,24 @@ class TestFNSReportServiceIntegration(TestCase): self.assertEqual(report.status, FinancialReport.Status.SUCCESS) self.assertEqual(report.lines.count(), 2) + def test_save_report_links_registry_organization_when_exists(self): + """Отчет ФНС должен связываться с организацией из registers по ОГРН.""" + ogrn = _digits(13) + organization = _create_registry_organization(inn=_digits(10), ogrn=ogrn) + external_id = _digits(6) + + report = FNSReportService.save_report( + external_id=external_id, + ogrn=ogrn, + file_name=f"fin_{external_id}_{ogrn}.xlsx", + file_hash=fake.sha1(raw_output=False), + source=FinancialReport.SourceType.API, + batch_id=1, + lines_data=[], + ) + + self.assertEqual(report.registry_organization_id, organization.id) + def test_exists_by_hash(self): """Проверка существования по хешу.""" unique_hash = fake.sha1(raw_output=False) diff --git a/src/apps/parsers/tests/test_procurement_service.py b/tests/apps/parsers/test_procurement_service.py similarity index 92% rename from src/apps/parsers/tests/test_procurement_service.py rename to tests/apps/parsers/test_procurement_service.py index 1d18f30..6198c0f 100644 --- a/src/apps/parsers/tests/test_procurement_service.py +++ b/tests/apps/parsers/test_procurement_service.py @@ -7,9 +7,11 @@ Unit-тесты для ProcurementService. from apps.parsers.clients.zakupki.schemas import Procurement from apps.parsers.models import ProcurementRecord from apps.parsers.services import ProcurementService -from apps.parsers.tests.factories import ProcurementRecordFactory, fake +from apps.registers.models import Organization from django.test import TestCase +from tests.apps.parsers.factories import ProcurementRecordFactory, fake + def _digits(length: int) -> str: return "".join(str(fake.random_int(0, 9)) for _ in range(length)) @@ -27,6 +29,16 @@ def _other_law(law_type: str) -> str: return "223-FZ" if law_type == "44-FZ" else "44-FZ" +def _create_registry_organization(*, inn: str, ogrn: str) -> Organization: + return Organization.objects.create( + pn_name=fake.company(), + mn_ogrn=int(ogrn), + mn_inn=int(inn), + in_kpp=int(_digits(9)), + mn_okpo=_digits(8), + ) + + def _build_procurement(**overrides) -> Procurement: data = { "purchase_number": _digits(19), @@ -96,6 +108,24 @@ class ProcurementServiceSaveTestCase(TestCase): self.assertEqual(saved, 5) self.assertEqual(ProcurementRecord.objects.count(), 5) + def test_save_links_registry_organization_when_exists(self): + """При совпадении ИНН/ОГРН должна ставиться связь с registers.Organization.""" + inn = _digits(10) + ogrn = _digits(13) + organization = _create_registry_organization(inn=inn, ogrn=ogrn) + purchase_number = _digits(19) + procurement = _build_procurement( + purchase_number=purchase_number, + customer_inn=inn, + customer_ogrn=ogrn, + ) + + saved = ProcurementService.save_procurements([procurement], batch_id=1) + + self.assertEqual(saved, 1) + record = ProcurementRecord.objects.get(purchase_number=purchase_number) + self.assertEqual(record.registry_organization_id, organization.id) + def test_save_ignores_duplicates(self): """Дубликаты по purchase_number пропускаются.""" # Создаём существующую запись diff --git a/src/apps/parsers/tests/test_proverki_client.py b/tests/apps/parsers/test_proverki_client.py similarity index 100% rename from src/apps/parsers/tests/test_proverki_client.py rename to tests/apps/parsers/test_proverki_client.py diff --git a/src/apps/parsers/tests/test_tasks.py b/tests/apps/parsers/test_tasks.py similarity index 90% rename from src/apps/parsers/tests/test_tasks.py rename to tests/apps/parsers/test_tasks.py index f7e5481..f011596 100644 --- a/src/apps/parsers/tests/test_tasks.py +++ b/tests/apps/parsers/test_tasks.py @@ -8,11 +8,20 @@ import os import tempfile import threading from pathlib import Path +from types import SimpleNamespace from urllib.parse import urlparse from apps.parsers import tasks as parser_tasks -from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient -from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient +from apps.parsers.clients.minpromtorg.industrial import ( + IndustrialProductionClient, + IndustrialProductionClientError, +) +from apps.parsers.clients.minpromtorg.manufactures import ( + ManufacturesClient, + ManufacturesClientError, +) +from apps.parsers.clients.proverki.client import ProverkiClientError +from apps.parsers.clients.zakupki import ZakupkiClientError from apps.parsers.models import ( FinancialReport, IndustrialCertificateRecord, @@ -37,14 +46,14 @@ from apps.parsers.tasks import ( sync_inspections, sync_procurements, ) -from apps.parsers.tests.factories import ( +from django.test import TestCase, override_settings +from openpyxl import Workbook + +from tests.apps.parsers.factories import ( InspectionRecordFactory, ParserLoadLogFactory, ProcurementRecordFactory, ) -from django.test import TestCase, override_settings -from openpyxl import Workbook - from tests.utils import TestHTTPServer from tests.utils.fixtures import ( build_minpromtorg_certificates_excel, @@ -114,16 +123,14 @@ class ParseProcurementsTaskTestCase(TestCase): def test_parse_procurements_failure(self): with TestHTTPServer() as server: server.add_bytes("/files/bad.bin", b"not-zip-or-xml") - result = parse_procurements( - file_url=f"{server.base_url}/files/bad.bin", - law_type="44", - client_adapter=server.adapter, - client_host=_host_from_base_url(server.base_url), - client_scheme="http", - ) - - self.assertEqual(result["status"], "failed") - self.assertIn("error", result) + with self.assertRaises(ZakupkiClientError): + parse_procurements( + file_url=f"{server.base_url}/files/bad.bin", + law_type="44", + client_adapter=server.adapter, + client_host=_host_from_base_url(server.base_url), + client_scheme="http", + ) def test_parse_procurements_empty_result(self): xml_content = b"" @@ -337,12 +344,11 @@ class SyncProcurementsTaskTestCase(TestCase): original_client = parser_tasks.ZakupkiClient parser_tasks.ZakupkiClient = _FailClient try: - result = sync_procurements(region_code=region, law_type="44", proxies=[]) + with self.assertRaises(RuntimeError): + sync_procurements(region_code=region, law_type="44", proxies=[]) finally: parser_tasks.ZakupkiClient = original_client - self.assertEqual(result["status"], "failed") - @override_settings( CELERY_TASK_ALWAYS_EAGER=True, @@ -437,13 +443,34 @@ class MinpromtorgTasksTestCase(TestCase): original_industrial = parser_tasks.IndustrialProductionClient original_manufactures = parser_tasks.ManufacturesClient + original_industrial_delay = parser_tasks.parse_industrial_production.delay + original_manufactures_delay = parser_tasks.parse_manufactures.delay parser_tasks.IndustrialProductionClient = _LocalIndustrialClient parser_tasks.ManufacturesClient = _LocalManufacturesClient + + def _industrial_eager_delay(*args, **kwargs): + return parser_tasks.parse_industrial_production.apply( + args=args, + kwargs=kwargs, + ) + + def _manufactures_eager_delay(*args, **kwargs): + return parser_tasks.parse_manufactures.apply( + args=args, + kwargs=kwargs, + ) + + parser_tasks.parse_industrial_production.delay = _industrial_eager_delay + parser_tasks.parse_manufactures.delay = _manufactures_eager_delay try: result = parse_all_minpromtorg(proxies=[]) finally: parser_tasks.IndustrialProductionClient = original_industrial parser_tasks.ManufacturesClient = original_manufactures + parser_tasks.parse_industrial_production.delay = ( + original_industrial_delay + ) + parser_tasks.parse_manufactures.delay = original_manufactures_delay self.assertIn("industrial", result) self.assertIn("manufactures", result) @@ -466,13 +493,40 @@ class MinpromtorgTasksTestCase(TestCase): original_industrial = parser_tasks.IndustrialProductionClient original_manufactures = parser_tasks.ManufacturesClient + original_industrial_delay = parser_tasks.parse_industrial_production.delay + original_manufactures_delay = parser_tasks.parse_manufactures.delay + original_inspections_delay = parser_tasks.parse_inspections.delay parser_tasks.IndustrialProductionClient = _LocalIndustrialClient parser_tasks.ManufacturesClient = _LocalManufacturesClient + + def _industrial_eager_delay(*args, **kwargs): + return parser_tasks.parse_industrial_production.apply( + args=args, + kwargs=kwargs, + ) + + def _manufactures_eager_delay(*args, **kwargs): + return parser_tasks.parse_manufactures.apply( + args=args, + kwargs=kwargs, + ) + + def _inspections_stub_delay(*_args, **_kwargs): + return SimpleNamespace(id="inspections-test-task") + + parser_tasks.parse_industrial_production.delay = _industrial_eager_delay + parser_tasks.parse_manufactures.delay = _manufactures_eager_delay + parser_tasks.parse_inspections.delay = _inspections_stub_delay try: result = parse_all_sources(proxies=[], inspections_use_playwright=None) finally: parser_tasks.IndustrialProductionClient = original_industrial parser_tasks.ManufacturesClient = original_manufactures + parser_tasks.parse_industrial_production.delay = ( + original_industrial_delay + ) + parser_tasks.parse_manufactures.delay = original_manufactures_delay + parser_tasks.parse_inspections.delay = original_inspections_delay self.assertIn("industrial", result) self.assertIn("manufactures", result) @@ -501,12 +555,11 @@ class MinpromtorgTasksTestCase(TestCase): }, ) server.add_bytes("/files/" + cert_file, b"not-an-excel") - result = parse_industrial_production( - proxies=[], - client_adapter=server.adapter, - ) - - self.assertEqual(result["status"], "failed") + with self.assertRaises(IndustrialProductionClientError): + parse_industrial_production( + proxies=[], + client_adapter=server.adapter, + ) def test_parse_industrial_production_with_default_proxies(self): with TestHTTPServer() as server: @@ -537,9 +590,8 @@ class MinpromtorgTasksTestCase(TestCase): }, ) server.add_bytes("/files/" + manuf_file, b"not-an-excel") - result = parse_manufactures(client_adapter=server.adapter) - - self.assertEqual(result["status"], "failed") + with self.assertRaises(ManufacturesClientError): + parse_manufactures(client_adapter=server.adapter) @override_settings( @@ -622,15 +674,13 @@ class ParseInspectionsTaskTestCase(TestCase): def test_parse_inspections_failure(self): with TestHTTPServer() as server: server.add_bytes("/opendata/bad.xml", b"not-xml") - result = parse_inspections( - file_url="https://proverki.gov.ru/opendata/bad.xml", - proxies=[], - client_adapter=server.adapter, - use_playwright=False, - ) - - self.assertEqual(result["status"], "failed") - self.assertIn("error", result) + with self.assertRaises(ProverkiClientError): + parse_inspections( + file_url="https://proverki.gov.ru/opendata/bad.xml", + proxies=[], + client_adapter=server.adapter, + use_playwright=False, + ) def test_sync_inspections_success(self): xml_content, rows = build_proverki_xml(count=2) @@ -651,7 +701,7 @@ class ParseInspectionsTaskTestCase(TestCase): ) self.assertEqual(result["status"], "success") - self.assertGreaterEqual(result["total_saved"], len(rows) * 2) + self.assertGreaterEqual(result["total_saved"], len(rows)) def test_sync_inspections_stops_after_empty_months(self): empty_xml = b"" @@ -716,7 +766,7 @@ class ParseInspectionsTaskTestCase(TestCase): parser_tasks.ProverkiClient = original_client self.assertEqual(result["status"], "success") - self.assertGreaterEqual(result["total_saved"], len(rows) * 2) + self.assertGreaterEqual(result["total_saved"], len(rows)) def test_sync_inspections_handles_fetch_error(self): with TestHTTPServer() as server: @@ -754,12 +804,11 @@ class ParseInspectionsTaskTestCase(TestCase): original_client = parser_tasks.ProverkiClient parser_tasks.ProverkiClient = _FailClient try: - result = sync_inspections(proxies=[]) + with self.assertRaises(RuntimeError): + sync_inspections(proxies=[]) finally: parser_tasks.ProverkiClient = original_client - self.assertEqual(result["status"], "failed") - @override_settings( CELERY_TASK_ALWAYS_EAGER=True, @@ -784,6 +833,22 @@ class FNSFileTasksTestCase(TestCase): handle.write(content) return file_path + def _scan_with_eager_process(self) -> dict: + original_delay = parser_tasks.process_fns_file.delay + + def _local_delay(file_path: str): + return _process_fns_file_sync( + file_path, + task_id=str(fake.uuid4()), + raise_on_error=True, + ) + + parser_tasks.process_fns_file.delay = _local_delay + try: + return scan_fns_directory() + finally: + parser_tasks.process_fns_file.delay = original_delay + def test_scan_fns_directory_processes_file(self): with tempfile.TemporaryDirectory() as tmpdir: watch_dir, processed_dir, failed_dir = self._dirs(tmpdir) @@ -795,7 +860,7 @@ class FNSFileTasksTestCase(TestCase): FNS_PROCESSED_DIRECTORY=processed_dir, FNS_FAILED_DIRECTORY=failed_dir, ): - result = scan_fns_directory() + result = self._scan_with_eager_process() self.assertEqual(result["queued"], 1) self.assertEqual(result["skipped"], 0) self.assertEqual(FinancialReport.objects.count(), 1) @@ -855,7 +920,7 @@ class FNSFileTasksTestCase(TestCase): FNS_FAILED_DIRECTORY=failed_dir, FNS_LOCK_TTL_SECONDS=1, ): - result = scan_fns_directory() + result = self._scan_with_eager_process() self.assertEqual(result["queued"], 1) self.assertEqual(result["skipped"], 0) diff --git a/src/apps/parsers/tests/test_zakupki_client.py b/tests/apps/parsers/test_zakupki_client.py similarity index 100% rename from src/apps/parsers/tests/test_zakupki_client.py rename to tests/apps/parsers/test_zakupki_client.py