Files
mostovik-backend/tests/apps/parsers/test_source_cards_service.py
Aleksandr Meshchriakov b8a18d6da4
Some checks failed
CI/CD Pipeline / Quality Gate (push) Failing after 14s
CI/CD Pipeline / Build and Push Images (push) Has been skipped
CI/CD Pipeline / Deploy Dev in Dokploy (push) Has been skipped
CI/CD Pipeline / Internal Notify (push) Successful in 0s
feat: migrate parser data to source records
2026-05-19 20:21:31 +02:00

614 lines
22 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from datetime import timedelta
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
from apps.core.models import BackgroundJob, JobStatus
from apps.parsers.models import ParserLoadLog
from apps.parsers.source_cards import (
SOURCE_CARD_DEFINITIONS,
SourceCardDefinition,
SourceCardService,
SourceItemDefinition,
)
from django.db import connection
from django.http import Http404
from django.test import SimpleTestCase, TestCase, override_settings
from django.test.utils import CaptureQueriesContext
from django.utils import timezone
from organizations.source_ingestion import (
OrganizationSourceIngestionService,
SourceRecordInput,
)
from rest_framework.exceptions import ValidationError
def _save_source_record(
    *,
    source: str,
    external_id: str,
    inn: str = "",
    organization_name: str = "",
    title: str = "",
    payload: dict | None = None,
) -> None:
    """Store one source record through the ingestion service.

    The record is always saved into load batch ``1``. An empty
    ``organization_name`` falls back to ``title`` and then to ``external_id``;
    a missing or empty ``payload`` is stored as an empty dict.
    """
    display_name = organization_name or title or external_id
    record = SourceRecordInput(
        external_id=external_id,
        title=title,
        organization_name=display_name,
        inn=inn,
        payload=payload if payload else {},
    )
    OrganizationSourceIngestionService.save_records(
        source=source,
        load_batch=1,
        records=[record],
    )
class SourceCardServiceUnitTest(SimpleTestCase):
    """Database-free tests for card definitions, param validation and refresh."""

    def test_list_cards_exposes_all_frontend_category_slugs_in_menu_order(self):
        """The card definitions list the expected slugs and titles in order."""
        self.assertEqual(
            [card.slug for card in SOURCE_CARD_DEFINITIONS],
            [
                "financial-indicators",
                "public-procurements",
                "manufacturers-and-products",
                "planned-inspections",
                "bankruptcy-procedures",
                "defense-unreliable-suppliers",
                "arbitration-cases",
                "information-security-registries",
                "labor-vacancies",
            ],
        )
        self.assertEqual(
            [card.title for card in SOURCE_CARD_DEFINITIONS],
            [
                "Финансово-экономические показатели",
                "Государственные закупки по 44-ФЗ и 223-ФЗ",
                "Производители и продукция России",
                "Плановые проверки Генпрокуратуры России",
                "Сведения о процедурах банкротства",
                "Недобросовестные поставщики ГОЗ",
                "Арбитражные дела",
                "Реестры по информационной безопасности",
                "Вакансии Работа России",
            ],
        )

    def test_get_definition_raises_for_unknown_slug(self):
        """An unknown slug results in ``Http404``."""
        with self.assertRaises(Http404):
            SourceCardService.get_definition("missing-card")

    def test_validate_refresh_params_rejects_unknown_param(self):
        """A parameter the card does not declare fails validation."""
        definition = SourceCardService.get_definition("public-procurements")
        with self.assertRaises(ValidationError) as error:
            SourceCardService._validate_refresh_params(
                definition,
                {"region_code": "77", "unexpected": "value"},
            )
        self.assertIn("Неизвестные параметры обновления", str(error.exception.detail))

    def test_validate_refresh_params_casts_integers(self):
        """Numeric strings are cast to ``int`` and ``law_type`` gets its default."""
        definition = SourceCardService.get_definition("public-procurements")
        validated = SourceCardService._validate_refresh_params(
            definition,
            {
                "region_code": "77",
                "current_year": "2025",
                "current_month": "2",
            },
        )
        self.assertEqual(
            validated,
            {
                "region_code": "77",
                "law_type": "44",
                "current_year": 2025,
                "current_month": 2,
            },
        )

    def test_validate_refresh_params_raises_on_invalid_integer(self):
        """A non-numeric value for an integer parameter fails validation."""
        definition = SourceCardService.get_definition("public-procurements")
        with self.assertRaises(ValidationError) as error:
            SourceCardService._validate_refresh_params(
                definition,
                {"region_code": "77", "current_year": "not-a-number"},
            )
        self.assertIn("Значение должно быть целым числом", str(error.exception.detail))

    @patch(
        "apps.parsers.source_cards.SourceCardService._enqueue_task",
        side_effect=[
            {
                "task_id": "task-1",
                "task_name": "apps.parsers.tasks.parse_industrial_production",
            },
            {
                "task_id": "task-2",
                "task_name": "apps.parsers.tasks.parse_industrial_products",
            },
            {"task_id": "task-3", "task_name": "apps.parsers.tasks.parse_manufactures"},
        ],
    )
    def test_refresh_card_for_manufacturers_enqueues_three_tasks(self, enqueue_mock):
        """Refreshing the manufacturers card enqueues three tasks in order."""
        result = SourceCardService.refresh_card(
            slug="manufacturers-and-products",
            requested_by_id=12,
        )
        self.assertEqual(result["source_card"], "manufacturers-and-products")
        self.assertEqual(
            [item["task_id"] for item in result["tasks"]],
            ["task-1", "task-2", "task-3"],
        )
        self.assertEqual(enqueue_mock.call_count, 3)

    @patch(
        "apps.parsers.source_cards.SourceCardService._enqueue_task",
        return_value={
            "task_id": "task-1",
            "task_name": "apps.parsers.tasks.sync_inspections",
        },
    )
    def test_launch_refresh_for_inspections_passes_supported_kwargs_only(
        self, enqueue_mock
    ):
        """The ``ignored`` parameter is not forwarded to the enqueued task."""
        definition = SourceCardService.get_definition("planned-inspections")
        result = SourceCardService._launch_refresh(
            definition,
            requested_by_id=44,
            params={
                "current_year": 2025,
                "current_month": 3,
                "use_playwright": True,
                "ignored": "value",
            },
        )
        self.assertEqual(
            result,
            [{"task_id": "task-1", "task_name": "apps.parsers.tasks.sync_inspections"}],
        )
        self.assertEqual(
            enqueue_mock.call_args.kwargs["kwargs"],
            {
                "requested_by_id": 44,
                "current_year": 2025,
                "current_month": 3,
                "use_playwright": True,
            },
        )

    @patch(
        "apps.parsers.source_cards.SourceCardService._enqueue_task",
        return_value={
            "task_id": "task-9",
            "task_name": "apps.parsers.tasks.sync_procurements",
        },
    )
    def test_refresh_card_for_procurements_uses_default_law_type(self, enqueue_mock):
        """``law_type`` is sent as ``"44"`` when the caller omits it."""
        result = SourceCardService.refresh_card(
            slug="public-procurements",
            requested_by_id=10,
            params={"region_code": "77", "current_year": "2026"},
        )
        self.assertEqual(result["source_card"], "public-procurements")
        self.assertEqual(result["tasks"][0]["task_id"], "task-9")
        self.assertEqual(
            enqueue_mock.call_args.kwargs["kwargs"],
            {
                "requested_by_id": 10,
                "region_code": "77",
                "law_type": "44",
                "current_year": 2026,
            },
        )

    @patch(
        "apps.parsers.source_cards.SourceCardService._enqueue_task",
        side_effect=[
            {
                "task_id": "task-unfair",
                "task_name": "apps.parsers.tasks.parse_unfair_suppliers",
            },
            {
                "task_id": "task-goz",
                "task_name": "apps.parsers.tasks.parse_fas_goz_evasion",
            },
        ],
    )
    def test_refresh_card_for_defense_unreliable_suppliers_enqueues_sources(
        self, enqueue_mock
    ):
        """Both sources of the card are enqueued, each tagged with its source."""
        result = SourceCardService.refresh_card(
            slug="defense-unreliable-suppliers",
            requested_by_id=10,
        )
        self.assertEqual(result["source_card"], "defense-unreliable-suppliers")
        self.assertEqual(
            [item["task_id"] for item in result["tasks"]],
            ["task-unfair", "task-goz"],
        )
        self.assertEqual(
            [call.kwargs["meta"]["source"] for call in enqueue_mock.call_args_list],
            [
                ParserLoadLog.Source.UNFAIR_SUPPLIERS,
                ParserLoadLog.Source.FAS_GOZ,
            ],
        )

    def test_launch_refresh_raises_for_unsupported_card(self):
        """A card definition without task names cannot be refreshed."""
        definition = SourceCardDefinition(
            slug="custom-source",
            title="Custom",
            description="Custom card",
            order=999,
            task_names=(),
            source_items=(
                SourceItemDefinition(
                    code="custom",
                    title="Custom",
                    description="Custom source",
                ),
            ),
        )
        with self.assertRaises(ValidationError) as error:
            SourceCardService._launch_refresh(
                definition,
                requested_by_id=1,
                params={},
            )
        self.assertIn(
            "Обновление для карточки не поддерживается", str(error.exception.detail)
        )

    def test_enqueue_task_deletes_background_job_on_async_error(self):
        """If ``apply_async`` raises, the job row is deleted and the error re-raised."""
        task = MagicMock()
        task.apply_async.side_effect = RuntimeError("broker down")
        queryset = MagicMock()
        with patch(
            "apps.parsers.source_cards.uuid.uuid4", return_value="task-id-1"
        ), patch("apps.parsers.source_cards.BackgroundJobService.create_job"), patch(
            "apps.parsers.source_cards.BackgroundJobService.get_queryset",
            return_value=queryset,
        ), self.assertRaisesMessage(RuntimeError, "broker down"):
            SourceCardService._enqueue_task(
                task=task,
                task_name="apps.parsers.tasks.sync_procurements",
                requested_by_id=5,
                meta={"source_card": "public-procurements"},
                kwargs={"region_code": "77"},
            )
        # The cleanup assertions run after the exception has been captured.
        queryset.filter.assert_called_once_with(task_id="task-id-1")
        queryset.filter.return_value.delete.assert_called_once_with()

    def test_helper_methods_cover_unknown_codes_and_status_variants(self):
        """Helpers return neutral values for unknown codes; ``_get_status`` variants.

        Each status scenario runs in its own ``subTest`` so that one failing
        scenario does not hide the outcome of the ones after it.
        """
        self.assertEqual(SourceCardService._get_source_records_count("unknown"), 0)
        self.assertEqual(
            SourceCardService._get_source_organizations_count("unknown"), 0
        )
        self.assertIsNone(SourceCardService._get_source_data_timestamp("unknown"))
        self.assertIsNone(SourceCardService._get_latest_load_by_source(None))
        self.assertEqual(SourceCardService._get_status_label("custom"), "custom")

        unavailable_definition = SourceCardDefinition(
            slug="unavailable",
            title="Unavailable",
            description="Unavailable source",
            order=1,
            task_names=(),
            source_items=(),
            is_available=False,
        )
        financial_definition = SourceCardService.get_definition(
            "financial-indicators"
        )
        in_progress_load = SimpleNamespace(status="in_progress")
        failed_load = SimpleNamespace(status="failed")
        stale_in_progress_load = SimpleNamespace(
            status="in_progress",
            updated_at=timezone.now() - timedelta(hours=3),
        )

        # (scenario, definition, active_tasks, latest_load, last_updated_at, expected)
        scenarios = [
            ("unavailable card", unavailable_definition, [], None, None, "unavailable"),
            (
                "active task",
                financial_definition,
                [{"progress": 10}],
                None,
                None,
                "in_progress",
            ),
            (
                "load in progress",
                financial_definition,
                [],
                in_progress_load,
                None,
                "in_progress",
            ),
            (
                "stale in-progress load",
                financial_definition,
                [],
                stale_in_progress_load,
                None,
                "error",
            ),
            ("failed load", financial_definition, [], failed_load, None, "error"),
            (
                "data present without load",
                financial_definition,
                [],
                None,
                object(),
                "success",
            ),
            ("no data and no load", financial_definition, [], None, None, "idle"),
        ]
        for (
            scenario,
            definition,
            active_tasks,
            latest_load,
            last_updated_at,
            expected,
        ) in scenarios:
            with self.subTest(scenario=scenario):
                self.assertEqual(
                    SourceCardService._get_status(
                        definition=definition,
                        active_tasks=active_tasks,
                        latest_load=latest_load,
                        last_updated_at=last_updated_at,
                    ),
                    expected,
                )
@override_settings(PARSER_STALE_LOAD_MAX_AGE_MINUTES=90)
class SourceCardServiceDatabaseTest(TestCase):
    """Database-backed tests for card aggregates, caching and active tasks."""

    def setUp(self):
        """Start every test with an empty source-card cache and clear it afterwards."""
        super().setUp()
        SourceCardService.clear_cache()
        # Rows created by a test do not outlive it, but aggregates cached from
        # them could (the cache evidently survives between tests, hence the
        # clear above). Clearing again on teardown keeps later test classes
        # from reading them.
        self.addCleanup(SourceCardService.clear_cache)

    @staticmethod
    def _log_successful_load(source):
        """Create a successful one-record load log entry in batch 1 for ``source``."""
        ParserLoadLog.objects.create(
            source=source,
            batch_id=1,
            records_count=1,
            status=ParserLoadLog.Status.SUCCESS,
        )

    @staticmethod
    def _card_by_slug(cards, slug):
        """Return the first card dict in ``cards`` whose ``slug`` matches."""
        return next(card for card in cards if card["slug"] == slug)

    def test_defense_unreliable_suppliers_counts_unique_generic_organizations(self):
        """Two sources sharing one INN give two records but one organization."""
        _save_source_record(
            source=ParserLoadLog.Source.UNFAIR_SUPPLIERS,
            external_id="unfair-1",
            inn="7701234567",
            organization_name='ООО "Поставщик"',
            title="Недобросовестный поставщик",
            payload={"number": "unfair-1"},
        )
        _save_source_record(
            source=ParserLoadLog.Source.FAS_GOZ,
            external_id="goz-1",
            inn="7701234567",
            organization_name='ООО "Поставщик"',
            title="Уклонение от ГОЗ",
            payload={"number": "goz-1"},
        )
        self._log_successful_load(ParserLoadLog.Source.UNFAIR_SUPPLIERS)
        self._log_successful_load(ParserLoadLog.Source.FAS_GOZ)

        card = SourceCardService.get_card("defense-unreliable-suppliers")

        self.assertEqual(card["status"], "success")
        self.assertEqual(card["records_count"], 2)
        self.assertEqual(card["organizations_count"], 1)

    def test_public_procurements_counts_generic_eis_sources(self):
        """A notice and a contract with one INN count as a single organization."""
        _save_source_record(
            source=ParserLoadLog.Source.PROCUREMENTS_44FZ,
            external_id="notice-1",
            inn="7701234567",
            organization_name="ГБУ Заказчик",
            title="Закупка 44-ФЗ",
            payload={"number": "notice-1"},
        )
        _save_source_record(
            source=ParserLoadLog.Source.CONTRACTS,
            external_id="contract-1",
            inn="7701234567",
            organization_name="ГБУ Заказчик",
            title="Контракт ЕИС",
            payload={"number": "contract-1"},
        )
        self._log_successful_load(ParserLoadLog.Source.PROCUREMENTS_44FZ)
        self._log_successful_load(ParserLoadLog.Source.CONTRACTS)

        card = SourceCardService.get_card("public-procurements")

        self.assertEqual(card["status"], "success")
        self.assertEqual(card["records_count"], 2)
        self.assertEqual(card["organizations_count"], 1)

    def test_public_procurements_counts_generic_buyers_without_inn(self):
        """Without an INN, records with the same organization name count once."""
        _save_source_record(
            source=ParserLoadLog.Source.PROCUREMENTS_44FZ,
            external_id="notice-1",
            organization_name="ГБУ Заказчик",
            title="Закупка 44-ФЗ",
            payload={"Заказчик": "ГБУ Заказчик"},
        )
        _save_source_record(
            source=ParserLoadLog.Source.CONTRACTS,
            external_id="contract-1",
            organization_name="ГБУ Заказчик",
            title="Контракт ЕИС",
            payload={"Заказчик": "ГБУ Заказчик"},
        )
        _save_source_record(
            source=ParserLoadLog.Source.PROCUREMENTS_223FZ,
            external_id="notice-2",
            organization_name="АО Другой заказчик",
            title="Закупка 223-ФЗ",
            payload={"Наименование заказчика": "АО Другой заказчик"},
        )

        card = SourceCardService.get_card("public-procurements")

        self.assertEqual(card["records_count"], 3)
        self.assertEqual(card["organizations_count"], 2)
        source_items = {item["code"]: item for item in card["source_items"]}
        self.assertEqual(source_items["procurements_44fz"]["organizations_count"], 1)
        self.assertEqual(source_items["procurements_223fz"]["organizations_count"], 1)
        self.assertEqual(source_items["contracts"]["organizations_count"], 1)

    def test_list_cards_uses_batched_aggregations(self):
        """Listing all cards with a cold cache takes at most seven queries."""
        _save_source_record(
            source=ParserLoadLog.Source.PROCUREMENTS_44FZ,
            external_id="notice-1",
            inn="7701234567",
            organization_name="ГБУ Заказчик",
            title="Закупка 44-ФЗ",
        )
        _save_source_record(
            source=ParserLoadLog.Source.CONTRACTS,
            external_id="contract-1",
            inn="7701234567",
            organization_name="ГБУ Заказчик",
            title="Контракт ЕИС",
        )
        self._log_successful_load(ParserLoadLog.Source.PROCUREMENTS_44FZ)
        self._log_successful_load(ParserLoadLog.Source.CONTRACTS)

        with CaptureQueriesContext(connection) as captured:
            cards = SourceCardService.list_cards()

        self.assertLessEqual(len(captured), 7)
        procurements_card = self._card_by_slug(cards, "public-procurements")
        self.assertEqual(procurements_card["records_count"], 2)
        self.assertEqual(procurements_card["organizations_count"], 1)

    def test_list_cards_reuses_cached_aggregate_stats(self):
        """A second listing takes at most two queries."""
        _save_source_record(
            source=ParserLoadLog.Source.PROCUREMENTS_44FZ,
            external_id="notice-1",
            inn="7701234567",
            organization_name="ГБУ Заказчик",
            title="Закупка 44-ФЗ",
        )
        # Warm-up call; only the second call is measured.
        SourceCardService.list_cards()

        with CaptureQueriesContext(connection) as captured:
            cards = SourceCardService.list_cards()

        self.assertLessEqual(len(captured), 2)
        procurements_card = self._card_by_slug(cards, "public-procurements")
        self.assertEqual(procurements_card["records_count"], 1)

    def test_ingestion_invalidates_source_card_aggregate_cache(self):
        """A record saved after a listing is visible in the next listing."""
        SourceCardService.list_cards()
        _save_source_record(
            source=ParserLoadLog.Source.PROCUREMENTS_44FZ,
            external_id="notice-1",
            inn="7701234567",
            organization_name="ГБУ Заказчик",
            title="Закупка 44-ФЗ",
        )

        cards = SourceCardService.list_cards()

        procurements_card = self._card_by_slug(cards, "public-procurements")
        self.assertEqual(procurements_card["records_count"], 1)

    def test_get_active_tasks_ignores_old_jobs_even_when_updated_recently(self):
        """A job created and started three hours ago is not reported as active."""
        job = BackgroundJob.objects.create(
            task_id="old-source-task",
            task_name="apps.parsers.tasks.parse_industrial_products",
            status=JobStatus.STARTED,
            progress=10,
            meta={"source": "industrial_products"},
        )
        old_timestamp = timezone.now() - timedelta(hours=3)
        # Backdate creation/start via a queryset update while keeping
        # ``updated_at`` fresh.
        BackgroundJob.objects.filter(pk=job.pk).update(
            created_at=old_timestamp,
            started_at=old_timestamp,
            updated_at=timezone.now(),
        )

        tasks = SourceCardService._get_active_tasks(
            SourceCardService.get_definition("manufacturers-and-products")
        )

        self.assertEqual(tasks, [])

    def test_get_active_tasks_keeps_recent_pending_jobs(self):
        """A freshly created pending job is reported as an active task."""
        BackgroundJob.objects.create(
            task_id="fresh-source-task",
            task_name="apps.parsers.tasks.parse_industrial_products",
            status=JobStatus.PENDING,
            progress=0,
            meta={"source": "industrial_products"},
        )

        tasks = SourceCardService._get_active_tasks(
            SourceCardService.get_definition("manufacturers-and-products")
        )

        self.assertEqual(len(tasks), 1)
        self.assertEqual(tasks[0]["task_id"], "fresh-source-task")