fix(parsers): handle stale and blocked sources
Some checks failed
CI/CD Pipeline / Manual Action Help (push) Has been skipped
CI/CD Pipeline / Build Golden Images (push) Has been skipped
CI/CD Pipeline / Quality Gate (push) Failing after 1s
CI/CD Pipeline / Start Dev Containers in Dokploy (push) Has been skipped
CI/CD Pipeline / Drop and Recreate Dev Database (push) Has been skipped
CI/CD Pipeline / Build and Push Images (push) Has been skipped
CI/CD Pipeline / Deploy Dev in Dokploy (push) Has been skipped
CI/CD Pipeline / Internal Notify (push) Successful in 1s

This commit is contained in:
2026-04-28 20:55:43 +02:00
parent 77d84b9778
commit 4bcac334cd
5 changed files with 315 additions and 7 deletions

View File

@@ -1,8 +1,10 @@
"""Tests for parsers services."""
from datetime import timedelta
from unittest.mock import patch
from urllib.parse import urlparse
from apps.core.models import BackgroundJob, JobStatus
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from apps.parsers.clients.minpromtorg.schemas import (
IndustrialCertificate,
@@ -32,6 +34,7 @@ from apps.parsers.services import (
)
from apps.registers.models import Organization
from django.test import TestCase, override_settings, tag
from django.utils import timezone
from tests.utils import TestHTTPServer
from tests.utils.fixtures import build_minpromtorg_certificates_excel, fake
@@ -367,6 +370,47 @@ class ParserLoadLogServiceTest(TestCase):
log.refresh_from_db()
self.assertEqual(log.records_count, 250)
def test_mark_stale_in_progress_failed_marks_old_logs(self):
    """A load left in progress with no active job is marked as failed.

    The log's ``updated_at`` is pushed three hours into the past, which is
    older than the 90-minute limit handed to the cleanup call. The cleanup
    is expected to report one update, flip the status to FAILED and record
    an error message containing "Stale parser load".
    """
    stale_log = ParserLoadLogFactory(
        source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS,
        batch_id=1,
        status=ParserLoadLog.Status.IN_PROGRESS,
    )
    # Backdate updated_at directly in the database via a queryset update.
    three_hours_ago = timezone.now() - timedelta(hours=3)
    ParserLoadLog.objects.filter(pk=stale_log.pk).update(
        updated_at=three_hours_ago
    )

    marked_count = ParserLoadLogService.mark_stale_in_progress_failed(
        max_age_minutes=90
    )

    # Reload so the assertions see what the service wrote.
    stale_log.refresh_from_db()
    self.assertEqual(marked_count, 1)
    self.assertEqual(stale_log.status, ParserLoadLog.Status.FAILED)
    self.assertIn("Stale parser load", stale_log.error_message)
def test_mark_stale_in_progress_failed_keeps_fresh_active_job(self):
    """An old in-progress load is left alone while a matching job is active.

    A STARTED BackgroundJob whose ``meta`` carries the same source and batch
    id as the log is created before the cleanup runs. The cleanup is then
    expected to report zero updates and leave the log IN_PROGRESS.
    """
    running_log = ParserLoadLogFactory(
        source=ParserLoadLog.Source.INDUSTRIAL_PRODUCTS,
        batch_id=1,
        status=ParserLoadLog.Status.IN_PROGRESS,
    )
    # Backdate updated_at directly in the database via a queryset update.
    three_hours_ago = timezone.now() - timedelta(hours=3)
    ParserLoadLog.objects.filter(pk=running_log.pk).update(
        updated_at=three_hours_ago
    )

    # The job is tied to the log through the source/batch_id pair in meta.
    job_meta = {
        "source": running_log.source,
        "batch_id": running_log.batch_id,
    }
    BackgroundJob.objects.create(
        task_id="active-task",
        task_name="apps.parsers.tasks.parse_industrial_products",
        status=JobStatus.STARTED,
        meta=job_meta,
    )

    marked_count = ParserLoadLogService.mark_stale_in_progress_failed(
        max_age_minutes=90
    )

    running_log.refresh_from_db()
    self.assertEqual(marked_count, 0)
    self.assertEqual(running_log.status, ParserLoadLog.Status.IN_PROGRESS)
class IndustrialCertificateServiceTest(TestCase):
"""Tests for IndustrialCertificateService."""

View File

@@ -13,6 +13,7 @@ from unittest.mock import patch
from urllib.parse import urlparse
from apps.parsers import tasks as parser_tasks
from apps.parsers.clients.base import HTTPError
from apps.parsers.clients.minpromtorg.industrial import (
IndustrialProductionClient,
IndustrialProductionClientError,
@@ -36,6 +37,7 @@ from apps.parsers.models import (
ParserLoadLog,
ProcurementRecord,
)
from apps.parsers.services import ParserLoadLogService
from apps.parsers.tasks import (
_move_to_dir,
_process_fns_file_sync,
@@ -241,6 +243,56 @@ class GenericSourceFetchTestCase(TestCase):
self.assertEqual(records[0].record_date, "2026-04-01")
self.assertEqual(records[0].payload["provider"], "checko")
@override_settings(CHECKO_API_KEY="")
def test_fedresurs_skips_when_official_blocked_and_fallback_empty(self):
    """The fedresurs task reports "skipped" when the upstream returns 401.

    ``_fetch_structured_records`` is patched to raise an HTTPError with
    status 401, and CHECKO_API_KEY is overridden to an empty string
    (presumably this disables the Checko fallback; confirm against the
    task implementation). Both the task result and the stored load log
    are expected to carry the skipped status.
    """
    upstream_error = HTTPError(
        "HTTP 401 for https://bankrot.fedresurs.ru/",
        status_code=401,
        url="https://bankrot.fedresurs.ru/",
    )
    fetch_patch = patch.object(
        parser_tasks,
        "_fetch_structured_records",
        side_effect=upstream_error,
    )

    with fetch_patch:
        task_result = parser_tasks.parse_fedresurs_bankruptcy(proxies=[])

    # .get() also asserts that exactly one log exists for this source.
    load_log = ParserLoadLog.objects.get(
        source=ParserLoadLog.Source.FEDRESURS_BANKRUPTCY
    )
    self.assertEqual(task_result["status"], "skipped")
    self.assertEqual(load_log.status, ParserLoadLog.Status.SKIPPED)
    self.assertIn("fedresurs upstream", load_log.error_message)
def test_fstec_skips_when_upstream_is_blocked(self):
    """The FSTEC task reports "skipped" when the upstream returns 403.

    ``_fetch_structured_records`` is patched to raise an HTTPError with
    status 403. Both the task result and the stored load log are expected
    to carry the skipped status, with "fstec upstream" in the log message.
    """
    upstream_error = HTTPError(
        "HTTP 403 for https://reestr.fstec.ru/reg3",
        status_code=403,
        url="https://reestr.fstec.ru/reg3",
    )
    fetch_patch = patch.object(
        parser_tasks,
        "_fetch_structured_records",
        side_effect=upstream_error,
    )

    with fetch_patch:
        task_result = parser_tasks.parse_fstec_registers(proxies=[])

    # .get() also asserts that exactly one log exists for this source.
    load_log = ParserLoadLog.objects.get(source=ParserLoadLog.Source.FSTEC)
    self.assertEqual(task_result["status"], "skipped")
    self.assertEqual(load_log.status, ParserLoadLog.Status.SKIPPED)
    self.assertIn("fstec upstream", load_log.error_message)
def test_cleanup_stale_parser_loads_returns_count(self):
    """The cleanup task forwards max_age_minutes and reports the count.

    ``ParserLoadLogService.mark_stale_in_progress_failed`` is replaced with
    a mock returning 2, so only the task's own wiring is exercised: the
    keyword argument it passes on and the shape of the dict it returns.
    """
    service_patch = patch.object(
        ParserLoadLogService,
        "mark_stale_in_progress_failed",
        return_value=2,
    )

    with service_patch as service_mock:
        task_result = parser_tasks.cleanup_stale_parser_loads(
            max_age_minutes=45
        )

    service_mock.assert_called_once_with(max_age_minutes=45)
    self.assertEqual(task_result["status"], "success")
    self.assertEqual(task_result["marked_failed"], 2)
    self.assertEqual(task_result["max_age_minutes"], 45)
@override_settings(
CELERY_TASK_ALWAYS_EAGER=True,