From c72343a37512a7eac9743b868a67e01c109e25e7 Mon Sep 17 00:00:00 2001 From: Aleksandr Meshchriakov Date: Tue, 28 Apr 2026 13:58:55 +0200 Subject: [PATCH] fix parser schedule run issues --- .gitea/workflows/ci-cd.yml | 34 +++++++- .../0019_widen_generic_record_date.py | 18 ++++ src/apps/parsers/models.py | 2 +- src/apps/parsers/tasks.py | 86 ++++++++++++++----- tests/apps/parsers/test_models.py | 10 +++ tests/apps/parsers/test_tasks.py | 28 ++++++ tests/apps/parsers/test_views.py | 26 ++++++ 7 files changed, 179 insertions(+), 25 deletions(-) create mode 100644 src/apps/parsers/migrations/0019_widen_generic_record_date.py diff --git a/.gitea/workflows/ci-cd.yml b/.gitea/workflows/ci-cd.yml index cb16219..510b7d5 100644 --- a/.gitea/workflows/ci-cd.yml +++ b/.gitea/workflows/ci-cd.yml @@ -764,8 +764,40 @@ jobs: fi } + wait_for_migrations() { + export PGPASSWORD="${POSTGRES_PASSWORD}" + for attempt in $(seq 1 60); do + SCHEMA_STATE=$(psql \ + --set ON_ERROR_STOP=1 \ + --host="${POSTGRES_HOST}" \ + --port="${POSTGRES_PORT}" \ + --username="${POSTGRES_USER}" \ + --dbname="${POSTGRES_DB}" \ + --tuples-only \ + --no-align \ + <<'SQL' + SELECT CASE + WHEN to_regclass('public.django_migrations') IS NOT NULL + AND to_regclass('public.core_backgroundjob') IS NOT NULL + THEN 'ready' + ELSE 'waiting' + END; + SQL + ) + if [ "${SCHEMA_STATE}" = "ready" ]; then + echo "Database schema is ready after web deploy" + return 0 + fi + echo "Waiting for web migrations (${attempt}/60)" + sleep 5 + done + + echo "Database schema was not ready after web deploy" >&2 + exit 1 + } + call_webhook "dev web" "${DOKPLOY_DEV_WEB_WEBHOOK_URL}" "web" - sleep 45 + wait_for_migrations call_webhook "dev worker" "${DOKPLOY_DEV_WORKER_WEBHOOK_URL}" "worker" call_webhook "dev beat" "${DOKPLOY_DEV_BEAT_WEBHOOK_URL}" "beat" diff --git a/src/apps/parsers/migrations/0019_widen_generic_record_date.py b/src/apps/parsers/migrations/0019_widen_generic_record_date.py new file mode 100644 index 0000000..8b5a0e4 --- /dev/null +++ b/src/apps/parsers/migrations/0019_widen_generic_record_date.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.25 on 2026-04-28 11:52 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('parsers', '0018_seed_weekly_parser_schedules'), + ] + + operations = [ + migrations.AlterField( + model_name='genericparserrecord', + name='record_date', + field=models.CharField(blank=True, db_index=True, help_text='Дата записи в формате источника', max_length=255, verbose_name='дата записи'), + ), + ] diff --git a/src/apps/parsers/models.py b/src/apps/parsers/models.py index 2f73158..71dba17 100644 --- a/src/apps/parsers/models.py +++ b/src/apps/parsers/models.py @@ -405,7 +405,7 @@ class GenericParserRecord(TimestampMixin, models.Model): ) record_date = models.CharField( _("дата записи"), - max_length=30, + max_length=255, blank=True, db_index=True, help_text=_("Дата записи в формате источника"), diff --git a/src/apps/parsers/tasks.py b/src/apps/parsers/tasks.py index 03a4769..04e7a3e 100644 --- a/src/apps/parsers/tasks.py +++ b/src/apps/parsers/tasks.py @@ -914,6 +914,11 @@ def sync_inspections( # noqa: C901 proxies: list[str] | None = None, client_adapter: BaseAdapter | None = None, use_playwright: bool | None = None, + max_months_per_law: int | None = None, + start_year: int | None = None, + start_month: int | None = None, + include_fz294: bool | None = None, + include_fz248: bool | None = None, current_year: int | None = None, current_month: int | None = None, requested_by_id: int | None = None, @@ -932,6 +937,11 @@ def sync_inspections( # noqa: C901 proxies: Список прокси-серверов (опционально) client_adapter: HTTP-адаптер (опционально). use_playwright: Использовать Playwright (опционально). + max_months_per_law: Максимум месяцев для каждого закона. + start_year: Год стартового периода, если нужно переопределить resume. + start_month: Месяц стартового периода, если нужно переопределить resume. + include_fz294: Загружать проверки по ФЗ-294. + include_fz248: Загружать проверки по ФЗ-248. current_year: Год (опционально) для ограничения периода. current_month: Месяц (опционально) для ограничения периода. @@ -967,6 +977,10 @@ def sync_inspections( # noqa: C901 now = datetime.now() current_year = current_year or now.year current_month = current_month or now.month + include_fz294 = True if include_fz294 is None else include_fz294 + include_fz248 = True if include_fz248 is None else include_fz248 + if max_months_per_law is not None: + max_months_per_law = max(1, int(max_months_per_law)) total_saved = 0 results = {"fz294": [], "fz248": []} @@ -978,43 +992,68 @@ def sync_inspections( # noqa: C901 client_kwargs["use_playwright"] = use_playwright with ProverkiClient(**client_kwargs) as client: # Обрабатываем оба типа проверок - for is_fz248 in [False, True]: + law_modes = [] + if include_fz294: + law_modes.append(False) + if include_fz248: + law_modes.append(True) + + for is_fz248 in law_modes: fz_key = "fz248" if is_fz248 else "fz294" fz_name = "ФЗ-248" if is_fz248 else "ФЗ-294" # Определяем начальную точку - last_year, last_month = InspectionService.get_last_loaded_period( - is_federal_law_248=is_fz248 - ) - - if last_year and last_month: - # Начинаем со следующего месяца после последнего загруженного - start_year, start_month = _get_next_month(last_year, last_month) + if start_year and start_month: + year, month = start_year, start_month logger.info( - "%s: continuing from %d/%d (last loaded: %d/%d)", + "%s: starting from explicit period %d/%d", fz_name, - start_year, - start_month, - last_year, - last_month, + year, + month, ) else: - # Начинаем с дефолтной даты - start_year, start_month = DEFAULT_START_YEAR, DEFAULT_START_MONTH - logger.info( - "%s: no data in DB, starting from %d/%d", - fz_name, - start_year, - start_month, + last_year, last_month = InspectionService.get_last_loaded_period( + is_federal_law_248=is_fz248 ) - # Загружаем месяц за месяцем - year, month = start_year, start_month + if last_year and last_month: + # Начинаем со следующего месяца после последнего загруженного + year, month = _get_next_month(last_year, last_month) + logger.info( + "%s: continuing from %d/%d (last loaded: %d/%d)", + fz_name, + year, + month, + last_year, + last_month, + ) + else: + # Начинаем с дефолтной даты + year, month = DEFAULT_START_YEAR, DEFAULT_START_MONTH + logger.info( + "%s: no data in DB, starting from %d/%d", + fz_name, + year, + month, + ) + empty_months_count = 0 + processed_months = 0 while year < current_year or ( year == current_year and month <= current_month ): + if ( + max_months_per_law is not None + and processed_months >= max_months_per_law + ): + logger.info( + "%s: stopping after %d processed months by request limit", + fz_name, + processed_months, + ) + break + # Прекращаем если 2 месяца подряд нет данных if empty_months_count >= 2: logger.info( @@ -1082,7 +1121,8 @@ def sync_inspections( # noqa: C901 ) empty_months_count += 1 - # Переходим к следующему месяцу + processed_months += 1 + # Переходим к следующему месяцу. year, month = _get_next_month(year, month) # Обновляем лог diff --git a/tests/apps/parsers/test_models.py b/tests/apps/parsers/test_models.py index 27aaf62..099a2b4 100644 --- a/tests/apps/parsers/test_models.py +++ b/tests/apps/parsers/test_models.py @@ -1,6 +1,7 @@ """Tests for parsers models.""" from apps.parsers.models import ( + GenericParserRecord, IndustrialCertificateRecord, IndustrialProductRecord, ManufacturerRecord, @@ -97,6 +98,15 @@ class ParserLoadLogModelTest(TestCase): self.assertIsNotNone(log.updated_at) +class GenericParserRecordModelTest(TestCase): + """Tests for generic parser records.""" + + def test_record_date_allows_source_specific_long_values(self): + field = GenericParserRecord._meta.get_field("record_date") + + self.assertEqual(field.max_length, 255) + + class IndustrialCertificateRecordModelTest(TestCase): """Tests for IndustrialCertificateRecord model.""" diff --git a/tests/apps/parsers/test_tasks.py b/tests/apps/parsers/test_tasks.py index f8e3f5e..538236c 100644 --- a/tests/apps/parsers/test_tasks.py +++ b/tests/apps/parsers/test_tasks.py @@ -896,6 +896,34 @@ class ParseInspectionsTaskTestCase(TestCase): self.assertEqual(result["status"], "success") self.assertEqual(result["total_saved"], 0) + def test_sync_inspections_honors_limited_params(self): + xml_content, rows = build_proverki_xml(count=1) + archive = build_zip([("inspections.xml", xml_content)]) + + with TestHTTPServer() as server: + server.add_bytes( + _portal_path(2026, 4), + archive, + content_type="application/zip", + ) + result = sync_inspections( + proxies=[], + client_adapter=server.adapter, + use_playwright=False, + max_months_per_law=1, + start_year=2026, + start_month=4, + include_fz294=True, + include_fz248=False, + current_year=2026, + current_month=5, + ) + + self.assertEqual(result["status"], "success") + self.assertEqual(len(result["results"]["fz294"]), 1) + self.assertEqual(result["results"]["fz248"], []) + self.assertGreaterEqual(result["total_saved"], len(rows)) + def test_sync_inspections_resumes_from_last_loaded(self): last_year = 2024 last_month = 12 diff --git a/tests/apps/parsers/test_views.py b/tests/apps/parsers/test_views.py index bf9d671..4eb3aac 100644 --- a/tests/apps/parsers/test_views.py +++ b/tests/apps/parsers/test_views.py @@ -5,6 +5,7 @@ from __future__ import annotations import io import os import tempfile +from unittest.mock import Mock, patch from apps.parsers.models import FinancialReport, FinancialReportLine, ProcurementRecord from django.core.files.uploadedfile import SimpleUploadedFile @@ -341,3 +342,28 @@ class ParsersViewSetTest(APITestCase): self.assertEqual(updated.status_code, status.HTTP_200_OK) self.assertEqual(updated.data["planned_inspections"], "weekly") + + def test_run_sync_inspections_accepts_limited_sync_params(self): + self.client.force_authenticate(self.user) + url = reverse("api_v1:parsers:run-parser", args=["sync_inspections"]) + payload = { + "max_months_per_law": 1, + "start_year": 2026, + "start_month": 4, + "include_fz294": True, + "include_fz248": False, + "current_year": 2026, + "current_month": 4, + } + + with patch( + "apps.parsers.views.tasks.sync_inspections.apply_async", + return_value=Mock(id="task-123"), + ) as apply_async_mock: + response = self.client.post(url, payload, format="json") + + self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) + task_kwargs = apply_async_mock.call_args.kwargs["kwargs"] + for key, value in payload.items(): + self.assertEqual(task_kwargs[key], value) + self.assertEqual(task_kwargs["requested_by_id"], self.user.id)