fix remaining parser source failures
Some checks failed
CI/CD Pipeline / Manual Action Help (push) Has been skipped
CI/CD Pipeline / Start Dev Containers in Dokploy (push) Has been skipped
CI/CD Pipeline / Drop and Recreate Dev Database (push) Has been skipped
CI/CD Pipeline / Quality Gate (push) Successful in 54s
CI/CD Pipeline / Build and Push Images (push) Failing after 2m59s
CI/CD Pipeline / Deploy Dev in Dokploy (push) Has been skipped
CI/CD Pipeline / Internal Notify (push) Successful in 1s
Some checks failed
CI/CD Pipeline / Manual Action Help (push) Has been skipped
CI/CD Pipeline / Start Dev Containers in Dokploy (push) Has been skipped
CI/CD Pipeline / Drop and Recreate Dev Database (push) Has been skipped
CI/CD Pipeline / Quality Gate (push) Successful in 54s
CI/CD Pipeline / Build and Push Images (push) Failing after 2m59s
CI/CD Pipeline / Deploy Dev in Dokploy (push) Has been skipped
CI/CD Pipeline / Internal Notify (push) Successful in 1s
This commit is contained in:
@@ -11,7 +11,9 @@ from dataclasses import dataclass, field
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import urllib3
|
||||||
from requests.adapters import BaseAdapter
|
from requests.adapters import BaseAdapter
|
||||||
|
from urllib3.exceptions import InsecureRequestWarning
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -96,6 +98,8 @@ class BaseHTTPClient:
|
|||||||
def _create_session(self) -> requests.Session:
|
def _create_session(self) -> requests.Session:
|
||||||
"""Создать и настроить сессию requests."""
|
"""Создать и настроить сессию requests."""
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
|
if not self.verify_ssl:
|
||||||
|
urllib3.disable_warnings(InsecureRequestWarning)
|
||||||
|
|
||||||
# Настройка прокси
|
# Настройка прокси
|
||||||
self._current_proxy = self._select_proxy()
|
self._current_proxy = self._select_proxy()
|
||||||
|
|||||||
@@ -252,6 +252,8 @@ PARSER_SOURCES: dict[str, ParserSourceDescriptor] = {
|
|||||||
parser_strategy="fedresurs_bankruptcy_search",
|
parser_strategy="fedresurs_bankruptcy_search",
|
||||||
source_notes=(
|
source_notes=(
|
||||||
"Официальный ЕФРСБ; может отдавать anti-bot challenge worker'ам. "
|
"Официальный ЕФРСБ; может отдавать anti-bot challenge worker'ам. "
|
||||||
|
"Если официальный портал недоступен, используется Checko API по "
|
||||||
|
"организациям из реестров. "
|
||||||
"Ручная загрузка разрешена только для выгрузок, переданных Сергеем."
|
"Ручная загрузка разрешена только для выгрузок, переданных Сергеем."
|
||||||
),
|
),
|
||||||
supports_file_upload=True,
|
supports_file_upload=True,
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ Celery задачи для приложения парсеров.
|
|||||||
Интегрируются с BackgroundJob для отслеживания прогресса.
|
Интегрируются с BackgroundJob для отслеживания прогресса.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
@@ -14,6 +15,8 @@ from pathlib import Path
|
|||||||
|
|
||||||
from apps.core.services import BackgroundJobService
|
from apps.core.services import BackgroundJobService
|
||||||
from apps.core.tasks import PeriodicTask as CorePeriodicTask
|
from apps.core.tasks import PeriodicTask as CorePeriodicTask
|
||||||
|
from apps.parsers.clients.checko import CheckoClient, CompanyRequest
|
||||||
|
from apps.parsers.clients.checko.exceptions import CheckoError
|
||||||
from apps.parsers.clients.common import GenericParserItem, StructuredDataClient
|
from apps.parsers.clients.common import GenericParserItem, StructuredDataClient
|
||||||
from apps.parsers.clients.minpromtorg import (
|
from apps.parsers.clients.minpromtorg import (
|
||||||
IndustrialProductionClient,
|
IndustrialProductionClient,
|
||||||
@@ -38,6 +41,8 @@ from apps.parsers.services import (
|
|||||||
)
|
)
|
||||||
from apps.parsers.source_registry import PARSER_SOURCES
|
from apps.parsers.source_registry import PARSER_SOURCES
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
|
from django.conf import settings
|
||||||
|
from registers.models import Organization
|
||||||
from requests.adapters import BaseAdapter
|
from requests.adapters import BaseAdapter
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -45,6 +50,11 @@ logger = logging.getLogger(__name__)
|
|||||||
# Константы для синхронизации проверок
|
# Константы для синхронизации проверок
|
||||||
DEFAULT_START_YEAR = 2025
|
DEFAULT_START_YEAR = 2025
|
||||||
DEFAULT_START_MONTH = 1
|
DEFAULT_START_MONTH = 1
|
||||||
|
# Extra keyword arguments, keyed by parser source, that are merged into the
# StructuredDataClient constructor call for that source. Sources without an
# entry get no extra arguments.
STRUCTURED_SOURCE_OPTIONS = {
    # SECURITY: certificate verification is switched off for this source.
    # NOTE(review): presumably required by a broken upstream certificate
    # chain - confirm before extending this override to other sources.
    "fstec": {"verify_ssl": False},
    # NOTE(review): timeout is presumably expressed in seconds - confirm
    # against the client implementation.
    "fedresurs_bankruptcy": {"timeout": 30},
}

# Default number of registry organizations queried through Checko when the
# FEDRESURS_CHECKO_FALLBACK_LIMIT setting is absent or not a valid integer.
FEDRESURS_CHECKO_FALLBACK_LIMIT = 100
|
||||||
|
|
||||||
|
|
||||||
def _resolve_proxies(proxies: list[str] | None) -> list[str] | None:
|
def _resolve_proxies(proxies: list[str] | None) -> list[str] | None:
|
||||||
@@ -151,7 +161,11 @@ def _fetch_structured_records(
|
|||||||
) -> list[GenericParserItem]:
|
) -> list[GenericParserItem]:
|
||||||
"""Загрузить records через structured client из URL или локального storage."""
|
"""Загрузить records через structured client из URL или локального storage."""
|
||||||
descriptor = PARSER_SOURCES[source_key]
|
descriptor = PARSER_SOURCES[source_key]
|
||||||
client = StructuredDataClient(source=source_key, proxies=proxies)
|
client = StructuredDataClient(
|
||||||
|
source=source_key,
|
||||||
|
proxies=proxies,
|
||||||
|
**STRUCTURED_SOURCE_OPTIONS.get(source_key, {}),
|
||||||
|
)
|
||||||
if file_path:
|
if file_path:
|
||||||
from django.core.files.storage import default_storage
|
from django.core.files.storage import default_storage
|
||||||
|
|
||||||
@@ -164,6 +178,152 @@ def _fetch_structured_records(
|
|||||||
return client.fetch_records(file_url=file_url or descriptor.upstream_url)
|
return client.fetch_records(file_url=file_url or descriptor.upstream_url)
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_fedresurs_bankruptcy_records(
    *,
    file_url: str | None,
    file_path: str | None,
    proxies: list[str] | None,
) -> list[GenericParserItem]:
    """Load bankruptcy records from the official source, else fall back to Checko.

    The official ``fedresurs_bankruptcy`` structured source is tried first.
    When it raises and the caller supplied neither ``file_url`` nor
    ``file_path``, the failure is logged and the records are fetched through
    ``_fetch_checko_bankruptcy_records`` instead.

    Args:
        file_url: Explicit URL to load from, if any.
        file_path: Explicit storage path to load from, if any.
        proxies: Proxy list forwarded to whichever fetcher is used.

    Returns:
        Records from the official source, or from the Checko fallback.

    Raises:
        Exception: Whatever the official fetch raised, when an explicit
            ``file_url`` or ``file_path`` was requested (no fallback then).
    """
    # An explicit file/URL is a targeted request: substituting Checko data
    # for it would hide the failure from the caller.
    explicit_input_requested = bool(file_url or file_path)
    try:
        official_records = _fetch_structured_records(
            source_key="fedresurs_bankruptcy",
            file_url=file_url,
            file_path=file_path,
            proxies=proxies,
        )
    except Exception as exc:
        if explicit_input_requested:
            raise
        logger.warning(
            "Fedresurs official source failed, falling back to Checko: %s",
            exc,
        )
        return _fetch_checko_bankruptcy_records(proxies=proxies)
    return official_records
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_checko_bankruptcy_records(
    *,
    proxies: list[str] | None,
) -> list[GenericParserItem]:
    """Fetch EFRSB bankruptcy messages for registry organizations via Checko.

    Reads ``CHECKO_API_KEY``, ``FEDRESURS_CHECKO_FALLBACK_LIMIT`` and
    ``CHECKO_USE_RUNTIME_PROXIES`` from Django settings, takes at most
    ``limit`` organizations ordered by ``mn_inn``, requests each one from
    Checko by INN and converts the returned bankruptcy messages with
    ``_checko_bankruptcy_items``.

    Args:
        proxies: Runtime proxy list; passed to ``CheckoClient`` only when
            ``settings.CHECKO_USE_RUNTIME_PROXIES`` is truthy.

    Returns:
        Generic records for every bankruptcy message found. An empty list
        when the API key is empty, the limit is not positive, or the registry
        contains no organizations.
    """
    api_key = getattr(settings, "CHECKO_API_KEY", "")
    if not api_key:
        logger.warning("CHECKO_API_KEY is empty; Fedresurs fallback skipped")
        return []

    # A missing or malformed setting falls back to the module default rather
    # than aborting the whole task.
    try:
        limit = int(
            getattr(
                settings,
                "FEDRESURS_CHECKO_FALLBACK_LIMIT",
                FEDRESURS_CHECKO_FALLBACK_LIMIT,
            )
        )
    except (TypeError, ValueError):
        limit = FEDRESURS_CHECKO_FALLBACK_LIMIT
    if limit <= 0:
        logger.info("Fedresurs Checko fallback is disabled by limit=%s", limit)
        return []

    organizations = list(
        Organization.objects.order_by("mn_inn").values(
            "mn_inn",
            "mn_ogrn",
            "pn_name",
        )[:limit]
    )
    if not organizations:
        logger.info("No registry organizations found for Fedresurs fallback")
        return []

    checko_proxies = (
        proxies if getattr(settings, "CHECKO_USE_RUNTIME_PROXIES", False) else None
    )
    client = CheckoClient(api_key=api_key, proxies=checko_proxies, timeout=30)

    records: list[GenericParserItem] = []
    for organization in organizations:
        raw_inn = organization["mn_inn"]
        inn = "" if raw_inn is None else str(raw_inn).strip()
        if not inn:
            # Nothing to look up without an INN; str(None) would otherwise
            # send the literal "None" to Checko.
            # NOTE(review): whether mn_inn is nullable is not visible here.
            continue
        raw_ogrn = organization["mn_ogrn"]
        # Same reasoning: never let "None" leak into stored records.
        ogrn = "" if raw_ogrn is None else str(raw_ogrn)
        name = organization["pn_name"] or ""

        try:
            response = client.get_company(CompanyRequest(inn=inn))
        except CheckoError as exc:
            # Best effort: one failed lookup must not abort the whole batch.
            logger.info("Checko bankruptcy lookup skipped for inn=%s: %s", inn, exc)
            continue

        company = response.data
        if company is None:
            continue
        records.extend(
            _checko_bankruptcy_items(
                company=company,
                fallback_inn=inn,
                fallback_ogrn=ogrn,
                fallback_name=name,
            )
        )

    logger.info("Fetched %d bankruptcy records through Checko fallback", len(records))
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def _checko_bankruptcy_items(
    *,
    company,
    fallback_inn: str,
    fallback_ogrn: str,
    fallback_name: str,
) -> list[GenericParserItem]:
    """Convert the bankruptcy messages of one Checko company into generic records.

    Args:
        company: Checko company object. Its ``inn``, ``ogrn``, ``short_name``
            and ``bankruptcy`` attributes are read with ``getattr``, so any
            of them may be absent.
        fallback_inn: INN used when the company carries no truthy ``inn``.
        fallback_ogrn: OGRN used when the company carries no truthy ``ogrn``.
        fallback_name: Name used when the company has no truthy ``short_name``.

    Returns:
        One ``GenericParserItem`` per bankruptcy message; an empty list when
        the company has no messages.
    """
    inn = str(getattr(company, "inn", "") or fallback_inn)
    ogrn = str(getattr(company, "ogrn", "") or fallback_ogrn)
    name = getattr(company, "short_name", None) or fallback_name

    # The attribute can be present but None; a plain getattr default only
    # covers the "missing" case and iterating None would raise TypeError.
    messages = getattr(company, "bankruptcy", None) or ()

    records: list[GenericParserItem] = []
    for message in messages:
        message_type = getattr(message, "type", "") or "Сообщение ЕФРСБ"
        message_date = getattr(message, "date", "") or ""
        case_number = getattr(message, "case_number", None) or ""
        external_id = _fedresurs_external_id(
            inn=inn,
            message_type=message_type,
            message_date=message_date,
            case_number=case_number,
        )
        records.append(
            GenericParserItem(
                source="fedresurs_bankruptcy",
                external_id=external_id,
                inn=inn,
                ogrn=ogrn,
                organisation_name=name,
                title=message_type,
                record_date=message_date,
                status=message_type,
                # The payload records that the data came through Checko,
                # not from the official portal directly.
                payload={
                    "provider": "checko",
                    "declared_source": "ЕФРСБ",
                    "inn": inn,
                    "ogrn": ogrn,
                    "organisation_name": name,
                    "type": message_type,
                    "date": message_date,
                    "case_number": case_number,
                },
            )
        )
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def _fedresurs_external_id(
    *,
    inn: str,
    message_type: str,
    message_date: str,
    case_number: str,
) -> str:
    """Build a stable identifier for an EFRSB message from the fallback source.

    The four fields are joined with ``:``, hashed with SHA-256, and the first
    24 hex characters of the digest are appended to the ``checko-fedresurs:``
    prefix, so identical inputs always produce the same identifier.

    Args:
        inn: Organization INN.
        message_type: Message type text.
        message_date: Message date text.
        case_number: Court case number, or an empty string.

    Returns:
        Identifier of the form ``checko-fedresurs:<24 hex chars>``.
    """
    fingerprint = "{}:{}:{}:{}".format(inn, message_type, message_date, case_number)
    hex_digest = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
    return "checko-fedresurs:" + hex_digest[:24]
|
||||||
|
|
||||||
|
|
||||||
@shared_task(bind=True, base=CorePeriodicTask)
|
@shared_task(bind=True, base=CorePeriodicTask)
|
||||||
def sync_ru_proxies(self) -> dict[str, int | str]: # noqa: ARG001
|
def sync_ru_proxies(self) -> dict[str, int | str]: # noqa: ARG001
|
||||||
"""Периодически загружать RU-прокси из Proxy-Tools."""
|
"""Периодически загружать RU-прокси из Proxy-Tools."""
|
||||||
@@ -1665,8 +1825,7 @@ def parse_fedresurs_bankruptcy(
|
|||||||
source=ParserLoadLog.Source.FEDRESURS_BANKRUPTCY,
|
source=ParserLoadLog.Source.FEDRESURS_BANKRUPTCY,
|
||||||
task_name="apps.parsers.tasks.parse_fedresurs_bankruptcy",
|
task_name="apps.parsers.tasks.parse_fedresurs_bankruptcy",
|
||||||
requested_by_id=requested_by_id,
|
requested_by_id=requested_by_id,
|
||||||
fetch_records=lambda: _fetch_structured_records(
|
fetch_records=lambda: _fetch_fedresurs_bankruptcy_records(
|
||||||
source_key="fedresurs_bankruptcy",
|
|
||||||
file_url=file_url,
|
file_url=file_url,
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
proxies=proxies,
|
proxies=proxies,
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ from apps.parsers.tasks import (
|
|||||||
)
|
)
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
from openpyxl import Workbook
|
from openpyxl import Workbook
|
||||||
|
from registers.models import Organization
|
||||||
|
|
||||||
from tests.apps.parsers.factories import (
|
from tests.apps.parsers.factories import (
|
||||||
InspectionRecordFactory,
|
InspectionRecordFactory,
|
||||||
@@ -155,6 +156,92 @@ class SyncRuProxiesTaskTestCase(TestCase):
|
|||||||
sync_mock.assert_called_once_with()
|
sync_mock.assert_called_once_with()
|
||||||
|
|
||||||
|
|
||||||
|
class GenericSourceFetchTestCase(TestCase):
    """Checks for per-source fetch options and the Fedresurs Checko fallback."""

    def test_fstec_disables_ssl_verification_for_broken_certificate_chain(self):
        """The FSTEC source must build its client with ``verify_ssl=False``."""

        class _RecordingStructuredClient:
            # Shared class-level registry of every instance constructed.
            instances = []

            def __init__(self, **kwargs):
                self.kwargs = kwargs
                self.instances.append(self)

            def fetch_records(self, **_kwargs):
                return []

        spy_cls = _RecordingStructuredClient
        with patch.object(parser_tasks, "StructuredDataClient", spy_cls):
            fetched = parser_tasks._fetch_structured_records(
                source_key="fstec",
                file_url="https://reestr.fstec.ru/reg3",
                file_path=None,
                proxies=[],
            )

        self.assertEqual(fetched, [])
        self.assertEqual(len(spy_cls.instances), 1)
        constructor_kwargs = spy_cls.instances[0].kwargs
        self.assertEqual(constructor_kwargs["source"], "fstec")
        self.assertFalse(constructor_kwargs["verify_ssl"])

    @override_settings(CHECKO_API_KEY="test-key", FEDRESURS_CHECKO_FALLBACK_LIMIT=10)
    def test_fedresurs_falls_back_to_checko_for_registry_organizations(self):
        """A failing official source yields records built from Checko data."""
        organization = Organization.objects.create(
            pn_name='ООО "Тест"',
            mn_ogrn=1027700000000,
            mn_inn=7701000001,
            in_kpp=770101001,
            mn_okpo="12345678",
        )

        class _CheckoClient:
            """Stand-in client that returns one bankruptcy message."""

            def __init__(self, **_kwargs):
                return

            def get_company(self, _request):
                bankruptcy_message = SimpleNamespace(
                    type="Сообщение о введении наблюдения",
                    date="2026-04-01",
                    case_number="А40-1/2026",
                )
                company = SimpleNamespace(
                    ogrn=str(organization.mn_ogrn),
                    inn=str(organization.mn_inn),
                    short_name=organization.pn_name,
                    bankruptcy=(bankruptcy_message,),
                )
                return SimpleNamespace(data=company)

        # Make the official fetch fail so the fallback path is exercised.
        official_failure = patch.object(
            parser_tasks,
            "_fetch_structured_records",
            side_effect=RuntimeError("HTTP 401"),
        )
        fake_checko = patch.object(parser_tasks, "CheckoClient", _CheckoClient)
        with official_failure:
            with fake_checko:
                records = parser_tasks._fetch_fedresurs_bankruptcy_records(
                    file_url=None,
                    file_path=None,
                    proxies=[],
                )

        self.assertEqual(len(records), 1)
        record = records[0]
        self.assertEqual(record.source, "fedresurs_bankruptcy")
        self.assertEqual(record.inn, str(organization.mn_inn))
        self.assertEqual(record.ogrn, str(organization.mn_ogrn))
        self.assertEqual(record.record_date, "2026-04-01")
        self.assertEqual(record.payload["provider"], "checko")
|
||||||
|
|
||||||
|
|
||||||
@override_settings(
|
@override_settings(
|
||||||
CELERY_TASK_ALWAYS_EAGER=True,
|
CELERY_TASK_ALWAYS_EAGER=True,
|
||||||
CELERY_TASK_EAGER_PROPAGATES=True,
|
CELERY_TASK_EAGER_PROPAGATES=True,
|
||||||
|
|||||||
Reference in New Issue
Block a user