diff --git a/.env.example b/.env.example index f3fe2f2..58ecefd 100644 --- a/.env.example +++ b/.env.example @@ -28,4 +28,8 @@ CORS_ALLOWED_ORIGINS=http://localhost:3000,http://127.0.0.1:3000 LOG_LEVEL=INFO # Scrapy Settings -SCRAPY_LOG_LEVEL=INFO \ No newline at end of file +SCRAPY_LOG_LEVEL=INFO + +# Parsers API Tokens +# Токен для zakupki.gov.ru (получить через Госуслуги на https://zakupki.gov.ru/pmd/auth/welcome) +ZAKUPKI_TOKEN= \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8ef9cb4..40f5676 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,5 @@ Thumbs.db # Backup files *.bak *.backupdata/ +data/ +.zed/ diff --git a/src/apps/parsers/clients/base.py b/src/apps/parsers/clients/base.py index 9bfbd5a..7717f97 100644 --- a/src/apps/parsers/clients/base.py +++ b/src/apps/parsers/clients/base.py @@ -189,6 +189,65 @@ class BaseHTTPClient: logger.debug("Response %d from %s", response.status_code, url) return response + def post( + self, + endpoint: str, + data: bytes | str | None = None, + json: dict[str, Any] | None = None, + headers: dict[str, str] | None = None, + ) -> bytes: + """ + Выполнить POST запрос. + + Args: + endpoint: Путь или полный URL + data: Тело запроса (bytes или str) + json: JSON тело запроса + headers: Дополнительные заголовки + + Returns: + Содержимое ответа как bytes + + Raises: + ConnectionError: При ошибке подключения + HTTPError: При HTTP ошибке (4xx, 5xx) + """ + url = self._build_url(endpoint) + logger.info("POST %s (proxy: %s)", url, self._current_proxy) + + request_headers = {} + if headers: + request_headers.update(headers) + + try: + response = self.session.post( + url, + data=data, + json=json, + headers=request_headers, + timeout=self.timeout, + ) + except requests.exceptions.ConnectionError as e: + logger.error("Connection error: %s - %s", url, e) + raise ConnectionError(f"Failed to connect to {url}", url=url) from e + except requests.exceptions.Timeout as e: + logger.error("Timeout: %s", url) + raise ConnectionError(f"Request timeout for {url}", url=url) from e + except requests.exceptions.RequestException as e: + logger.error("Request error: %s - %s", url, e) + raise HTTPClientError(f"Request failed: {e}", url=url) from e + + if not response.ok: + logger.error("HTTP error %d: %s", response.status_code, url) + raise HTTPError( + f"HTTP {response.status_code} for {url}", + status_code=response.status_code, + url=url, + ) + + logger.debug("POST Response %d from %s", response.status_code, url) + return response.content + def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict: """ Выполнить GET запрос и вернуть JSON. @@ -203,12 +262,15 @@ class BaseHTTPClient: response = self.get(endpoint, params=params) return response.json() - def download_file(self, endpoint: str) -> bytes: + def download_file( + self, endpoint: str, headers: dict[str, str] | None = None + ) -> bytes: """ Скачать файл. Args: endpoint: Путь или полный URL файла + headers: Дополнительные заголовки Returns: Содержимое файла как bytes @@ -216,9 +278,34 @@ class BaseHTTPClient: url = self._build_url(endpoint) logger.info("Downloading file: %s", url) - response = self.get(endpoint) - content = response.content + # Выполняем GET с дополнительными заголовками + request_headers = {} + if headers: + request_headers.update(headers) + try: + response = self.session.get( + url, headers=request_headers, timeout=self.timeout + ) + except requests.exceptions.ConnectionError as e: + logger.error("Connection error: %s - %s", url, e) + raise ConnectionError(f"Failed to connect to {url}", url=url) from e + except requests.exceptions.Timeout as e: + logger.error("Timeout: %s", url) + raise ConnectionError(f"Request timeout for {url}", url=url) from e + except requests.exceptions.RequestException as e: + logger.error("Request error: %s - %s", url, e) + raise HTTPClientError(f"Request failed: {e}", url=url) from e + + if not response.ok: + logger.error("HTTP error %d: %s", response.status_code, url) + raise HTTPError( + f"HTTP {response.status_code} for {url}", + status_code=response.status_code, + url=url, + ) + + content = response.content logger.info("Downloaded %d bytes from %s", len(content), url) return content diff --git a/src/apps/parsers/clients/zakupki/__init__.py b/src/apps/parsers/clients/zakupki/__init__.py index c43ae34..099811b 100644 --- a/src/apps/parsers/clients/zakupki/__init__.py +++ b/src/apps/parsers/clients/zakupki/__init__.py @@ -370,7 +370,9 @@ class ZakupkiClient: """ - def _parse_soap_response(self, response_content: bytes) -> str | None: + def _parse_soap_response( # noqa: C901 + self, response_content: bytes + ) -> str | None: """Извлечь URL архива из SOAP ответа.""" try: xml_str = response_content.decode("utf-8") @@ -383,12 +385,27 @@ class ZakupkiClient: logger.info("Found archive URL: %s", elem.text) return elem.text.strip() - # Проверяем на ошибки + # Проверяем на errorInfo (структурированная ошибка ЕИС) for elem in root.iter(): - if "fault" in elem.tag.lower() or "error" in elem.tag.lower(): - error_text = elem.text or ET.tostring(elem, encoding="unicode") - logger.error("SOAP error: %s", error_text) - raise ZakupkiClientError(f"SOAP error: {error_text}") + if elem.tag.endswith("errorInfo"): + code = "" + message = "" + for child in elem: + if child.tag.endswith("code") and child.text: + code = child.text.strip() + if child.tag.endswith("message") and child.text: + message = child.text.strip() + if message: + error_msg = f"[{code}] {message}" if code else message + logger.error("EIS error: %s", error_msg) + raise ZakupkiClientError(f"EIS error: {error_msg}") + + # Проверяем на fault (SOAP fault) + for elem in root.iter(): + if "fault" in elem.tag.lower(): + error_text = ET.tostring(elem, encoding="unicode") + logger.error("SOAP fault: %s", error_text) + raise ZakupkiClientError(f"SOAP fault: {error_text}") logger.warning("No archiveUrl found in SOAP response") return None @@ -432,13 +449,17 @@ class ZakupkiClient: procurements = self._download_and_parse_http(plan.file_url, None) all_procurements.extend(procurements) logger.info( - "Parsed %d procurements from %s", len(procurements), plan.file_name + "Parsed %d procurements from %s", + len(procurements), + plan.file_name, ) if progress_callback: progress_callback(95, f"Загружено {len(all_procurements)} закупок") - logger.info("Total fetched %d procurements via HTTP", len(all_procurements)) + logger.info( + "Total fetched %d procurements via HTTP", len(all_procurements) + ) return all_procurements def _discover_data_files( diff --git a/src/apps/parsers/tests/test_e2e.py b/src/apps/parsers/tests/test_e2e.py index dfb292a..67d348d 100644 --- a/src/apps/parsers/tests/test_e2e.py +++ b/src/apps/parsers/tests/test_e2e.py @@ -16,11 +16,17 @@ import unittest from apps.parsers.clients.zakupki import ZakupkiClient from apps.parsers.models import ParserLoadLog, ProcurementRecord from apps.parsers.services import ParserLoadLogService, ProcurementService +from django.conf import settings from django.test import TestCase, override_settings # Флаг для запуска E2E тестов RUN_E2E_TESTS = os.environ.get("RUN_E2E_TESTS", "").lower() in ("1", "true", "yes") +# Токен из settings (или переменной окружения) +ZAKUPKI_TOKEN = getattr(settings, "ZAKUPKI_TOKEN", "") or os.environ.get( + "ZAKUPKI_TOKEN", "" +) + @unittest.skipUnless(RUN_E2E_TESTS, "E2E tests disabled. Set RUN_E2E_TESTS=1 to enable") class ZakupkiClientE2ETestCase(TestCase): @@ -32,7 +38,7 @@ class ZakupkiClientE2ETestCase(TestCase): def setUp(self): """Подготовка.""" - self.client = ZakupkiClient(timeout=60) + self.client = ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60) def tearDown(self): """Очистка.""" @@ -103,7 +109,7 @@ class ProcurementServiceE2ETestCase(TestCase): ) # Загрузка данных - with ZakupkiClient(timeout=60) as client: + with ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60) as client: procurements = client.fetch_procurements( region_code="77", year=2025, diff --git a/src/config/settings/base.py b/src/config/settings/base.py index 17cfd8b..044fe66 100644 --- a/src/config/settings/base.py +++ b/src/config/settings/base.py @@ -221,6 +221,21 @@ CACHES = { } +# ============================================================================= +# PARSERS SETTINGS +# ============================================================================= + +# Zakupki.gov.ru API Token (получить через Госуслуги) +ZAKUPKI_TOKEN = get_env("ZAKUPKI_TOKEN", "") + +# Proxy list for parsers (comma-separated) +PARSER_PROXIES = get_env("PARSER_PROXIES", "") +if isinstance(PARSER_PROXIES, str) and PARSER_PROXIES: + PARSER_PROXIES = [p.strip() for p in PARSER_PROXIES.split(",") if p.strip()] +else: + PARSER_PROXIES = [] + + # Password validation AUTH_PASSWORD_VALIDATORS = [ {