feat(parsers): add SOAP API support for zakupki.gov.ru

- Add post() method to BaseHTTPClient for SOAP requests
- Update download_file() to support custom headers (for token)
- Add ZAKUPKI_TOKEN and PARSER_PROXIES settings
- Improve SOAP error parsing to show EIS error messages
- Update E2E tests to use token from settings
- Add data/ and .zed/ to gitignore
This commit is contained in:
2026-01-28 13:13:10 +01:00
parent c6483d8427
commit a369642459
6 changed files with 149 additions and 14 deletions

View File

@@ -29,3 +29,7 @@ LOG_LEVEL=INFO
# Scrapy Settings # Scrapy Settings
SCRAPY_LOG_LEVEL=INFO SCRAPY_LOG_LEVEL=INFO
# Parsers API Tokens
# Токен для zakupki.gov.ru (получить через Госуслуги на https://zakupki.gov.ru/pmd/auth/welcome)
ZAKUPKI_TOKEN=

2
.gitignore vendored
View File

@@ -39,3 +39,5 @@ Thumbs.db
# Backup files # Backup files
*.bak *.bak
*.backupdata/ *.backupdata/
data/
.zed/

View File

@@ -189,6 +189,65 @@ class BaseHTTPClient:
logger.debug("Response %d from %s", response.status_code, url) logger.debug("Response %d from %s", response.status_code, url)
return response return response
def post(
    self,
    endpoint: str,
    data: bytes | str | None = None,
    json: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Send a POST request and return the raw response body.

    Args:
        endpoint: Path or full URL; resolved through ``_build_url``.
        data: Request body (bytes or str).
        json: JSON request body.
        headers: Extra headers passed along with this request.

    Returns:
        Response content as bytes.

    Raises:
        ConnectionError: On a connection failure or a request timeout.
        HTTPClientError: On any other ``requests`` exception.
        HTTPError: When the response status is not OK (4xx, 5xx).
    """
    url = self._build_url(endpoint)
    logger.info("POST %s (proxy: %s)", url, self._current_proxy)

    # Hand the session its own dict so the caller's mapping is never shared.
    extra_headers = dict(headers) if headers else {}

    try:
        reply = self.session.post(
            url,
            data=data,
            json=json,
            headers=extra_headers,
            timeout=self.timeout,
        )
    # Order matters: connection errors are matched before timeouts, and the
    # generic RequestException is the catch-all for everything else.
    except requests.exceptions.ConnectionError as exc:
        logger.error("Connection error: %s - %s", url, exc)
        raise ConnectionError(f"Failed to connect to {url}", url=url) from exc
    except requests.exceptions.Timeout as exc:
        logger.error("Timeout: %s", url)
        raise ConnectionError(f"Request timeout for {url}", url=url) from exc
    except requests.exceptions.RequestException as exc:
        logger.error("Request error: %s - %s", url, exc)
        raise HTTPClientError(f"Request failed: {exc}", url=url) from exc

    status = reply.status_code
    if reply.ok:
        logger.debug("POST Response %d from %s", status, url)
        return reply.content

    # Non-OK status: report it and surface it as a typed HTTP error.
    logger.error("HTTP error %d: %s", status, url)
    raise HTTPError(
        f"HTTP {status} for {url}",
        status_code=status,
        url=url,
    )
def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict: def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict:
""" """
Выполнить GET запрос и вернуть JSON. Выполнить GET запрос и вернуть JSON.
@@ -203,12 +262,15 @@ class BaseHTTPClient:
response = self.get(endpoint, params=params) response = self.get(endpoint, params=params)
return response.json() return response.json()
def download_file(self, endpoint: str) -> bytes: def download_file(
self, endpoint: str, headers: dict[str, str] | None = None
) -> bytes:
""" """
Скачать файл. Скачать файл.
Args: Args:
endpoint: Путь или полный URL файла endpoint: Путь или полный URL файла
headers: Дополнительные заголовки
Returns: Returns:
Содержимое файла как bytes Содержимое файла как bytes
@@ -216,9 +278,34 @@ class BaseHTTPClient:
url = self._build_url(endpoint) url = self._build_url(endpoint)
logger.info("Downloading file: %s", url) logger.info("Downloading file: %s", url)
response = self.get(endpoint) # Выполняем GET с дополнительными заголовками
content = response.content request_headers = {}
if headers:
request_headers.update(headers)
try:
response = self.session.get(
url, headers=request_headers, timeout=self.timeout
)
except requests.exceptions.ConnectionError as e:
logger.error("Connection error: %s - %s", url, e)
raise ConnectionError(f"Failed to connect to {url}", url=url) from e
except requests.exceptions.Timeout as e:
logger.error("Timeout: %s", url)
raise ConnectionError(f"Request timeout for {url}", url=url) from e
except requests.exceptions.RequestException as e:
logger.error("Request error: %s - %s", url, e)
raise HTTPClientError(f"Request failed: {e}", url=url) from e
if not response.ok:
logger.error("HTTP error %d: %s", response.status_code, url)
raise HTTPError(
f"HTTP {response.status_code} for {url}",
status_code=response.status_code,
url=url,
)
content = response.content
logger.info("Downloaded %d bytes from %s", len(content), url) logger.info("Downloaded %d bytes from %s", len(content), url)
return content return content

View File

@@ -370,7 +370,9 @@ class ZakupkiClient:
</soapenv:Body> </soapenv:Body>
</soapenv:Envelope>""" </soapenv:Envelope>"""
def _parse_soap_response(self, response_content: bytes) -> str | None: def _parse_soap_response( # noqa: C901
self, response_content: bytes
) -> str | None:
"""Извлечь URL архива из SOAP ответа.""" """Извлечь URL архива из SOAP ответа."""
try: try:
xml_str = response_content.decode("utf-8") xml_str = response_content.decode("utf-8")
@@ -383,12 +385,27 @@ class ZakupkiClient:
logger.info("Found archive URL: %s", elem.text) logger.info("Found archive URL: %s", elem.text)
return elem.text.strip() return elem.text.strip()
# Проверяем на ошибки # Проверяем на errorInfo (структурированная ошибка ЕИС)
for elem in root.iter(): for elem in root.iter():
if "fault" in elem.tag.lower() or "error" in elem.tag.lower(): if elem.tag.endswith("errorInfo"):
error_text = elem.text or ET.tostring(elem, encoding="unicode") code = ""
logger.error("SOAP error: %s", error_text) message = ""
raise ZakupkiClientError(f"SOAP error: {error_text}") for child in elem:
if child.tag.endswith("code") and child.text:
code = child.text.strip()
if child.tag.endswith("message") and child.text:
message = child.text.strip()
if message:
error_msg = f"[{code}] {message}" if code else message
logger.error("EIS error: %s", error_msg)
raise ZakupkiClientError(f"EIS error: {error_msg}")
# Проверяем на fault (SOAP fault)
for elem in root.iter():
if "fault" in elem.tag.lower():
error_text = ET.tostring(elem, encoding="unicode")
logger.error("SOAP fault: %s", error_text)
raise ZakupkiClientError(f"SOAP fault: {error_text}")
logger.warning("No archiveUrl found in SOAP response") logger.warning("No archiveUrl found in SOAP response")
return None return None
@@ -432,13 +449,17 @@ class ZakupkiClient:
procurements = self._download_and_parse_http(plan.file_url, None) procurements = self._download_and_parse_http(plan.file_url, None)
all_procurements.extend(procurements) all_procurements.extend(procurements)
logger.info( logger.info(
"Parsed %d procurements from %s", len(procurements), plan.file_name "Parsed %d procurements from %s",
len(procurements),
plan.file_name,
) )
if progress_callback: if progress_callback:
progress_callback(95, f"Загружено {len(all_procurements)} закупок") progress_callback(95, f"Загружено {len(all_procurements)} закупок")
logger.info("Total fetched %d procurements via HTTP", len(all_procurements)) logger.info(
"Total fetched %d procurements via HTTP", len(all_procurements)
)
return all_procurements return all_procurements
def _discover_data_files( def _discover_data_files(

View File

@@ -16,11 +16,17 @@ import unittest
from apps.parsers.clients.zakupki import ZakupkiClient from apps.parsers.clients.zakupki import ZakupkiClient
from apps.parsers.models import ParserLoadLog, ProcurementRecord from apps.parsers.models import ParserLoadLog, ProcurementRecord
from apps.parsers.services import ParserLoadLogService, ProcurementService from apps.parsers.services import ParserLoadLogService, ProcurementService
from django.conf import settings
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
# Флаг для запуска E2E тестов # Флаг для запуска E2E тестов
RUN_E2E_TESTS = os.environ.get("RUN_E2E_TESTS", "").lower() in ("1", "true", "yes") RUN_E2E_TESTS = os.environ.get("RUN_E2E_TESTS", "").lower() in ("1", "true", "yes")
# Token handed to ZakupkiClient in the E2E tests: read from Django settings,
# falling back to the ZAKUPKI_TOKEN environment variable when the setting is
# missing or empty (and to "" when neither is provided).
ZAKUPKI_TOKEN = getattr(settings, "ZAKUPKI_TOKEN", "") or os.environ.get(
"ZAKUPKI_TOKEN", ""
)
@unittest.skipUnless(RUN_E2E_TESTS, "E2E tests disabled. Set RUN_E2E_TESTS=1 to enable") @unittest.skipUnless(RUN_E2E_TESTS, "E2E tests disabled. Set RUN_E2E_TESTS=1 to enable")
class ZakupkiClientE2ETestCase(TestCase): class ZakupkiClientE2ETestCase(TestCase):
@@ -32,7 +38,7 @@ class ZakupkiClientE2ETestCase(TestCase):
def setUp(self): def setUp(self):
"""Подготовка.""" """Подготовка."""
self.client = ZakupkiClient(timeout=60) self.client = ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60)
def tearDown(self): def tearDown(self):
"""Очистка.""" """Очистка."""
@@ -103,7 +109,7 @@ class ProcurementServiceE2ETestCase(TestCase):
) )
# Загрузка данных # Загрузка данных
with ZakupkiClient(timeout=60) as client: with ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60) as client:
procurements = client.fetch_procurements( procurements = client.fetch_procurements(
region_code="77", region_code="77",
year=2025, year=2025,

View File

@@ -221,6 +221,21 @@ CACHES = {
} }
# =============================================================================
# PARSERS SETTINGS
# =============================================================================
# API token for zakupki.gov.ru (obtained through the Gosuslugi portal).
ZAKUPKI_TOKEN = get_env("ZAKUPKI_TOKEN", "")
# Proxies for the parsers, supplied as a single comma-separated string.
# Entries are stripped and blank ones dropped; an empty or non-string value
# results in an empty list.
PARSER_PROXIES = get_env("PARSER_PROXIES", "")
PARSER_PROXIES = (
    [entry for entry in map(str.strip, PARSER_PROXIES.split(",")) if entry]
    if isinstance(PARSER_PROXIES, str) and PARSER_PROXIES
    else []
)
# Password validation # Password validation
AUTH_PASSWORD_VALIDATORS = [ AUTH_PASSWORD_VALIDATORS = [
{ {