feat(parsers): add SOAP API support for zakupki.gov.ru

- Add post() method to BaseHTTPClient for SOAP requests
- Update download_file() to support custom headers (for token)
- Add ZAKUPKI_TOKEN and PARSER_PROXIES settings
- Improve SOAP error parsing to show EIS error messages
- Update E2E tests to use token from settings
- Add data/ and .zed/ to gitignore
This commit is contained in:
2026-01-28 13:13:10 +01:00
parent c6483d8427
commit a369642459
6 changed files with 149 additions and 14 deletions

View File

@@ -29,3 +29,7 @@ LOG_LEVEL=INFO
# Scrapy Settings
SCRAPY_LOG_LEVEL=INFO
# Parsers API Tokens
# Токен для zakupki.gov.ru (получить через Госуслуги на https://zakupki.gov.ru/pmd/auth/welcome)
ZAKUPKI_TOKEN=

2
.gitignore vendored
View File

@@ -39,3 +39,5 @@ Thumbs.db
# Backup files
*.bak
*.backup
data/
.zed/

View File

@@ -189,6 +189,65 @@ class BaseHTTPClient:
logger.debug("Response %d from %s", response.status_code, url)
return response
def post(
    self,
    endpoint: str,
    data: bytes | str | None = None,
    json: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Send a POST request and return the raw response body.

    Args:
        endpoint: Path or full URL; resolved through ``self._build_url``.
        data: Raw request body (bytes or str), e.g. a SOAP envelope.
        json: JSON request body.
        headers: Extra headers sent with this request.

    Returns:
        Response content as bytes.

    Raises:
        ConnectionError: On connection failure or request timeout.
        HTTPClientError: On any other ``requests`` failure.
        HTTPError: When the response status is not OK (4xx, 5xx).
    """
    url = self._build_url(endpoint)
    logger.info("POST %s (proxy: %s)", url, self._current_proxy)

    # Work on a copy so the caller's mapping is never handed to the session.
    request_headers = dict(headers) if headers else {}

    try:
        response = self.session.post(
            url,
            data=data,
            json=json,
            headers=request_headers,
            timeout=self.timeout,
        )
    except requests.exceptions.ConnectionError as e:
        logger.error("Connection error: %s - %s", url, e)
        raise ConnectionError(f"Failed to connect to {url}", url=url) from e
    except requests.exceptions.Timeout as e:
        logger.error("Timeout: %s", url)
        raise ConnectionError(f"Request timeout for {url}", url=url) from e
    except requests.exceptions.RequestException as e:
        logger.error("Request error: %s - %s", url, e)
        raise HTTPClientError(f"Request failed: {e}", url=url) from e

    if not response.ok:
        # SOAP 1.1 delivers faults with an error status (HTTP 500) and puts
        # the actual fault text in the body, so keep a short preview in the
        # log instead of discarding it. The slice bounds the log size and
        # errors="replace" keeps a non-UTF-8 body from raising here.
        body_preview = (response.content or b"")[:500].decode(
            "utf-8", errors="replace"
        )
        logger.error(
            "HTTP error %d: %s - %s", response.status_code, url, body_preview
        )
        raise HTTPError(
            f"HTTP {response.status_code} for {url}",
            status_code=response.status_code,
            url=url,
        )

    logger.debug("POST Response %d from %s", response.status_code, url)
    return response.content
def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict:
"""
Выполнить GET запрос и вернуть JSON.
@@ -203,12 +262,15 @@ class BaseHTTPClient:
response = self.get(endpoint, params=params)
return response.json()
def download_file(self, endpoint: str) -> bytes:
def download_file(
self, endpoint: str, headers: dict[str, str] | None = None
) -> bytes:
"""
Скачать файл.
Args:
endpoint: Путь или полный URL файла
headers: Дополнительные заголовки
Returns:
Содержимое файла как bytes
@@ -216,9 +278,34 @@ class BaseHTTPClient:
url = self._build_url(endpoint)
logger.info("Downloading file: %s", url)
response = self.get(endpoint)
content = response.content
# Выполняем GET с дополнительными заголовками
request_headers = {}
if headers:
request_headers.update(headers)
try:
response = self.session.get(
url, headers=request_headers, timeout=self.timeout
)
except requests.exceptions.ConnectionError as e:
logger.error("Connection error: %s - %s", url, e)
raise ConnectionError(f"Failed to connect to {url}", url=url) from e
except requests.exceptions.Timeout as e:
logger.error("Timeout: %s", url)
raise ConnectionError(f"Request timeout for {url}", url=url) from e
except requests.exceptions.RequestException as e:
logger.error("Request error: %s - %s", url, e)
raise HTTPClientError(f"Request failed: {e}", url=url) from e
if not response.ok:
logger.error("HTTP error %d: %s", response.status_code, url)
raise HTTPError(
f"HTTP {response.status_code} for {url}",
status_code=response.status_code,
url=url,
)
content = response.content
logger.info("Downloaded %d bytes from %s", len(content), url)
return content

View File

@@ -370,7 +370,9 @@ class ZakupkiClient:
</soapenv:Body>
</soapenv:Envelope>"""
def _parse_soap_response(self, response_content: bytes) -> str | None:
def _parse_soap_response( # noqa: C901
self, response_content: bytes
) -> str | None:
"""Извлечь URL архива из SOAP ответа."""
try:
xml_str = response_content.decode("utf-8")
@@ -383,12 +385,27 @@ class ZakupkiClient:
logger.info("Found archive URL: %s", elem.text)
return elem.text.strip()
# Проверяем на ошибки
# Проверяем на errorInfo (структурированная ошибка ЕИС)
for elem in root.iter():
if "fault" in elem.tag.lower() or "error" in elem.tag.lower():
error_text = elem.text or ET.tostring(elem, encoding="unicode")
logger.error("SOAP error: %s", error_text)
raise ZakupkiClientError(f"SOAP error: {error_text}")
if elem.tag.endswith("errorInfo"):
code = ""
message = ""
for child in elem:
if child.tag.endswith("code") and child.text:
code = child.text.strip()
if child.tag.endswith("message") and child.text:
message = child.text.strip()
if message:
error_msg = f"[{code}] {message}" if code else message
logger.error("EIS error: %s", error_msg)
raise ZakupkiClientError(f"EIS error: {error_msg}")
# Проверяем на fault (SOAP fault)
for elem in root.iter():
if "fault" in elem.tag.lower():
error_text = ET.tostring(elem, encoding="unicode")
logger.error("SOAP fault: %s", error_text)
raise ZakupkiClientError(f"SOAP fault: {error_text}")
logger.warning("No archiveUrl found in SOAP response")
return None
@@ -432,13 +449,17 @@ class ZakupkiClient:
procurements = self._download_and_parse_http(plan.file_url, None)
all_procurements.extend(procurements)
logger.info(
"Parsed %d procurements from %s", len(procurements), plan.file_name
"Parsed %d procurements from %s",
len(procurements),
plan.file_name,
)
if progress_callback:
progress_callback(95, f"Загружено {len(all_procurements)} закупок")
logger.info("Total fetched %d procurements via HTTP", len(all_procurements))
logger.info(
"Total fetched %d procurements via HTTP", len(all_procurements)
)
return all_procurements
def _discover_data_files(

View File

@@ -16,11 +16,17 @@ import unittest
from apps.parsers.clients.zakupki import ZakupkiClient
from apps.parsers.models import ParserLoadLog, ProcurementRecord
from apps.parsers.services import ParserLoadLogService, ProcurementService
from django.conf import settings
from django.test import TestCase, override_settings
# Flag that enables the E2E tests
RUN_E2E_TESTS = os.environ.get("RUN_E2E_TESTS", "").lower() in {"1", "true", "yes"}

# Token from Django settings, falling back to the environment variable
ZAKUPKI_TOKEN = getattr(settings, "ZAKUPKI_TOKEN", "")
if not ZAKUPKI_TOKEN:
    ZAKUPKI_TOKEN = os.environ.get("ZAKUPKI_TOKEN", "")
@unittest.skipUnless(RUN_E2E_TESTS, "E2E tests disabled. Set RUN_E2E_TESTS=1 to enable")
class ZakupkiClientE2ETestCase(TestCase):
@@ -32,7 +38,7 @@ class ZakupkiClientE2ETestCase(TestCase):
def setUp(self):
"""Подготовка."""
self.client = ZakupkiClient(timeout=60)
self.client = ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60)
def tearDown(self):
"""Очистка."""
@@ -103,7 +109,7 @@ class ProcurementServiceE2ETestCase(TestCase):
)
# Загрузка данных
with ZakupkiClient(timeout=60) as client:
with ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60) as client:
procurements = client.fetch_procurements(
region_code="77",
year=2025,

View File

@@ -221,6 +221,21 @@ CACHES = {
}
# =============================================================================
# PARSERS SETTINGS
# =============================================================================
# Zakupki.gov.ru API token (obtained via Gosuslugi)
ZAKUPKI_TOKEN = get_env("ZAKUPKI_TOKEN", "")

# Proxy list for parsers: comma-separated in the environment, a list here.
# Blank entries are dropped; an empty or non-string value yields [].
_raw_parser_proxies = get_env("PARSER_PROXIES", "")
PARSER_PROXIES = (
    [proxy.strip() for proxy in _raw_parser_proxies.split(",") if proxy.strip()]
    if isinstance(_raw_parser_proxies, str)
    else []
)
# Password validation
AUTH_PASSWORD_VALIDATORS = [
{