feat(parsers): add SOAP API support for zakupki.gov.ru

- Add post() method to BaseHTTPClient for SOAP requests
- Update download_file() to support custom headers (for token)
- Add ZAKUPKI_TOKEN and PARSER_PROXIES settings
- Improve SOAP error parsing to show EIS error messages
- Update E2E tests to use token from settings
- Add data/ and .zed/ to .gitignore
This commit is contained in:
2026-01-28 13:13:10 +01:00
parent c6483d8427
commit a369642459
6 changed files with 149 additions and 14 deletions

View File

@@ -189,6 +189,65 @@ class BaseHTTPClient:
logger.debug("Response %d from %s", response.status_code, url)
return response
def post(
    self,
    endpoint: str,
    data: bytes | str | None = None,
    json: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Send a POST request and return the raw response body.

    Args:
        endpoint: Path or full URL; resolved through ``self._build_url``.
        data: Request body (bytes or str).
        json: JSON request body.
        headers: Extra headers to send with this request only.

    Returns:
        The response content as bytes.

    Raises:
        ConnectionError: On a connection failure or a request timeout.
        HTTPClientError: On any other ``requests`` request exception.
        HTTPError: When the response is not OK (4xx, 5xx).
    """
    url = self._build_url(endpoint)
    logger.info("POST %s (proxy: %s)", url, self._current_proxy)

    # Copy the caller's mapping so the dict handed to the session is our own.
    extra_headers = dict(headers) if headers else {}

    try:
        response = self.session.post(
            url,
            data=data,
            json=json,
            headers=extra_headers,
            timeout=self.timeout,
        )
    except requests.exceptions.ConnectionError as exc:
        logger.error("Connection error: %s - %s", url, exc)
        raise ConnectionError(f"Failed to connect to {url}", url=url) from exc
    except requests.exceptions.Timeout as exc:
        # Timeouts are reported to callers as connection problems as well.
        logger.error("Timeout: %s", url)
        raise ConnectionError(f"Request timeout for {url}", url=url) from exc
    except requests.exceptions.RequestException as exc:
        logger.error("Request error: %s - %s", url, exc)
        raise HTTPClientError(f"Request failed: {exc}", url=url) from exc

    # Success path first; everything below it is the HTTP-error case.
    if response.ok:
        logger.debug("POST Response %d from %s", response.status_code, url)
        return response.content

    status = response.status_code
    logger.error("HTTP error %d: %s", status, url)
    raise HTTPError(
        f"HTTP {status} for {url}",
        status_code=status,
        url=url,
    )
def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict:
"""
Выполнить GET запрос и вернуть JSON.
@@ -203,12 +262,15 @@ class BaseHTTPClient:
response = self.get(endpoint, params=params)
return response.json()
def download_file(self, endpoint: str) -> bytes:
def download_file(
self, endpoint: str, headers: dict[str, str] | None = None
) -> bytes:
"""
Скачать файл.
Args:
endpoint: Путь или полный URL файла
headers: Дополнительные заголовки
Returns:
Содержимое файла как bytes
@@ -216,9 +278,34 @@ class BaseHTTPClient:
url = self._build_url(endpoint)
logger.info("Downloading file: %s", url)
response = self.get(endpoint)
content = response.content
# Выполняем GET с дополнительными заголовками
request_headers = {}
if headers:
request_headers.update(headers)
try:
response = self.session.get(
url, headers=request_headers, timeout=self.timeout
)
except requests.exceptions.ConnectionError as e:
logger.error("Connection error: %s - %s", url, e)
raise ConnectionError(f"Failed to connect to {url}", url=url) from e
except requests.exceptions.Timeout as e:
logger.error("Timeout: %s", url)
raise ConnectionError(f"Request timeout for {url}", url=url) from e
except requests.exceptions.RequestException as e:
logger.error("Request error: %s - %s", url, e)
raise HTTPClientError(f"Request failed: {e}", url=url) from e
if not response.ok:
logger.error("HTTP error %d: %s", response.status_code, url)
raise HTTPError(
f"HTTP {response.status_code} for {url}",
status_code=response.status_code,
url=url,
)
content = response.content
logger.info("Downloaded %d bytes from %s", len(content), url)
return content

View File

@@ -370,7 +370,9 @@ class ZakupkiClient:
</soapenv:Body>
</soapenv:Envelope>"""
def _parse_soap_response(self, response_content: bytes) -> str | None:
def _parse_soap_response( # noqa: C901
self, response_content: bytes
) -> str | None:
"""Извлечь URL архива из SOAP ответа."""
try:
xml_str = response_content.decode("utf-8")
@@ -383,12 +385,27 @@ class ZakupkiClient:
logger.info("Found archive URL: %s", elem.text)
return elem.text.strip()
# Проверяем на ошибки
# Проверяем на errorInfo (структурированная ошибка ЕИС)
for elem in root.iter():
if "fault" in elem.tag.lower() or "error" in elem.tag.lower():
error_text = elem.text or ET.tostring(elem, encoding="unicode")
logger.error("SOAP error: %s", error_text)
raise ZakupkiClientError(f"SOAP error: {error_text}")
if elem.tag.endswith("errorInfo"):
code = ""
message = ""
for child in elem:
if child.tag.endswith("code") and child.text:
code = child.text.strip()
if child.tag.endswith("message") and child.text:
message = child.text.strip()
if message:
error_msg = f"[{code}] {message}" if code else message
logger.error("EIS error: %s", error_msg)
raise ZakupkiClientError(f"EIS error: {error_msg}")
# Проверяем на fault (SOAP fault)
for elem in root.iter():
if "fault" in elem.tag.lower():
error_text = ET.tostring(elem, encoding="unicode")
logger.error("SOAP fault: %s", error_text)
raise ZakupkiClientError(f"SOAP fault: {error_text}")
logger.warning("No archiveUrl found in SOAP response")
return None
@@ -432,13 +449,17 @@ class ZakupkiClient:
procurements = self._download_and_parse_http(plan.file_url, None)
all_procurements.extend(procurements)
logger.info(
"Parsed %d procurements from %s", len(procurements), plan.file_name
"Parsed %d procurements from %s",
len(procurements),
plan.file_name,
)
if progress_callback:
progress_callback(95, f"Загружено {len(all_procurements)} закупок")
logger.info("Total fetched %d procurements via HTTP", len(all_procurements))
logger.info(
"Total fetched %d procurements via HTTP", len(all_procurements)
)
return all_procurements
def _discover_data_files(