Files
mostovik-backend/tests/apps/parsers/test_clients.py
Aleksandr Meshchriakov ee95628a0a
Some checks failed
CI/CD Pipeline / Run Tests (push) Failing after 37s
CI/CD Pipeline / Code Quality Checks (push) Failing after 43s
CI/CD Pipeline / Build & Push Images (push) Has been skipped
CI/CD Pipeline / Deploy (dev) (push) Has been skipped
CI/CD Pipeline / Deploy (prod) (push) Has been skipped
CI/CD Pipeline / Code Quality Checks (pull_request) Failing after 0s
CI/CD Pipeline / Run Tests (pull_request) Failing after 0s
CI/CD Pipeline / Build & Push Images (pull_request) Has been skipped
CI/CD Pipeline / Deploy (dev) (pull_request) Has been skipped
CI/CD Pipeline / Deploy (prod) (pull_request) Has been skipped
feat: обновления парсеров, тестов и миграций
- Обновлены клиенты парсеров (checko, fns, minpromtorg, proverki, zakupki)
- Добавлены новые миграции для моделей
- Расширено покрытие тестами
- Обновлены конфигурации и настройки проекта
- Добавлены утилиты для тестирования

Co-Authored-By: Warp <agent@warp.dev>
2026-02-10 10:17:47 +01:00

668 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Integration-style tests for parsers clients using a local HTTP server."""
from __future__ import annotations
from urllib.parse import urlparse
import requests
from requests.adapters import BaseAdapter
from apps.parsers.clients.base import (
BaseHTTPClient,
ConnectionError,
HTTPClientError,
HTTPError,
)
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient
from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer
from apps.parsers.clients.proverki import ProverkiClient
from apps.parsers.clients.proverki.schemas import Inspection
from django.test import TestCase, tag
from tests.utils import Response, TestHTTPServer
from tests.utils.fixtures import (
build_minpromtorg_certificates_excel,
build_minpromtorg_manufacturers_excel,
build_proverki_xml,
fake,
)
def _host_from_base_url(base_url: str) -> str:
parsed = urlparse(base_url)
if parsed.port:
return f"{parsed.hostname}:{parsed.port}"
return parsed.hostname or ""
def _base_url() -> str:
    """Build an ``https://`` base URL on a Faker-generated domain name."""
    domain = fake.domain_name()
    return f"https://{domain}"
def _proxy_address() -> str:
    """Build an ``http://<ipv4>:<port>`` proxy address from Faker values."""
    # Draw the address before the port so the Faker call order is unchanged.
    ip_address = fake.ipv4()
    port = fake.port_number()
    return f"http://{ip_address}:{port}"
class _RaisingAdapter(BaseAdapter):
    """Transport adapter whose ``send`` always raises a preset exception."""

    def __init__(self, exc: Exception) -> None:
        """Store *exc* so that every ``send`` call can raise it."""
        super().__init__()
        self._exc = exc

    def send(self, _request, **_kwargs):
        """Raise the stored exception instead of sending the request."""
        raise self._exc

    def close(self) -> None:
        """Do nothing and return ``None``."""
        return None
class BaseHTTPClientTest(TestCase):
    """Tests for BaseHTTPClient: construction, proxies, requests, error mapping."""

    def test_client_initialization(self):
        """Base URL is kept as given; proxies default to None, timeout to 30."""
        url = _base_url()
        http_client = BaseHTTPClient(base_url=url)
        self.assertEqual(http_client.base_url, url)
        self.assertIsNone(http_client.proxies)
        self.assertEqual(http_client.timeout, 30)

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertEqual(http_client.proxies, proxy_pool)

    def test_select_proxy_returns_none_without_proxies(self):
        """``_select_proxy`` yields None when no proxies are configured."""
        http_client = BaseHTTPClient(base_url=_base_url())
        self.assertIsNone(http_client._select_proxy())

    def test_select_proxy_returns_random_from_list(self):
        """``_select_proxy`` yields one of the configured proxies."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        chosen = http_client._select_proxy()
        self.assertIn(chosen, proxy_pool)

    def test_current_proxy_property(self):
        """``current_proxy`` is None at first and set once ``session`` is read."""
        proxy_pool = [_proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertIsNone(http_client.current_proxy)
        # Reading ``session`` is the step after which a proxy is expected.
        _ = http_client.session
        self.assertEqual(http_client.current_proxy, proxy_pool[0])

    def test_build_url_with_full_url(self):
        """An absolute URL passes through ``_build_url`` unchanged."""
        absolute_url = "https://example.com/path"
        http_client = BaseHTTPClient(base_url=_base_url())
        self.assertEqual(http_client._build_url(absolute_url), absolute_url)

    def test_get_json_and_download_file(self):
        """``get_json`` decodes a JSON route; ``download_file`` returns raw bytes."""
        with TestHTTPServer() as server:
            server.add_json("/api/data", {"ok": True})
            server.add_bytes("/files/data.bin", b"payload")
            http_client = BaseHTTPClient(
                base_url=server.base_url,
                adapter=server.adapter,
            )
            payload = http_client.get_json("/api/data")
            blob = http_client.download_file("/files/data.bin")
            self.assertTrue(payload["ok"])
            self.assertEqual(blob, b"payload")

    def test_post_success_and_error(self):
        """POST returns the response body; 404 and 500 responses raise HTTPError."""
        with TestHTTPServer() as server:
            # The echo route sends the request body straight back.
            server.add_route(
                "POST",
                "/echo",
                lambda _req, body: Response(status=200, body=body, headers={}),
            )
            server.add_route(
                "GET", "/missing", lambda _req, _body: Response(status=404)
            )
            http_client = BaseHTTPClient(
                base_url=server.base_url,
                adapter=server.adapter,
            )
            echoed = http_client.post("/echo", data=b"ping")
            self.assertEqual(echoed, b"ping")
            with self.assertRaises(HTTPError):
                http_client.get("/missing")
            server.add_route(
                "POST", "/error", lambda _req, _body: Response(status=500)
            )
            with self.assertRaises(HTTPError):
                http_client.post("/error", data=b"fail")

    def test_download_file_error(self):
        """``download_file`` raises HTTPError on a 404 response."""
        with TestHTTPServer() as server:
            server.add_route(
                "GET", "/missing.bin", lambda _req, _body: Response(status=404)
            )
            http_client = BaseHTTPClient(
                base_url=server.base_url,
                adapter=server.adapter,
            )
            with self.assertRaises(HTTPError):
                http_client.download_file("/missing.bin")

    def test_connection_error(self):
        """A GET against 127.0.0.1:1 is expected to raise ConnectionError."""
        http_client = BaseHTTPClient(base_url="http://127.0.0.1:1", timeout=0.01)
        with self.assertRaises(ConnectionError):
            http_client.get("/unreachable")

    def test_context_manager_closes_session(self):
        """The session exists inside the ``with`` block and is None after it."""
        with TestHTTPServer() as server:
            server.add_json("/ping", {"ok": True})
            with BaseHTTPClient(
                base_url=server.base_url,
                adapter=server.adapter,
            ) as http_client:
                http_client.get_json("/ping")
                self.assertIsNotNone(http_client._session)
            self.assertIsNone(http_client._session)

    def test_rotate_proxy(self):
        """``rotate_proxy`` returns one of the configured proxies."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        rotated = http_client.rotate_proxy()
        self.assertIn(rotated, proxy_pool)

    def test_https_base_url_mounts_adapter(self):
        """A client with an https base URL and a custom adapter yields a session."""
        with TestHTTPServer() as server:
            https_url = f"https://{fake.domain_name()}"
            http_client = BaseHTTPClient(base_url=https_url, adapter=server.adapter)
            active_session = http_client.session
            self.assertIsNotNone(active_session)

    def test_rotate_proxy_closes_existing_session(self):
        """``rotate_proxy`` resets an already-created session back to None."""
        proxy_pool = [_proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        _ = http_client.session
        self.assertIsNotNone(http_client._session)
        http_client.rotate_proxy()
        self.assertIsNone(http_client._session)

    def test_get_timeout_raises_connection_error(self):
        """A requests Timeout during GET is raised as ConnectionError."""
        failing = _RaisingAdapter(requests.exceptions.Timeout(fake.sentence()))
        http_client = BaseHTTPClient(base_url=_base_url(), adapter=failing)
        with self.assertRaises(ConnectionError):
            http_client.get("/timeout")

    def test_get_request_exception_raises_http_client_error(self):
        """A generic RequestException during GET is raised as HTTPClientError."""
        failing = _RaisingAdapter(
            requests.exceptions.RequestException(fake.sentence())
        )
        http_client = BaseHTTPClient(base_url=_base_url(), adapter=failing)
        with self.assertRaises(HTTPClientError):
            http_client.get("/boom")

    def test_post_connection_error_raises_connection_error(self):
        """A requests ConnectionError during POST is raised as ConnectionError."""
        failing = _RaisingAdapter(
            requests.exceptions.ConnectionError(fake.sentence())
        )
        http_client = BaseHTTPClient(base_url=_base_url(), adapter=failing)
        with self.assertRaises(ConnectionError):
            http_client.post("/fail", data=fake.pystr(min_chars=5, max_chars=10))

    def test_post_timeout_raises_connection_error(self):
        """A requests Timeout during POST is raised as ConnectionError."""
        failing = _RaisingAdapter(requests.exceptions.Timeout(fake.sentence()))
        http_client = BaseHTTPClient(base_url=_base_url(), adapter=failing)
        with self.assertRaises(ConnectionError):
            http_client.post("/timeout", data=fake.pystr(min_chars=5, max_chars=10))

    def test_post_request_exception_raises_http_client_error(self):
        """A generic RequestException during POST is raised as HTTPClientError."""
        failing = _RaisingAdapter(
            requests.exceptions.RequestException(fake.sentence())
        )
        http_client = BaseHTTPClient(base_url=_base_url(), adapter=failing)
        with self.assertRaises(HTTPClientError):
            http_client.post("/boom", data=fake.pystr(min_chars=5, max_chars=10))

    def test_download_timeout_raises_connection_error(self):
        """A requests Timeout during download is raised as ConnectionError."""
        failing = _RaisingAdapter(requests.exceptions.Timeout(fake.sentence()))
        http_client = BaseHTTPClient(base_url=_base_url(), adapter=failing)
        with self.assertRaises(ConnectionError):
            http_client.download_file("/timeout.bin")

    def test_download_request_exception_raises_http_client_error(self):
        """A generic RequestException during download is raised as HTTPClientError."""
        failing = _RaisingAdapter(
            requests.exceptions.RequestException(fake.sentence())
        )
        http_client = BaseHTTPClient(base_url=_base_url(), adapter=failing)
        with self.assertRaises(HTTPClientError):
            http_client.download_file("/boom.bin")
class IndustrialProductionClientTest(TestCase):
    """Tests for IndustrialProductionClient."""

    def test_client_initialization(self):
        """A default client has no proxies and targets minpromtorg.gov.ru."""
        default_client = IndustrialProductionClient()
        self.assertIsNone(default_client.proxies)
        self.assertEqual(default_client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        proxied_client = IndustrialProductionClient(proxies=proxy_pool)
        self.assertEqual(proxied_client.proxies, proxy_pool)

    def test_context_manager(self):
        """Entering the context manager yields the client instance."""
        with IndustrialProductionClient() as entered:
            self.assertIsInstance(entered, IndustrialProductionClient)

    def test_fetch_certificates_success(self):
        """Certificates are read from the Excel file listed by the preview route."""
        workbook, expected_rows = build_minpromtorg_certificates_excel(count=5)
        stamp = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_resolutions_{stamp}.xlsx"
        xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        with TestHTTPServer() as server:
            # Listing with one entry, named after the client's own query,
            # that points at the generated workbook.
            listing = {
                "data": [
                    {
                        "name": IndustrialProductionClient().query,
                        "files": [
                            {"name": file_name, "url": f"/files/{file_name}"}
                        ],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(
                f"/files/{file_name}", workbook, content_type=xlsx_mime
            )
            local_client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            fetched = local_client.fetch_certificates()
            self.assertEqual(len(fetched), len(expected_rows))
            self.assertIsInstance(fetched[0], IndustrialCertificate)
            fetched_numbers = {item.certificate_number for item in fetched}
            expected_numbers = {row.certificate_number for row in expected_rows}
            self.assertSetEqual(fetched_numbers, expected_numbers)

    def test_fetch_certificates_no_files(self):
        """An empty preview listing produces an empty certificate list."""
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            local_client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            fetched = local_client.fetch_certificates()
            self.assertEqual(fetched, [])

    def test_get_latest_file_url_selects_newest(self):
        """The URL returned for a file list contains the most recent date."""
        default_client = IndustrialProductionClient()
        # A set removes duplicate random dates; sorting puts the newest last.
        days = sorted(
            {
                fake.date_between(start_date="-90d", end_date="today")
                for _ in range(3)
            }
        )
        stamps = [day.strftime("%Y%m%d") for day in days]
        files = [
            {
                "name": f"data_resolutions_{stamp}.xlsx",
                "url": f"/files/{stamp}.xlsx",
            }
            for stamp in stamps
        ]
        latest_url = default_client._get_latest_file_url(files)
        self.assertIn(days[-1].strftime("%Y%m%d"), latest_url)

    def test_parse_row_valid(self):
        """A seven-column row becomes an IndustrialCertificate."""
        default_client = IndustrialProductionClient()
        # Columns 1 and 5 are the ones asserted below as certificate number
        # and INN.
        row = (
            str(fake.date()),
            fake.bothify(text="??-####-#####"),
            str(fake.date()),
            fake.url(),
            fake.company(),
            "".join(str(fake.random_int(0, 9)) for _ in range(10)),
            "".join(str(fake.random_int(0, 9)) for _ in range(13)),
        )
        parsed = default_client._parse_row(row)
        self.assertIsInstance(parsed, IndustrialCertificate)
        self.assertEqual(parsed.certificate_number, row[1])
        self.assertEqual(parsed.inn, row[5])

    def test_parse_row_invalid(self):
        """A row with only two columns is rejected with None."""
        default_client = IndustrialProductionClient()
        parsed = default_client._parse_row(("only", "two"))
        self.assertIsNone(parsed)
class ManufacturesClientTest(TestCase):
    """Tests for ManufacturesClient."""

    def test_client_initialization(self):
        """A default client has no proxies and targets minpromtorg.gov.ru."""
        default_client = ManufacturesClient()
        self.assertIsNone(default_client.proxies)
        self.assertEqual(default_client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        proxied_client = ManufacturesClient(proxies=proxy_pool)
        self.assertEqual(proxied_client.proxies, proxy_pool)

    def test_context_manager(self):
        """Entering the context manager yields the client instance."""
        with ManufacturesClient() as entered:
            self.assertIsInstance(entered, ManufacturesClient)

    def test_fetch_manufacturers_success(self):
        """Manufacturers are read from the Excel file listed by the preview route."""
        workbook, expected_rows = build_minpromtorg_manufacturers_excel(count=5)
        stamp = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_orgs_{stamp}.xlsx"
        xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        with TestHTTPServer() as server:
            # Listing with one entry, named after the client's own query,
            # that points at the generated workbook.
            listing = {
                "data": [
                    {
                        "name": ManufacturesClient().query,
                        "files": [
                            {"name": file_name, "url": f"/files/{file_name}"}
                        ],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(
                f"/files/{file_name}", workbook, content_type=xlsx_mime
            )
            local_client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            fetched = local_client.fetch_manufacturers()
            self.assertEqual(len(fetched), len(expected_rows))
            self.assertIsInstance(fetched[0], Manufacturer)
            fetched_names = {item.full_legal_name for item in fetched}
            expected_names = {row.full_legal_name for row in expected_rows}
            self.assertSetEqual(fetched_names, expected_names)

    def test_fetch_manufacturers_no_files(self):
        """An empty preview listing produces an empty manufacturer list."""
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            local_client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            fetched = local_client.fetch_manufacturers()
            self.assertEqual(fetched, [])

    def test_get_latest_file_url_selects_newest(self):
        """The URL returned for a file list contains the most recent date."""
        default_client = ManufacturesClient()
        # A set removes duplicate random dates; sorting puts the newest last.
        days = sorted(
            {
                fake.date_between(start_date="-90d", end_date="today")
                for _ in range(3)
            }
        )
        stamps = [day.strftime("%Y%m%d") for day in days]
        files = [
            {"name": f"data_orgs_{stamp}.xlsx", "url": f"/files/{stamp}"}
            for stamp in stamps
        ]
        latest_url = default_client._get_latest_file_url(files)
        self.assertIn(days[-1].strftime("%Y%m%d"), latest_url)

    def test_parse_row_valid(self):
        """A four-column row becomes a Manufacturer with matching name and INN."""
        default_client = ManufacturesClient()
        row = (
            fake.company(),
            "".join(str(fake.random_int(0, 9)) for _ in range(10)),
            "".join(str(fake.random_int(0, 9)) for _ in range(13)),
            fake.address().replace("\n", ", "),
        )
        parsed = default_client._parse_row(row)
        self.assertIsInstance(parsed, Manufacturer)
        self.assertEqual(parsed.full_legal_name, row[0])
        self.assertEqual(parsed.inn, row[1])

    def test_parse_row_without_address(self):
        """A row lacking the address column parses with an empty address."""
        default_client = ManufacturesClient()
        row = (
            fake.company(),
            "".join(str(fake.random_int(0, 9)) for _ in range(10)),
            "".join(str(fake.random_int(0, 9)) for _ in range(13)),
        )
        parsed = default_client._parse_row(row)
        self.assertIsInstance(parsed, Manufacturer)
        self.assertEqual(parsed.address, "")
@tag("integration", "slow")
class IndustrialProductionClientIntegrationTest(TestCase):
    """Integration test using local HTTP server instead of external API."""

    def test_fetch_certificates_local_server(self):
        """All generated certificate rows come back through the local server."""
        workbook, expected_rows = build_minpromtorg_certificates_excel(count=3)
        stamp = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_resolutions_{stamp}.xlsx"
        with TestHTTPServer() as server:
            # Listing with one entry, named after the client's own query,
            # that points at the generated workbook.
            listing = {
                "data": [
                    {
                        "name": IndustrialProductionClient().query,
                        "files": [
                            {"name": file_name, "url": f"/files/{file_name}"}
                        ],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(f"/files/{file_name}", workbook)
            local_client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = local_client.fetch_certificates()
            self.assertEqual(len(fetched), len(expected_rows))
@tag("integration", "slow")
class ManufacturesClientIntegrationTest(TestCase):
    """Integration test using local HTTP server instead of external API."""

    def test_fetch_manufacturers_local_server(self):
        """All generated manufacturer rows come back through the local server."""
        workbook, expected_rows = build_minpromtorg_manufacturers_excel(count=3)
        stamp = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_orgs_{stamp}.xlsx"
        with TestHTTPServer() as server:
            # Listing with one entry, named after the client's own query,
            # that points at the generated workbook.
            listing = {
                "data": [
                    {
                        "name": ManufacturesClient().query,
                        "files": [
                            {"name": file_name, "url": f"/files/{file_name}"}
                        ],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(f"/files/{file_name}", workbook)
            local_client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = local_client.fetch_manufacturers()
            self.assertEqual(len(fetched), len(expected_rows))
class ProverkiClientTest(TestCase):
    """Tests for ProverkiClient."""

    def test_client_initialization(self):
        """A default client has no proxies and targets proverki.gov.ru."""
        client = ProverkiClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "proverki.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        proxies = [_proxy_address(), _proxy_address()]
        client = ProverkiClient(proxies=proxies)
        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """Entering the context manager yields the client instance."""
        with ProverkiClient() as client:
            self.assertIsInstance(client, ProverkiClient)

    def test_parse_xml_content_english_tags(self):
        """Every record produced by ``build_proverki_xml`` is parsed."""
        client = ProverkiClient()
        xml_content, rows = build_proverki_xml(count=2)
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), len(rows))
        self.assertIsInstance(inspections[0], Inspection)
        self.assertSetEqual(
            {i.registration_number for i in inspections},
            {r.registration_number for r in rows},
        )

    def test_parse_xml_content_russian_tags(self):
        """A record written with Cyrillic tag names is parsed into its fields."""
        client = ProverkiClient()
        reg_num = "".join(str(fake.random_int(0, 9)) for _ in range(12))
        inn = "".join(str(fake.random_int(0, 9)) for _ in range(10))
        ogrn = "".join(str(fake.random_int(0, 9)) for _ in range(13))
        org_name = fake.company()
        authority = fake.company()
        xml_content = (
            "<?xml version='1.0' encoding='utf-8'?>"
            "<Проверки>"
            "<КНМ>"
            f"<УчетныйНомер>{reg_num}</УчетныйНомер>"
            f"<ИНН>{inn}</ИНН>"
            f"<ОГРН>{ogrn}</ОГРН>"
            f"<Наименование>{org_name}</Наименование>"
            f"<КонтрольныйОрган>{authority}</КонтрольныйОрган>"
            "</КНМ>"
            "</Проверки>"
        ).encode("utf-8")
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].registration_number, reg_num)
        self.assertEqual(inspections[0].inn, inn)
        self.assertEqual(inspections[0].control_authority, authority)

    def test_parse_xml_record_with_attributes(self):
        """Field values carried as XML attributes are picked up by the parser."""
        from xml.etree import ElementTree as ET

        row_inn = "".join(str(fake.random_int(0, 9)) for _ in range(10))
        reg_num = "".join(str(fake.random_int(0, 9)) for _ in range(12))
        record_xml = (
            f'<inspection inn="{row_inn}" registration_number="{reg_num}" />'
        )
        # The markup is built in the test itself. The suppression has to sit
        # on the same line as the call for the linter to honour it.
        element = ET.fromstring(record_xml)  # noqa: S314
        client = ProverkiClient()
        result = client._parse_xml_record(element)
        self.assertIsNotNone(result)
        self.assertEqual(result.inn, row_inn)
        self.assertEqual(result.registration_number, reg_num)

    def test_parse_xml_record_invalid(self):
        """An element without any known fields yields None."""
        from xml.etree import ElementTree as ET

        element = ET.fromstring("<empty_record></empty_record>")  # noqa: S314
        client = ProverkiClient()
        self.assertIsNone(client._parse_xml_record(element))

    def test_parse_windows_1251_encoding(self):
        """XML declared and encoded as windows-1251 is decoded correctly."""
        # The fixed Cyrillic prefix guarantees non-ASCII bytes in the payload,
        # so the test cannot pass without real windows-1251 decoding even when
        # Faker yields an ASCII-only company name.
        org_name = f"ООО {fake.company()}"
        inn = "".join(str(fake.random_int(0, 9)) for _ in range(10))
        reg_num = "".join(str(fake.random_int(0, 9)) for _ in range(12))
        xml_content = (
            '<?xml version="1.0" encoding="windows-1251"?>'
            "<inspections>"
            "<inspection>"
            f"<inn>{inn}</inn>"
            f"<registration_number>{reg_num}</registration_number>"
            f"<organisation_name>{org_name}</organisation_name>"
            "</inspection>"
            "</inspections>"
        ).encode("windows-1251")
        client = ProverkiClient()
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].organisation_name, org_name)

    def test_fetch_inspections_with_file_url(self):
        """An explicit ``file_url`` is downloaded and every record is parsed."""
        xml_content, rows = build_proverki_xml(count=2)
        with TestHTTPServer() as server:
            server.add_bytes(
                "/files/inspections.xml", xml_content, content_type="text/xml"
            )
            client = ProverkiClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                use_playwright=False,
                http_adapter=server.adapter,
            )
            inspections = client.fetch_inspections(
                file_url=f"{server.base_url}/files/inspections.xml"
            )
            self.assertEqual(len(inspections), len(rows))

    def test_fetch_inspections_no_files(self):
        """Without a file URL and without Playwright the result is empty."""
        # NOTE(review): no local server or adapter is wired in here; confirm
        # that fetch_inspections() does not contact the real host in this mode.
        client = ProverkiClient(use_playwright=False)
        inspections = client.fetch_inspections()
        self.assertEqual(inspections, [])
@tag("integration", "slow")
class ProverkiClientIntegrationTest(TestCase):
    """Integration test using local HTTP server for proverki.gov.ru."""

    def test_fetch_inspections_local_server(self):
        """All generated inspection records come back through the local server."""
        payload, expected_rows = build_proverki_xml(count=3)
        xml_path = "/files/inspections.xml"
        with TestHTTPServer() as server:
            server.add_bytes(xml_path, payload, content_type="text/xml")
            local_client = ProverkiClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                use_playwright=False,
                http_adapter=server.adapter,
            )
            file_url = f"{server.base_url}{xml_path}"
            fetched = local_client.fetch_inspections(file_url=file_url)
            self.assertEqual(len(fetched), len(expected_rows))