"""Integration-style tests for parsers clients using a local HTTP server."""
from __future__ import annotations
from urllib.parse import urlparse
import requests
from apps.parsers.clients.base import (
BaseHTTPClient,
ConnectionError,
HTTPClientError,
HTTPError,
)
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient
from apps.parsers.clients.minpromtorg.products import IndustrialProductsClient
from apps.parsers.clients.minpromtorg.schemas import (
IndustrialCertificate,
IndustrialProduct,
Manufacturer,
)
from apps.parsers.clients.proverki import ProverkiClient
from apps.parsers.clients.proverki.schemas import Inspection
from django.test import TestCase, tag
from requests.adapters import BaseAdapter
from tests.utils import Response, TestHTTPServer
from tests.utils.fixtures import (
build_minpromtorg_certificates_excel,
build_minpromtorg_manufacturers_excel,
build_minpromtorg_products_excel,
build_proverki_xml,
fake,
)
def _host_from_base_url(base_url: str) -> str:
parsed = urlparse(base_url)
if parsed.port:
return f"{parsed.hostname}:{parsed.port}"
return parsed.hostname or ""
def _base_url() -> str:
    """Return an ``https://`` URL built from a Faker-generated domain name."""
    domain = fake.domain_name()
    return "https://" + domain
def _proxy_address() -> str:
    """Return an ``http://<ipv4>:<port>`` string from Faker-generated parts."""
    ip_address = fake.ipv4()
    port = fake.port_number()
    return f"http://{ip_address}:{port}"
def _digits(length: int) -> str:
    """Return a string of *length* decimal digits drawn one at a time from Faker."""
    chars = []
    for _ in range(length):
        chars.append(str(fake.random_int(0, 9)))
    return "".join(chars)
class _RaisingAdapter(BaseAdapter):
    """Transport adapter stub whose ``send`` always raises a preset exception."""

    def __init__(self, exc: Exception) -> None:
        super().__init__()
        # Stored so that every send() call raises this same exception object.
        self._exc = exc

    def send(self, _request, **_kwargs):
        """Raise the stored exception; the request and options are ignored."""
        error = self._exc
        raise error

    def close(self) -> None:
        """Do nothing; the adapter only holds the exception reference."""
        return None
class BaseHTTPClientTest(TestCase):
    """Tests for BaseHTTPClient."""

    def _client_raising(self, exc):
        # Build a client whose transport raises *exc* on every request.
        return BaseHTTPClient(base_url=_base_url(), adapter=_RaisingAdapter(exc))

    def test_client_initialization(self):
        # Only base_url is supplied; the test expects proxies to be None and
        # the timeout to be 30.
        url = _base_url()
        http_client = BaseHTTPClient(base_url=url)
        self.assertEqual(http_client.base_url, url)
        self.assertIsNone(http_client.proxies)
        self.assertEqual(http_client.timeout, 30)

    def test_client_with_proxies(self):
        proxy_pool = [_proxy_address(), _proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertEqual(http_client.proxies, proxy_pool)

    def test_select_proxy_returns_none_without_proxies(self):
        http_client = BaseHTTPClient(base_url=_base_url())
        chosen = http_client._select_proxy()
        self.assertIsNone(chosen)

    def test_select_proxy_returns_random_from_list(self):
        proxy_pool = [_proxy_address(), _proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertIn(http_client._select_proxy(), proxy_pool)

    def test_current_proxy_property(self):
        proxy_pool = [_proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertIsNone(http_client.current_proxy)
        # The test relies on reading ``session`` to make current_proxy non-None.
        _ = http_client.session
        self.assertEqual(http_client.current_proxy, proxy_pool[0])

    def test_build_url_with_full_url(self):
        # An absolute URL is expected to come back unchanged.
        absolute_url = "https://example.com/path"
        http_client = BaseHTTPClient(base_url=_base_url())
        built = http_client._build_url(absolute_url)
        self.assertEqual(built, absolute_url)

    def test_get_json_and_download_file(self):
        with TestHTTPServer() as server:
            server.add_json("/api/data", {"ok": True})
            server.add_bytes("/files/data.bin", b"payload")
            http_client = BaseHTTPClient(
                base_url=server.base_url, adapter=server.adapter
            )
            payload = http_client.get_json("/api/data")
            blob = http_client.download_file("/files/data.bin")
            self.assertTrue(payload["ok"])
            self.assertEqual(blob, b"payload")

    def test_post_success_and_error(self):
        def echo_handler(_req, body):
            # Reply 200 with the request body unchanged.
            return Response(status=200, body=body, headers={})

        def status_only(code):
            # Handler factory: reply with just the given status code.
            return lambda _req, _body: Response(status=code)

        with TestHTTPServer() as server:
            server.add_route("POST", "/echo", echo_handler)
            server.add_route("GET", "/missing", status_only(404))
            http_client = BaseHTTPClient(
                base_url=server.base_url, adapter=server.adapter
            )

            echoed = http_client.post("/echo", data=b"ping")
            self.assertEqual(echoed, b"ping")

            with self.assertRaises(HTTPError):
                http_client.get("/missing")

            server.add_route("POST", "/error", status_only(500))
            with self.assertRaises(HTTPError):
                http_client.post("/error", data=b"fail")

    def test_download_file_error(self):
        def not_found(_req, _body):
            return Response(status=404)

        with TestHTTPServer() as server:
            server.add_route("GET", "/missing.bin", not_found)
            http_client = BaseHTTPClient(
                base_url=server.base_url, adapter=server.adapter
            )
            with self.assertRaises(HTTPError):
                http_client.download_file("/missing.bin")

    def test_connection_error(self):
        # No adapter is injected here: the request targets 127.0.0.1 port 1
        # with a 0.01 timeout and is expected to fail as ConnectionError.
        http_client = BaseHTTPClient(base_url="http://127.0.0.1:1", timeout=0.01)
        with self.assertRaises(ConnectionError):
            http_client.get("/unreachable")

    def test_context_manager_closes_session(self):
        with TestHTTPServer() as server:
            server.add_json("/ping", {"ok": True})
            managed = BaseHTTPClient(base_url=server.base_url, adapter=server.adapter)
            with managed as http_client:
                http_client.get_json("/ping")
                self.assertIsNotNone(http_client._session)
            # Leaving the client's ``with`` block is expected to drop the session.
            self.assertIsNone(http_client._session)

    def test_rotate_proxy(self):
        proxy_pool = [_proxy_address(), _proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        rotated = http_client.rotate_proxy()
        self.assertIn(rotated, proxy_pool)

    def test_https_base_url_mounts_adapter(self):
        # The server is started only to obtain its adapter; the client itself
        # is pointed at an https URL and merely has to produce a session.
        with TestHTTPServer() as server:
            http_client = BaseHTTPClient(base_url=_base_url(), adapter=server.adapter)
            created_session = http_client.session
            self.assertIsNotNone(created_session)

    def test_rotate_proxy_closes_existing_session(self):
        proxy_pool = [_proxy_address()]
        http_client = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        _ = http_client.session
        self.assertIsNotNone(http_client._session)
        http_client.rotate_proxy()
        self.assertIsNone(http_client._session)

    def test_get_timeout_raises_connection_error(self):
        http_client = self._client_raising(
            requests.exceptions.Timeout(fake.sentence())
        )
        with self.assertRaises(ConnectionError):
            http_client.get("/timeout")

    def test_get_request_exception_raises_http_client_error(self):
        http_client = self._client_raising(
            requests.exceptions.RequestException(fake.sentence())
        )
        with self.assertRaises(HTTPClientError):
            http_client.get("/boom")

    def test_post_connection_error_raises_connection_error(self):
        http_client = self._client_raising(
            requests.exceptions.ConnectionError(fake.sentence())
        )
        with self.assertRaises(ConnectionError):
            http_client.post("/fail", data=fake.pystr(min_chars=5, max_chars=10))

    def test_post_timeout_raises_connection_error(self):
        http_client = self._client_raising(
            requests.exceptions.Timeout(fake.sentence())
        )
        with self.assertRaises(ConnectionError):
            http_client.post("/timeout", data=fake.pystr(min_chars=5, max_chars=10))

    def test_post_request_exception_raises_http_client_error(self):
        http_client = self._client_raising(
            requests.exceptions.RequestException(fake.sentence())
        )
        with self.assertRaises(HTTPClientError):
            http_client.post("/boom", data=fake.pystr(min_chars=5, max_chars=10))

    def test_download_timeout_raises_connection_error(self):
        http_client = self._client_raising(
            requests.exceptions.Timeout(fake.sentence())
        )
        with self.assertRaises(ConnectionError):
            http_client.download_file("/timeout.bin")

    def test_download_request_exception_raises_http_client_error(self):
        http_client = self._client_raising(
            requests.exceptions.RequestException(fake.sentence())
        )
        with self.assertRaises(HTTPClientError):
            http_client.download_file("/boom.bin")
class IndustrialProductionClientTest(TestCase):
    """Tests for IndustrialProductionClient."""

    def test_client_initialization(self):
        # A default client is expected to have no proxies and the ministry host.
        client = IndustrialProductionClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        proxies = [_proxy_address(), _proxy_address()]
        client = IndustrialProductionClient(proxies=proxies)
        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        with IndustrialProductionClient() as client:
            self.assertIsInstance(client, IndustrialProductionClient)

    def test_fetch_certificates_success(self):
        # Serve a document listing that points at one fixture-built Excel
        # file, then compare what the client parsed with the fixture rows.
        excel_bytes, rows = build_minpromtorg_certificates_excel(count=5)
        date_str = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_resolutions_{date_str}.xlsx"
        with TestHTTPServer() as server:
            server.add_json(
                "/api/kss-document-preview",
                {
                    "data": [
                        {
                            "name": IndustrialProductionClient().query,
                            "files": [
                                {"name": file_name, "url": f"/files/{file_name}"}
                            ],
                        }
                    ]
                },
            )
            server.add_bytes(
                f"/files/{file_name}",
                excel_bytes,
                content_type=(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                ),
            )
            client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            certificates = client.fetch_certificates()
            self.assertEqual(len(certificates), len(rows))
            self.assertIsInstance(certificates[0], IndustrialCertificate)
            self.assertSetEqual(
                {c.certificate_number for c in certificates},
                {r.certificate_number for r in rows},
            )

    def test_fetch_certificates_no_files(self):
        # An empty listing is expected to produce an empty result.
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            certificates = client.fetch_certificates()
            self.assertEqual(certificates, [])

    def test_get_latest_file_url_selects_newest(self):
        client = IndustrialProductionClient()
        # The set drops duplicate random dates; sorting puts the newest last.
        dates = sorted(
            {fake.date_between(start_date="-90d", end_date="today") for _ in range(3)}
        )
        files = [
            {
                "name": f"data_resolutions_{date_str}.xlsx",
                "url": f"/files/{date_str}.xlsx",
            }
            for date_str in (day.strftime("%Y%m%d") for day in dates)
        ]
        url = client._get_latest_file_url(files)
        self.assertIn(dates[-1].strftime("%Y%m%d"), url)

    def test_parse_row_valid(self):
        client = IndustrialProductionClient()
        row = (
            str(fake.date()),
            fake.bothify(text="??-####-#####"),
            str(fake.date()),
            fake.url(),
            fake.company(),
            _digits(10),  # checked below as ``inn``
            _digits(13),
        )
        result = client._parse_row(row)
        self.assertIsInstance(result, IndustrialCertificate)
        self.assertEqual(result.certificate_number, row[1])
        self.assertEqual(result.inn, row[5])

    def test_parse_row_invalid(self):
        # A two-cell row is expected to be rejected with None.
        client = IndustrialProductionClient()
        result = client._parse_row(("only", "two"))
        self.assertIsNone(result)
class ManufacturesClientTest(TestCase):
    """Tests for ManufacturesClient."""

    def test_client_initialization(self):
        # A default client is expected to have no proxies and the ministry host.
        client = ManufacturesClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        proxies = [_proxy_address(), _proxy_address()]
        client = ManufacturesClient(proxies=proxies)
        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        with ManufacturesClient() as client:
            self.assertIsInstance(client, ManufacturesClient)

    def test_fetch_manufacturers_success(self):
        # Serve a document listing that points at one fixture-built Excel
        # file, then compare what the client parsed with the fixture rows.
        excel_bytes, rows = build_minpromtorg_manufacturers_excel(count=5)
        date_str = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_orgs_{date_str}.xlsx"
        with TestHTTPServer() as server:
            server.add_json(
                "/api/kss-document-preview",
                {
                    "data": [
                        {
                            "name": ManufacturesClient().query,
                            "files": [
                                {"name": file_name, "url": f"/files/{file_name}"}
                            ],
                        }
                    ]
                },
            )
            server.add_bytes(
                f"/files/{file_name}",
                excel_bytes,
                content_type=(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                ),
            )
            client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            manufacturers = client.fetch_manufacturers()
            self.assertEqual(len(manufacturers), len(rows))
            self.assertIsInstance(manufacturers[0], Manufacturer)
            self.assertSetEqual(
                {m.full_legal_name for m in manufacturers},
                {r.full_legal_name for r in rows},
            )

    def test_fetch_manufacturers_no_files(self):
        # An empty listing is expected to produce an empty result.
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            manufacturers = client.fetch_manufacturers()
            self.assertEqual(manufacturers, [])

    def test_get_latest_file_url_selects_newest(self):
        client = ManufacturesClient()
        # The set drops duplicate random dates; sorting puts the newest last.
        dates = sorted(
            {fake.date_between(start_date="-90d", end_date="today") for _ in range(3)}
        )
        files = [
            {"name": f"data_orgs_{date_str}.xlsx", "url": f"/files/{date_str}"}
            for date_str in (day.strftime("%Y%m%d") for day in dates)
        ]
        url = client._get_latest_file_url(files)
        self.assertIn(dates[-1].strftime("%Y%m%d"), url)

    def test_parse_row_valid(self):
        client = ManufacturesClient()
        row = (
            fake.company(),
            _digits(10),  # checked below as ``inn``
            _digits(13),
            fake.address().replace("\n", ", "),
        )
        result = client._parse_row(row)
        self.assertIsInstance(result, Manufacturer)
        self.assertEqual(result.full_legal_name, row[0])
        self.assertEqual(result.inn, row[1])

    def test_parse_row_without_address(self):
        # With the fourth cell missing, the address is expected to be "".
        client = ManufacturesClient()
        row = (
            fake.company(),
            _digits(10),
            _digits(13),
        )
        result = client._parse_row(row)
        self.assertIsInstance(result, Manufacturer)
        self.assertEqual(result.address, "")
class IndustrialProductsClientTest(TestCase):
    """Tests for IndustrialProductsClient."""

    def test_client_initialization(self):
        # A default client is expected to have no proxies and the ministry host.
        products_client = IndustrialProductsClient()
        self.assertIsNone(products_client.proxies)
        self.assertEqual(products_client.host, "minpromtorg.gov.ru")

    def test_fetch_products_success(self):
        # Serve a listing that points at one fixture-built Excel file and
        # compare the parsed registry numbers with the fixture rows.
        workbook, expected_rows = build_minpromtorg_products_excel(count=5)
        day = fake.date_between(start_date="-30d", end_date="today")
        stamp = day.strftime("%Y%m%d")
        xlsx_name = f"industrial_products_{stamp}.xlsx"
        xlsx_path = f"/files/{xlsx_name}"
        with TestHTTPServer() as server:
            listing = {
                "data": [
                    {
                        "name": IndustrialProductsClient().query,
                        "files": [{"name": xlsx_name, "url": xlsx_path}],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(
                xlsx_path,
                workbook,
                content_type=(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                ),
            )
            products_client = IndustrialProductsClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            fetched = products_client.fetch_products()
            self.assertEqual(len(fetched), len(expected_rows))
            self.assertIsInstance(fetched[0], IndustrialProduct)
            fetched_numbers = {item.registry_number for item in fetched}
            expected_numbers = {item.registry_number for item in expected_rows}
            self.assertSetEqual(fetched_numbers, expected_numbers)

    def test_fetch_products_no_files(self):
        # An empty listing is expected to produce an empty result.
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            products_client = IndustrialProductsClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            fetched = products_client.fetch_products()
            self.assertEqual(fetched, [])

    def test_get_latest_file_url_falls_back_to_excel_file(self):
        # Neither name carries a date; the .xlsx entry is expected to win.
        products_client = IndustrialProductsClient()
        candidates = [
            {"name": "readme.txt", "url": "/files/readme.txt"},
            {"name": "registry.xlsx", "url": "/files/registry.xlsx"},
        ]
        chosen = products_client._get_latest_file_url(candidates)
        self.assertEqual(chosen, "https://minpromtorg.gov.ru/files/registry.xlsx")

    def test_parse_row_valid(self):
        products_client = IndustrialProductsClient()
        columns = (
            "full_organisation_name",
            "ogrn",
            "inn",
            "registry_number",
            "product_name",
            "product_model",
            "okpd2_code",
            "tnved_code",
            "regulatory_document",
        )
        # Column name -> position in the row tuple below.
        header_map = {name: position for position, name in enumerate(columns)}
        cells = (
            fake.company(),
            _digits(13),
            _digits(10),
            f"MPP-{_digits(8)}",
            fake.sentence(nb_words=4),
            fake.bothify(text="MODEL-###"),
            "25.11",
            _digits(10),
            fake.sentence(nb_words=5),
        )
        parsed = products_client._parse_row(cells, header_map)
        self.assertIsInstance(parsed, IndustrialProduct)
        self.assertEqual(parsed.registry_number, cells[3])
        self.assertEqual(parsed.product_name, cells[4])

    def test_parse_row_without_required_fields(self):
        # Registry number and product name are empty strings; the row is
        # expected to be rejected with None.
        products_client = IndustrialProductsClient()
        columns = (
            "full_organisation_name",
            "ogrn",
            "inn",
            "registry_number",
            "product_name",
        )
        header_map = {name: position for position, name in enumerate(columns)}
        cells = (fake.company(), _digits(13), _digits(10), "", "")
        parsed = products_client._parse_row(cells, header_map)
        self.assertIsNone(parsed)
@tag("integration", "slow")
class IndustrialProductionClientIntegrationTest(TestCase):
    """Integration test using local HTTP server instead of external API."""

    def test_fetch_certificates_local_server(self):
        # Serve the listing and a fixture-built file from the local server
        # and expect one certificate per fixture row.
        workbook, expected_rows = build_minpromtorg_certificates_excel(count=3)
        day = fake.date_between(start_date="-30d", end_date="today")
        stamp = day.strftime("%Y%m%d")
        xlsx_name = f"data_resolutions_{stamp}.xlsx"
        xlsx_path = f"/files/{xlsx_name}"
        with TestHTTPServer() as server:
            listing = {
                "data": [
                    {
                        "name": IndustrialProductionClient().query,
                        "files": [{"name": xlsx_name, "url": xlsx_path}],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(xlsx_path, workbook)
            production_client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = production_client.fetch_certificates()
            self.assertEqual(len(fetched), len(expected_rows))
@tag("integration", "slow")
class ManufacturesClientIntegrationTest(TestCase):
    """Integration test using local HTTP server instead of external API."""

    def test_fetch_manufacturers_local_server(self):
        # Serve the listing and a fixture-built file from the local server
        # and expect one manufacturer per fixture row.
        workbook, expected_rows = build_minpromtorg_manufacturers_excel(count=3)
        day = fake.date_between(start_date="-30d", end_date="today")
        stamp = day.strftime("%Y%m%d")
        xlsx_name = f"data_orgs_{stamp}.xlsx"
        xlsx_path = f"/files/{xlsx_name}"
        with TestHTTPServer() as server:
            listing = {
                "data": [
                    {
                        "name": ManufacturesClient().query,
                        "files": [{"name": xlsx_name, "url": xlsx_path}],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(xlsx_path, workbook)
            manufactures_client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = manufactures_client.fetch_manufacturers()
            self.assertEqual(len(fetched), len(expected_rows))
@tag("integration", "slow")
class IndustrialProductsClientIntegrationTest(TestCase):
    """Integration test using local HTTP server instead of external API."""

    def test_fetch_products_local_server(self):
        # Serve the listing and a fixture-built file from the local server
        # and expect one product per fixture row.
        workbook, expected_rows = build_minpromtorg_products_excel(count=3)
        day = fake.date_between(start_date="-30d", end_date="today")
        stamp = day.strftime("%Y%m%d")
        xlsx_name = f"industrial_products_{stamp}.xlsx"
        xlsx_path = f"/files/{xlsx_name}"
        with TestHTTPServer() as server:
            listing = {
                "data": [
                    {
                        "name": IndustrialProductsClient().query,
                        "files": [{"name": xlsx_name, "url": xlsx_path}],
                    }
                ]
            }
            server.add_json("/api/kss-document-preview", listing)
            server.add_bytes(xlsx_path, workbook)
            products_client = IndustrialProductsClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = products_client.fetch_products()
            self.assertEqual(len(fetched), len(expected_rows))
class ProverkiClientTest(TestCase):
    """Tests for ProverkiClient."""

    def test_client_initialization(self):
        # A default client is expected to have no proxies and the registry host.
        client = ProverkiClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "proverki.gov.ru")

    def test_client_with_proxies(self):
        proxies = [_proxy_address(), _proxy_address()]
        client = ProverkiClient(proxies=proxies)
        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        with ProverkiClient() as client:
            self.assertIsInstance(client, ProverkiClient)

    def test_parse_xml_content_english_tags(self):
        # Parse the fixture-built XML and compare registration numbers with
        # the fixture rows.
        client = ProverkiClient()
        xml_content, rows = build_proverki_xml(count=2)
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), len(rows))
        self.assertIsInstance(inspections[0], Inspection)
        self.assertSetEqual(
            {i.registration_number for i in inspections},
            {r.registration_number for r in rows},
        )

    def test_parse_xml_content_russian_tags(self):
        client = ProverkiClient()
        reg_num = _digits(12)
        inn = _digits(10)
        ogrn = _digits(13)
        org_name = fake.company()
        authority = fake.company()
        # Every element is closed explicitly so the document is well-formed.
        xml_content = (
            "<Проверки>"
            "<КНМ>"
            f"<УчетныйНомер>{reg_num}</УчетныйНомер>"
            f"<ИНН>{inn}</ИНН>"
            f"<ОГРН>{ogrn}</ОГРН>"
            f"<Наименование>{org_name}</Наименование>"
            f"<КонтрольныйОрган>{authority}</КонтрольныйОрган>"
            "</КНМ>"
            "</Проверки>"
        ).encode()
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].registration_number, reg_num)
        self.assertEqual(inspections[0].inn, inn)
        self.assertEqual(inspections[0].control_authority, authority)

    def test_parse_xml_record_with_attributes(self):
        from defusedxml import ElementTree as ET

        row_inn = _digits(10)
        reg_num = _digits(12)
        # NOTE(review): the XML literal below is empty, so row_inn and reg_num
        # never reach the parser. It presumably held an element carrying both
        # values as attributes; restore it using the attribute names that
        # ProverkiClient._parse_xml_record reads - TODO confirm.
        element = ET.fromstring(
            f''
        )  # noqa: S314
        client = ProverkiClient()
        result = client._parse_xml_record(element)
        self.assertIsNotNone(result)
        self.assertEqual(result.inn, row_inn)
        self.assertEqual(result.registration_number, reg_num)

    def test_parse_xml_record_invalid(self):
        from defusedxml import ElementTree as ET

        # An empty string is not parseable XML, so a minimal element with no
        # attributes or children stands in for the "invalid record".
        # NOTE(review): the tag name is an assumption - confirm that
        # _parse_xml_record returns None for it.
        element = ET.fromstring("<inspection/>")  # noqa: S314
        client = ProverkiClient()
        self.assertIsNone(client._parse_xml_record(element))

    def test_parse_windows_1251_encoding(self):
        org_name = fake.company()
        inn = _digits(10)
        reg_num = _digits(12)
        # NOTE(review): these fragments contain no XML markup at all, only the
        # three interpolated values. They presumably lost an XML declaration
        # and the element tags around each value; restore them so the payload
        # is a real windows-1251 document - TODO confirm tag names against the
        # parser.
        xml_content = (
            ''
            ""
            ""
            f"{inn}"
            f"{reg_num}"
            f"{org_name}"
            ""
            ""
        ).encode("windows-1251")
        client = ProverkiClient()
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].organisation_name, org_name)

    def test_fetch_inspections_with_file_url(self):
        # Fetch the fixture XML from the local server via an explicit file URL.
        xml_content, rows = build_proverki_xml(count=2)
        with TestHTTPServer() as server:
            server.add_bytes(
                "/files/inspections.xml", xml_content, content_type="text/xml"
            )
            client = ProverkiClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                use_playwright=False,
                http_adapter=server.adapter,
            )
            inspections = client.fetch_inspections(
                file_url=f"{server.base_url}/files/inspections.xml"
            )
            self.assertEqual(len(inspections), len(rows))

    def test_fetch_inspections_no_files(self):
        # NOTE(review): no local server or adapter is injected here - confirm
        # this path makes no real network request.
        client = ProverkiClient(use_playwright=False)
        inspections = client.fetch_inspections()
        self.assertEqual(inspections, [])
@tag("integration", "slow")
class ProverkiClientIntegrationTest(TestCase):
    """Integration test using local HTTP server for proverki.gov.ru."""

    def test_fetch_inspections_local_server(self):
        # Serve the fixture XML from the local server and expect one
        # inspection per fixture row.
        xml_payload, expected_rows = build_proverki_xml(count=3)
        xml_path = "/files/inspections.xml"
        with TestHTTPServer() as server:
            server.add_bytes(xml_path, xml_payload, content_type="text/xml")
            proverki_client = ProverkiClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                use_playwright=False,
                http_adapter=server.adapter,
            )
            target_url = f"{server.base_url}/files/inspections.xml"
            fetched = proverki_client.fetch_inspections(file_url=target_url)
            self.assertEqual(len(fetched), len(expected_rows))