"""Integration-style tests for parsers clients using a local HTTP server.""" from __future__ import annotations from urllib.parse import urlparse import requests from requests.adapters import BaseAdapter from apps.parsers.clients.base import ( BaseHTTPClient, ConnectionError, HTTPClientError, HTTPError, ) from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer from apps.parsers.clients.proverki import ProverkiClient from apps.parsers.clients.proverki.schemas import Inspection from django.test import TestCase, tag from tests.utils import Response, TestHTTPServer from tests.utils.fixtures import ( build_minpromtorg_certificates_excel, build_minpromtorg_manufacturers_excel, build_proverki_xml, fake, ) def _host_from_base_url(base_url: str) -> str: parsed = urlparse(base_url) if parsed.port: return f"{parsed.hostname}:{parsed.port}" return parsed.hostname or "" def _base_url() -> str: return f"https://{fake.domain_name()}" def _proxy_address() -> str: return f"http://{fake.ipv4()}:{fake.port_number()}" class _RaisingAdapter(BaseAdapter): def __init__(self, exc: Exception) -> None: super().__init__() self._exc = exc def send(self, _request, **_kwargs): raise self._exc def close(self) -> None: return class BaseHTTPClientTest(TestCase): """Tests for BaseHTTPClient.""" def test_client_initialization(self): base_url = _base_url() client = BaseHTTPClient(base_url=base_url) self.assertEqual(client.base_url, base_url) self.assertIsNone(client.proxies) self.assertEqual(client.timeout, 30) def test_client_with_proxies(self): proxies = [_proxy_address(), _proxy_address()] client = BaseHTTPClient(base_url=_base_url(), proxies=proxies) self.assertEqual(client.proxies, proxies) def test_select_proxy_returns_none_without_proxies(self): client = BaseHTTPClient(base_url=_base_url()) self.assertIsNone(client._select_proxy()) def test_select_proxy_returns_random_from_list(self): proxies = [_proxy_address(), _proxy_address()] client = BaseHTTPClient(base_url=_base_url(), proxies=proxies) selected = client._select_proxy() self.assertIn(selected, proxies) def test_current_proxy_property(self): proxies = [_proxy_address()] client = BaseHTTPClient(base_url=_base_url(), proxies=proxies) self.assertIsNone(client.current_proxy) _ = client.session self.assertEqual(client.current_proxy, proxies[0]) def test_build_url_with_full_url(self): full = "https://example.com/path" client = BaseHTTPClient(base_url=_base_url()) self.assertEqual(client._build_url(full), full) def test_get_json_and_download_file(self): with TestHTTPServer() as server: server.add_json("/api/data", {"ok": True}) server.add_bytes("/files/data.bin", b"payload") client = BaseHTTPClient(base_url=server.base_url, adapter=server.adapter) data = client.get_json("/api/data") content = client.download_file("/files/data.bin") self.assertTrue(data["ok"]) self.assertEqual(content, b"payload") def test_post_success_and_error(self): def echo_handler(_req, body): return Response(status=200, body=body, headers={}) with TestHTTPServer() as server: server.add_route("POST", "/echo", echo_handler) server.add_route("GET", "/missing", lambda _req, _body: Response(status=404)) client = BaseHTTPClient(base_url=server.base_url, adapter=server.adapter) result = client.post("/echo", data=b"ping") self.assertEqual(result, b"ping") with self.assertRaises(HTTPError): client.get("/missing") server.add_route("POST", "/error", lambda _req, _body: Response(status=500)) with self.assertRaises(HTTPError): client.post("/error", data=b"fail") def test_download_file_error(self): with TestHTTPServer() as server: server.add_route("GET", "/missing.bin", lambda _req, _body: Response(status=404)) client = BaseHTTPClient(base_url=server.base_url, adapter=server.adapter) with self.assertRaises(HTTPError): client.download_file("/missing.bin") def test_connection_error(self): client = BaseHTTPClient(base_url="http://127.0.0.1:1", timeout=0.01) with self.assertRaises(ConnectionError): client.get("/unreachable") def test_context_manager_closes_session(self): with TestHTTPServer() as server: server.add_json("/ping", {"ok": True}) with BaseHTTPClient(base_url=server.base_url, adapter=server.adapter) as client: client.get_json("/ping") self.assertIsNotNone(client._session) self.assertIsNone(client._session) def test_rotate_proxy(self): proxies = [_proxy_address(), _proxy_address()] client = BaseHTTPClient(base_url=_base_url(), proxies=proxies) first = client.rotate_proxy() self.assertIn(first, proxies) def test_https_base_url_mounts_adapter(self): with TestHTTPServer() as server: base_url = f"https://{fake.domain_name()}" client = BaseHTTPClient(base_url=base_url, adapter=server.adapter) session = client.session self.assertIsNotNone(session) def test_rotate_proxy_closes_existing_session(self): proxies = [_proxy_address()] client = BaseHTTPClient(base_url=_base_url(), proxies=proxies) _ = client.session self.assertIsNotNone(client._session) client.rotate_proxy() self.assertIsNone(client._session) def test_get_timeout_raises_connection_error(self): adapter = _RaisingAdapter(requests.exceptions.Timeout(fake.sentence())) client = BaseHTTPClient(base_url=_base_url(), adapter=adapter) with self.assertRaises(ConnectionError): client.get("/timeout") def test_get_request_exception_raises_http_client_error(self): adapter = _RaisingAdapter(requests.exceptions.RequestException(fake.sentence())) client = BaseHTTPClient(base_url=_base_url(), adapter=adapter) with self.assertRaises(HTTPClientError): client.get("/boom") def test_post_connection_error_raises_connection_error(self): adapter = _RaisingAdapter(requests.exceptions.ConnectionError(fake.sentence())) client = BaseHTTPClient(base_url=_base_url(), adapter=adapter) with self.assertRaises(ConnectionError): client.post("/fail", data=fake.pystr(min_chars=5, max_chars=10)) def test_post_timeout_raises_connection_error(self): adapter = _RaisingAdapter(requests.exceptions.Timeout(fake.sentence())) client = BaseHTTPClient(base_url=_base_url(), adapter=adapter) with self.assertRaises(ConnectionError): client.post("/timeout", data=fake.pystr(min_chars=5, max_chars=10)) def test_post_request_exception_raises_http_client_error(self): adapter = _RaisingAdapter(requests.exceptions.RequestException(fake.sentence())) client = BaseHTTPClient(base_url=_base_url(), adapter=adapter) with self.assertRaises(HTTPClientError): client.post("/boom", data=fake.pystr(min_chars=5, max_chars=10)) def test_download_timeout_raises_connection_error(self): adapter = _RaisingAdapter(requests.exceptions.Timeout(fake.sentence())) client = BaseHTTPClient(base_url=_base_url(), adapter=adapter) with self.assertRaises(ConnectionError): client.download_file("/timeout.bin") def test_download_request_exception_raises_http_client_error(self): adapter = _RaisingAdapter(requests.exceptions.RequestException(fake.sentence())) client = BaseHTTPClient(base_url=_base_url(), adapter=adapter) with self.assertRaises(HTTPClientError): client.download_file("/boom.bin") class IndustrialProductionClientTest(TestCase): """Tests for IndustrialProductionClient.""" def test_client_initialization(self): client = IndustrialProductionClient() self.assertIsNone(client.proxies) self.assertEqual(client.host, "minpromtorg.gov.ru") def test_client_with_proxies(self): proxies = [_proxy_address(), _proxy_address()] client = IndustrialProductionClient(proxies=proxies) self.assertEqual(client.proxies, proxies) def test_context_manager(self): with IndustrialProductionClient() as client: self.assertIsInstance(client, IndustrialProductionClient) def test_fetch_certificates_success(self): excel_bytes, rows = build_minpromtorg_certificates_excel(count=5) date_str = fake.date_between(start_date="-30d", end_date="today").strftime( "%Y%m%d" ) file_name = f"data_resolutions_{date_str}.xlsx" with TestHTTPServer() as server: server.add_json( "/api/kss-document-preview", { "data": [ { "name": IndustrialProductionClient().query, "files": [ {"name": file_name, "url": f"/files/{file_name}"} ], } ] }, ) server.add_bytes( f"/files/{file_name}", excel_bytes, content_type=( "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ), ) client = IndustrialProductionClient( host=_host_from_base_url(server.base_url), scheme="http", http_adapter=server.adapter, ) certificates = client.fetch_certificates() self.assertEqual(len(certificates), len(rows)) self.assertIsInstance(certificates[0], IndustrialCertificate) self.assertSetEqual( {c.certificate_number for c in certificates}, {r.certificate_number for r in rows}, ) def test_fetch_certificates_no_files(self): with TestHTTPServer() as server: server.add_json("/api/kss-document-preview", {"data": []}) client = IndustrialProductionClient( host=_host_from_base_url(server.base_url), scheme="http", http_adapter=server.adapter, ) certificates = client.fetch_certificates() self.assertEqual(certificates, []) def test_get_latest_file_url_selects_newest(self): client = IndustrialProductionClient() dates = sorted( { fake.date_between(start_date="-90d", end_date="today") for _ in range(3) } ) files = [] for date in dates: date_str = date.strftime("%Y%m%d") files.append( { "name": f"data_resolutions_{date_str}.xlsx", "url": f"/files/{date_str}.xlsx", } ) url = client._get_latest_file_url(files) self.assertIn(dates[-1].strftime("%Y%m%d"), url) def test_parse_row_valid(self): client = IndustrialProductionClient() row = ( str(fake.date()), fake.bothify(text="??-####-#####"), str(fake.date()), fake.url(), fake.company(), "".join(str(fake.random_int(0, 9)) for _ in range(10)), "".join(str(fake.random_int(0, 9)) for _ in range(13)), ) result = client._parse_row(row) self.assertIsInstance(result, IndustrialCertificate) self.assertEqual(result.certificate_number, row[1]) self.assertEqual(result.inn, row[5]) def test_parse_row_invalid(self): client = IndustrialProductionClient() result = client._parse_row(("only", "two")) self.assertIsNone(result) class ManufacturesClientTest(TestCase): """Tests for ManufacturesClient.""" def test_client_initialization(self): client = ManufacturesClient() self.assertIsNone(client.proxies) self.assertEqual(client.host, "minpromtorg.gov.ru") def test_client_with_proxies(self): proxies = [_proxy_address(), _proxy_address()] client = ManufacturesClient(proxies=proxies) self.assertEqual(client.proxies, proxies) def test_context_manager(self): with ManufacturesClient() as client: self.assertIsInstance(client, ManufacturesClient) def test_fetch_manufacturers_success(self): excel_bytes, rows = build_minpromtorg_manufacturers_excel(count=5) date_str = fake.date_between(start_date="-30d", end_date="today").strftime( "%Y%m%d" ) file_name = f"data_orgs_{date_str}.xlsx" with TestHTTPServer() as server: server.add_json( "/api/kss-document-preview", { "data": [ { "name": ManufacturesClient().query, "files": [ {"name": file_name, "url": f"/files/{file_name}"} ], } ] }, ) server.add_bytes( f"/files/{file_name}", excel_bytes, content_type=( "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ), ) client = ManufacturesClient( host=_host_from_base_url(server.base_url), scheme="http", http_adapter=server.adapter, ) manufacturers = client.fetch_manufacturers() self.assertEqual(len(manufacturers), len(rows)) self.assertIsInstance(manufacturers[0], Manufacturer) self.assertSetEqual( {m.full_legal_name for m in manufacturers}, {r.full_legal_name for r in rows}, ) def test_fetch_manufacturers_no_files(self): with TestHTTPServer() as server: server.add_json("/api/kss-document-preview", {"data": []}) client = ManufacturesClient( host=_host_from_base_url(server.base_url), scheme="http", http_adapter=server.adapter, ) manufacturers = client.fetch_manufacturers() self.assertEqual(manufacturers, []) def test_get_latest_file_url_selects_newest(self): client = ManufacturesClient() dates = sorted( { fake.date_between(start_date="-90d", end_date="today") for _ in range(3) } ) files = [] for date in dates: date_str = date.strftime("%Y%m%d") files.append( {"name": f"data_orgs_{date_str}.xlsx", "url": f"/files/{date_str}"} ) url = client._get_latest_file_url(files) self.assertIn(dates[-1].strftime("%Y%m%d"), url) def test_parse_row_valid(self): client = ManufacturesClient() row = ( fake.company(), "".join(str(fake.random_int(0, 9)) for _ in range(10)), "".join(str(fake.random_int(0, 9)) for _ in range(13)), fake.address().replace("\n", ", "), ) result = client._parse_row(row) self.assertIsInstance(result, Manufacturer) self.assertEqual(result.full_legal_name, row[0]) self.assertEqual(result.inn, row[1]) def test_parse_row_without_address(self): client = ManufacturesClient() row = ( fake.company(), "".join(str(fake.random_int(0, 9)) for _ in range(10)), "".join(str(fake.random_int(0, 9)) for _ in range(13)), ) result = client._parse_row(row) self.assertIsInstance(result, Manufacturer) self.assertEqual(result.address, "") @tag("integration", "slow") class IndustrialProductionClientIntegrationTest(TestCase): """Integration test using local HTTP server instead of external API.""" def test_fetch_certificates_local_server(self): excel_bytes, rows = build_minpromtorg_certificates_excel(count=3) date_str = fake.date_between(start_date="-30d", end_date="today").strftime( "%Y%m%d" ) file_name = f"data_resolutions_{date_str}.xlsx" with TestHTTPServer() as server: server.add_json( "/api/kss-document-preview", { "data": [ { "name": IndustrialProductionClient().query, "files": [ {"name": file_name, "url": f"/files/{file_name}"} ], } ] }, ) server.add_bytes(f"/files/{file_name}", excel_bytes) client = IndustrialProductionClient( host=_host_from_base_url(server.base_url), scheme="http", timeout=30, http_adapter=server.adapter, ) certificates = client.fetch_certificates() self.assertEqual(len(certificates), len(rows)) @tag("integration", "slow") class ManufacturesClientIntegrationTest(TestCase): """Integration test using local HTTP server instead of external API.""" def test_fetch_manufacturers_local_server(self): excel_bytes, rows = build_minpromtorg_manufacturers_excel(count=3) date_str = fake.date_between(start_date="-30d", end_date="today").strftime( "%Y%m%d" ) file_name = f"data_orgs_{date_str}.xlsx" with TestHTTPServer() as server: server.add_json( "/api/kss-document-preview", { "data": [ { "name": ManufacturesClient().query, "files": [ {"name": file_name, "url": f"/files/{file_name}"} ], } ] }, ) server.add_bytes(f"/files/{file_name}", excel_bytes) client = ManufacturesClient( host=_host_from_base_url(server.base_url), scheme="http", timeout=30, http_adapter=server.adapter, ) manufacturers = client.fetch_manufacturers() self.assertEqual(len(manufacturers), len(rows)) class ProverkiClientTest(TestCase): """Tests for ProverkiClient.""" def test_client_initialization(self): client = ProverkiClient() self.assertIsNone(client.proxies) self.assertEqual(client.host, "proverki.gov.ru") def test_client_with_proxies(self): proxies = [_proxy_address(), _proxy_address()] client = ProverkiClient(proxies=proxies) self.assertEqual(client.proxies, proxies) def test_context_manager(self): with ProverkiClient() as client: self.assertIsInstance(client, ProverkiClient) def test_parse_xml_content_english_tags(self): client = ProverkiClient() xml_content, rows = build_proverki_xml(count=2) inspections = client._parse_xml_content(xml_content, None) self.assertEqual(len(inspections), len(rows)) self.assertIsInstance(inspections[0], Inspection) self.assertSetEqual( {i.registration_number for i in inspections}, {r.registration_number for r in rows}, ) def test_parse_xml_content_russian_tags(self): client = ProverkiClient() reg_num = "".join(str(fake.random_int(0, 9)) for _ in range(12)) inn = "".join(str(fake.random_int(0, 9)) for _ in range(10)) ogrn = "".join(str(fake.random_int(0, 9)) for _ in range(13)) org_name = fake.company() authority = fake.company() xml_content = ( "" "<Проверки>" "<КНМ>" f"<УчетныйНомер>{reg_num}" f"<ИНН>{inn}" f"<ОГРН>{ogrn}" f"<Наименование>{org_name}" f"<КонтрольныйОрган>{authority}" "" "" ).encode("utf-8") inspections = client._parse_xml_content(xml_content, None) self.assertEqual(len(inspections), 1) self.assertEqual(inspections[0].registration_number, reg_num) self.assertEqual(inspections[0].inn, inn) self.assertEqual(inspections[0].control_authority, authority) def test_parse_xml_record_with_attributes(self): from xml.etree import ElementTree as ET row_inn = "".join(str(fake.random_int(0, 9)) for _ in range(10)) reg_num = "".join(str(fake.random_int(0, 9)) for _ in range(12)) element = ET.fromstring( f"" ) # noqa: S314 client = ProverkiClient() result = client._parse_xml_record(element) self.assertIsNotNone(result) self.assertEqual(result.inn, row_inn) self.assertEqual(result.registration_number, reg_num) def test_parse_xml_record_invalid(self): from xml.etree import ElementTree as ET element = ET.fromstring("") # noqa: S314 client = ProverkiClient() self.assertIsNone(client._parse_xml_record(element)) def test_parse_windows_1251_encoding(self): org_name = fake.company() inn = "".join(str(fake.random_int(0, 9)) for _ in range(10)) reg_num = "".join(str(fake.random_int(0, 9)) for _ in range(12)) xml_content = ( "" "" "" f"{inn}" f"{reg_num}" f"{org_name}" "" "" ).encode("windows-1251") client = ProverkiClient() inspections = client._parse_xml_content(xml_content, None) self.assertEqual(len(inspections), 1) self.assertEqual(inspections[0].organisation_name, org_name) def test_fetch_inspections_with_file_url(self): xml_content, rows = build_proverki_xml(count=2) with TestHTTPServer() as server: server.add_bytes( "/files/inspections.xml", xml_content, content_type="text/xml" ) client = ProverkiClient( host=_host_from_base_url(server.base_url), scheme="http", use_playwright=False, http_adapter=server.adapter, ) inspections = client.fetch_inspections( file_url=f"{server.base_url}/files/inspections.xml" ) self.assertEqual(len(inspections), len(rows)) def test_fetch_inspections_no_files(self): client = ProverkiClient(use_playwright=False) inspections = client.fetch_inspections() self.assertEqual(inspections, []) @tag("integration", "slow") class ProverkiClientIntegrationTest(TestCase): """Integration test using local HTTP server for proverki.gov.ru.""" def test_fetch_inspections_local_server(self): xml_content, rows = build_proverki_xml(count=3) with TestHTTPServer() as server: server.add_bytes( "/files/inspections.xml", xml_content, content_type="text/xml" ) client = ProverkiClient( host=_host_from_base_url(server.base_url), scheme="http", use_playwright=False, http_adapter=server.adapter, ) inspections = client.fetch_inspections( file_url=f"{server.base_url}/files/inspections.xml" ) self.assertEqual(len(inspections), len(rows))