Files
mostovik-backend/tests/apps/parsers/test_clients.py
Aleksandr Meshchriakov b2355b0e63
All checks were successful
CI/CD Pipeline / Quality Gate (push) Successful in 20s
CI/CD Pipeline / Build and Push Images (push) Successful in 6s
CI/CD Pipeline / Internal Notify (push) Successful in 1s
CI/CD Pipeline / Deploy Dev in Dokploy (push) Successful in 1s
fix(parsers): support official product registry headers
2026-04-28 23:05:48 +02:00

947 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Integration-style tests for parsers clients using a local HTTP server."""
from __future__ import annotations
import io
from unittest.mock import patch
from urllib.parse import urlparse
import requests
from apps.parsers.clients.base import (
BaseHTTPClient,
ConnectionError,
HTTPClientError,
HTTPError,
)
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient
from apps.parsers.clients.minpromtorg.products import IndustrialProductsClient
from apps.parsers.clients.minpromtorg.schemas import (
IndustrialCertificate,
IndustrialProduct,
Manufacturer,
)
from apps.parsers.clients.proverki import ProverkiClient
from apps.parsers.clients.proverki.schemas import Inspection
from django.test import TestCase, tag
from openpyxl import Workbook
from openpyxl import load_workbook as openpyxl_load_workbook
from requests.adapters import BaseAdapter
from tests.utils import Response, TestHTTPServer
from tests.utils.fixtures import (
build_minpromtorg_certificates_excel,
build_minpromtorg_manufacturers_excel,
build_minpromtorg_products_excel,
build_proverki_xml,
fake,
)
def _host_from_base_url(base_url: str) -> str:
parsed = urlparse(base_url)
if parsed.port:
return f"{parsed.hostname}:{parsed.port}"
return parsed.hostname or ""
def _base_url() -> str:
    """Build an ``https://`` URL on a freshly generated fake domain name."""
    domain = fake.domain_name()
    return f"https://{domain}"
def _proxy_address() -> str:
    """Build an ``http://ip:port`` proxy address from fake IPv4 and port values."""
    ip_address = fake.ipv4()
    port = fake.port_number()
    return f"http://{ip_address}:{port}"
def _digits(length: int) -> str:
    """Return a string of ``length`` decimal digits drawn one by one from ``fake``."""
    drawn = [str(fake.random_int(0, 9)) for _ in range(length)]
    return "".join(drawn)
class _RaisingAdapter(BaseAdapter):
    """Transport adapter stub whose ``send`` always raises a preset exception."""

    def __init__(self, exc: Exception) -> None:
        """Remember ``exc`` so that every ``send`` call can raise it."""
        super().__init__()
        self._exc = exc

    def send(self, _request, **_kwargs):
        """Ignore the request and raise the stored exception."""
        raise self._exc

    def close(self) -> None:
        """Do nothing; the stub holds no resources."""
        return None
class BaseHTTPClientTest(TestCase):
    """Behavioural checks for ``BaseHTTPClient``."""

    @staticmethod
    def _client_raising(exc_type):
        """Return a client whose transport raises ``exc_type`` on every request."""
        failing_adapter = _RaisingAdapter(exc_type(fake.sentence()))
        return BaseHTTPClient(base_url=_base_url(), adapter=failing_adapter)

    def test_client_initialization(self):
        """Defaults: base URL stored as given, no proxies, timeout of 30."""
        url = _base_url()
        http = BaseHTTPClient(base_url=url)
        self.assertEqual(http.base_url, url)
        self.assertIsNone(http.proxies)
        self.assertEqual(http.timeout, 30)

    def test_client_with_proxies(self):
        """The proxy list given to the constructor is exposed unchanged."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        http = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertEqual(http.proxies, proxy_pool)

    def test_select_proxy_returns_none_without_proxies(self):
        """Without configured proxies ``_select_proxy`` yields ``None``."""
        http = BaseHTTPClient(base_url=_base_url())
        self.assertIsNone(http._select_proxy())

    def test_select_proxy_returns_random_from_list(self):
        """The selected proxy is one of the configured ones."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        http = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertIn(http._select_proxy(), proxy_pool)

    def test_current_proxy_property(self):
        """``current_proxy`` is unset until the session has been accessed."""
        proxy_pool = [_proxy_address()]
        http = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertIsNone(http.current_proxy)
        # Touching the property is what creates the session.
        _ = http.session
        self.assertEqual(http.current_proxy, proxy_pool[0])

    def test_build_url_with_full_url(self):
        """An absolute URL passes through ``_build_url`` untouched."""
        absolute = "https://example.com/path"
        http = BaseHTTPClient(base_url=_base_url())
        self.assertEqual(http._build_url(absolute), absolute)

    def test_get_json_and_download_file(self):
        """JSON and binary endpoints are both readable through the client."""
        with TestHTTPServer() as server:
            server.add_json("/api/data", {"ok": True})
            server.add_bytes("/files/data.bin", b"payload")
            http = BaseHTTPClient(base_url=server.base_url, adapter=server.adapter)
            payload = http.get_json("/api/data")
            blob = http.download_file("/files/data.bin")
        self.assertTrue(payload["ok"])
        self.assertEqual(blob, b"payload")

    def test_post_success_and_error(self):
        """POST echoes the body on 200; 404 and 500 replies raise ``HTTPError``."""
        with TestHTTPServer() as server:
            server.add_route(
                "POST",
                "/echo",
                lambda _req, body: Response(status=200, body=body, headers={}),
            )
            server.add_route(
                "GET", "/missing", lambda _req, _body: Response(status=404)
            )
            http = BaseHTTPClient(base_url=server.base_url, adapter=server.adapter)
            echoed = http.post("/echo", data=b"ping")
            self.assertEqual(echoed, b"ping")
            with self.assertRaises(HTTPError):
                http.get("/missing")
            server.add_route(
                "POST", "/error", lambda _req, _body: Response(status=500)
            )
            with self.assertRaises(HTTPError):
                http.post("/error", data=b"fail")

    def test_download_file_error(self):
        """Downloading from a 404 endpoint raises ``HTTPError``."""
        with TestHTTPServer() as server:
            server.add_route(
                "GET", "/missing.bin", lambda _req, _body: Response(status=404)
            )
            http = BaseHTTPClient(base_url=server.base_url, adapter=server.adapter)
            with self.assertRaises(HTTPError):
                http.download_file("/missing.bin")

    def test_connection_error(self):
        """A GET against ``127.0.0.1:1`` surfaces as ``ConnectionError``."""
        http = BaseHTTPClient(base_url="http://127.0.0.1:1", timeout=0.01)
        with self.assertRaises(ConnectionError):
            http.get("/unreachable")

    def test_context_manager_closes_session(self):
        """The session exists inside the ``with`` body and is dropped on exit."""
        with TestHTTPServer() as server:
            server.add_json("/ping", {"ok": True})
            with BaseHTTPClient(
                base_url=server.base_url, adapter=server.adapter
            ) as http:
                http.get_json("/ping")
                self.assertIsNotNone(http._session)
            self.assertIsNone(http._session)

    def test_rotate_proxy(self):
        """``rotate_proxy`` returns one of the configured proxies."""
        proxy_pool = [_proxy_address(), _proxy_address()]
        http = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        self.assertIn(http.rotate_proxy(), proxy_pool)

    def test_https_base_url_mounts_adapter(self):
        """A session can be built for an https base URL with a custom adapter."""
        with TestHTTPServer() as server:
            http = BaseHTTPClient(base_url=_base_url(), adapter=server.adapter)
            opened = http.session
        self.assertIsNotNone(opened)

    def test_rotate_proxy_closes_existing_session(self):
        """Rotating the proxy discards the session that was already open."""
        proxy_pool = [_proxy_address()]
        http = BaseHTTPClient(base_url=_base_url(), proxies=proxy_pool)
        _ = http.session
        self.assertIsNotNone(http._session)
        http.rotate_proxy()
        self.assertIsNone(http._session)

    def test_get_timeout_raises_connection_error(self):
        """A transport timeout on GET is reported as ``ConnectionError``."""
        http = self._client_raising(requests.exceptions.Timeout)
        with self.assertRaises(ConnectionError):
            http.get("/timeout")

    def test_get_request_exception_raises_http_client_error(self):
        """A generic transport failure on GET becomes ``HTTPClientError``."""
        http = self._client_raising(requests.exceptions.RequestException)
        with self.assertRaises(HTTPClientError):
            http.get("/boom")

    def test_post_connection_error_raises_connection_error(self):
        """A transport connection error on POST is reported as ``ConnectionError``."""
        http = self._client_raising(requests.exceptions.ConnectionError)
        body = fake.pystr(min_chars=5, max_chars=10)
        with self.assertRaises(ConnectionError):
            http.post("/fail", data=body)

    def test_post_timeout_raises_connection_error(self):
        """A transport timeout on POST is reported as ``ConnectionError``."""
        http = self._client_raising(requests.exceptions.Timeout)
        body = fake.pystr(min_chars=5, max_chars=10)
        with self.assertRaises(ConnectionError):
            http.post("/timeout", data=body)

    def test_post_request_exception_raises_http_client_error(self):
        """A generic transport failure on POST becomes ``HTTPClientError``."""
        http = self._client_raising(requests.exceptions.RequestException)
        body = fake.pystr(min_chars=5, max_chars=10)
        with self.assertRaises(HTTPClientError):
            http.post("/boom", data=body)

    def test_download_timeout_raises_connection_error(self):
        """A transport timeout during download is reported as ``ConnectionError``."""
        http = self._client_raising(requests.exceptions.Timeout)
        with self.assertRaises(ConnectionError):
            http.download_file("/timeout.bin")

    def test_download_request_exception_raises_http_client_error(self):
        """A generic transport failure during download becomes ``HTTPClientError``."""
        http = self._client_raising(requests.exceptions.RequestException)
        with self.assertRaises(HTTPClientError):
            http.download_file("/boom.bin")
class IndustrialProductionClientTest(TestCase):
    """Tests for IndustrialProductionClient."""

    def test_client_initialization(self):
        """Default client has no proxies and targets minpromtorg.gov.ru."""
        client = IndustrialProductionClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """Proxies passed to the constructor are exposed unchanged."""
        proxies = [_proxy_address(), _proxy_address()]
        client = IndustrialProductionClient(proxies=proxies)
        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """The client can be used as a context manager and yields itself."""
        with IndustrialProductionClient() as client:
            self.assertIsInstance(client, IndustrialProductionClient)

    def test_fetch_certificates_success(self):
        """Certificates listed in the served Excel file are all returned."""
        excel_bytes, rows = build_minpromtorg_certificates_excel(count=5)
        date_str = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_resolutions_{date_str}.xlsx"
        with TestHTTPServer() as server:
            server.add_json(
                "/api/kss-document-preview",
                {
                    "data": [
                        {
                            "name": IndustrialProductionClient().query,
                            "files": [
                                {"name": file_name, "url": f"/files/{file_name}"}
                            ],
                        }
                    ]
                },
            )
            server.add_bytes(
                f"/files/{file_name}",
                excel_bytes,
                content_type=(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                ),
            )
            client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            certificates = client.fetch_certificates()
        self.assertEqual(len(certificates), len(rows))
        self.assertIsInstance(certificates[0], IndustrialCertificate)
        self.assertSetEqual(
            {c.certificate_number for c in certificates},
            {r.certificate_number for r in rows},
        )

    def test_fetch_certificates_no_files(self):
        """An empty preview listing produces an empty certificate list."""
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            client = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            certificates = client.fetch_certificates()
        self.assertEqual(certificates, [])

    def test_get_latest_file_url_selects_newest(self):
        """The URL of the file with the most recent date stamp is chosen."""
        client = IndustrialProductionClient()
        # A set removes duplicate random dates; sorting puts the newest last.
        dates = sorted(
            {fake.date_between(start_date="-90d", end_date="today") for _ in range(3)}
        )
        files = []
        for date in dates:
            date_str = date.strftime("%Y%m%d")
            files.append(
                {
                    "name": f"data_resolutions_{date_str}.xlsx",
                    "url": f"/files/{date_str}.xlsx",
                }
            )
        url = client._get_latest_file_url(files)
        self.assertIn(dates[-1].strftime("%Y%m%d"), url)

    def test_parse_row_valid(self):
        """A seven-column row is parsed into an ``IndustrialCertificate``."""
        client = IndustrialProductionClient()
        row = (
            str(fake.date()),
            fake.bothify(text="??-####-#####"),
            str(fake.date()),
            fake.url(),
            fake.company(),
            _digits(10),  # read back below as ``inn``
            _digits(13),
        )
        result = client._parse_row(row)
        self.assertIsInstance(result, IndustrialCertificate)
        self.assertEqual(result.certificate_number, row[1])
        self.assertEqual(result.inn, row[5])

    def test_parse_row_invalid(self):
        """A two-column row is rejected and yields ``None``."""
        client = IndustrialProductionClient()
        result = client._parse_row(("only", "two"))
        self.assertIsNone(result)
class ManufacturesClientTest(TestCase):
    """Tests for ManufacturesClient."""

    def test_client_initialization(self):
        """Default client has no proxies and targets minpromtorg.gov.ru."""
        client = ManufacturesClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """Proxies passed to the constructor are exposed unchanged."""
        proxies = [_proxy_address(), _proxy_address()]
        client = ManufacturesClient(proxies=proxies)
        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """The client can be used as a context manager and yields itself."""
        with ManufacturesClient() as client:
            self.assertIsInstance(client, ManufacturesClient)

    def test_fetch_manufacturers_success(self):
        """Manufacturers listed in the served Excel file are all returned."""
        excel_bytes, rows = build_minpromtorg_manufacturers_excel(count=5)
        date_str = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"data_orgs_{date_str}.xlsx"
        with TestHTTPServer() as server:
            server.add_json(
                "/api/kss-document-preview",
                {
                    "data": [
                        {
                            "name": ManufacturesClient().query,
                            "files": [
                                {"name": file_name, "url": f"/files/{file_name}"}
                            ],
                        }
                    ]
                },
            )
            server.add_bytes(
                f"/files/{file_name}",
                excel_bytes,
                content_type=(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                ),
            )
            client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            manufacturers = client.fetch_manufacturers()
        self.assertEqual(len(manufacturers), len(rows))
        self.assertIsInstance(manufacturers[0], Manufacturer)
        self.assertSetEqual(
            {m.full_legal_name for m in manufacturers},
            {r.full_legal_name for r in rows},
        )

    def test_fetch_manufacturers_no_files(self):
        """An empty preview listing produces an empty manufacturer list."""
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            client = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                http_adapter=server.adapter,
            )
            manufacturers = client.fetch_manufacturers()
        self.assertEqual(manufacturers, [])

    def test_get_latest_file_url_selects_newest(self):
        """The URL of the file with the most recent date stamp is chosen."""
        client = ManufacturesClient()
        # A set removes duplicate random dates; sorting puts the newest last.
        dates = sorted(
            {fake.date_between(start_date="-90d", end_date="today") for _ in range(3)}
        )
        files = []
        for date in dates:
            date_str = date.strftime("%Y%m%d")
            files.append(
                {"name": f"data_orgs_{date_str}.xlsx", "url": f"/files/{date_str}"}
            )
        url = client._get_latest_file_url(files)
        self.assertIn(dates[-1].strftime("%Y%m%d"), url)

    def test_parse_row_valid(self):
        """A four-column row is parsed into a ``Manufacturer``."""
        client = ManufacturesClient()
        row = (
            fake.company(),
            _digits(10),  # read back below as ``inn``
            _digits(13),
            fake.address().replace("\n", ", "),
        )
        result = client._parse_row(row)
        self.assertIsInstance(result, Manufacturer)
        self.assertEqual(result.full_legal_name, row[0])
        self.assertEqual(result.inn, row[1])

    def test_parse_row_without_address(self):
        """A row lacking the address column parses with an empty address."""
        client = ManufacturesClient()
        row = (
            fake.company(),
            _digits(10),
            _digits(13),
        )
        result = client._parse_row(row)
        self.assertIsInstance(result, Manufacturer)
        self.assertEqual(result.address, "")
class IndustrialProductsClientTest(TestCase):
    """Tests for IndustrialProductsClient."""

    @staticmethod
    def _publish_registry_file(server, file_name, payload, **add_bytes_kwargs):
        """Serve the preview listing and the downloadable file from ``server``.

        Extra keyword arguments (e.g. ``content_type``) are forwarded to
        ``server.add_bytes`` unchanged.
        """
        server.add_json(
            "/api/kss-document-preview",
            {
                "data": [
                    {
                        "name": IndustrialProductsClient().query,
                        "files": [
                            {"name": file_name, "url": f"/files/{file_name}"}
                        ],
                    }
                ]
            },
        )
        server.add_bytes(f"/files/{file_name}", payload, **add_bytes_kwargs)

    @staticmethod
    def _local_client(server):
        """Return a client pointed at the local test ``server`` over http."""
        return IndustrialProductsClient(
            host=_host_from_base_url(server.base_url),
            scheme="http",
            http_adapter=server.adapter,
        )

    def test_client_initialization(self):
        """Default client has no proxies and targets minpromtorg.gov.ru."""
        client = IndustrialProductsClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "minpromtorg.gov.ru")

    def test_fetch_products_success(self):
        """Products listed in the served Excel file are all returned."""
        excel_bytes, rows = build_minpromtorg_products_excel(count=5)
        date_str = fake.date_between(start_date="-30d", end_date="today").strftime(
            "%Y%m%d"
        )
        file_name = f"industrial_products_{date_str}.xlsx"
        with TestHTTPServer() as server:
            self._publish_registry_file(
                server,
                file_name,
                excel_bytes,
                content_type=(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                ),
            )
            products = self._local_client(server).fetch_products()
        self.assertEqual(len(products), len(rows))
        self.assertIsInstance(products[0], IndustrialProduct)
        self.assertSetEqual(
            {product.registry_number for product in products},
            {row.registry_number for row in rows},
        )

    def test_fetch_products_no_files(self):
        """An empty preview listing produces an empty product list."""
        with TestHTTPServer() as server:
            server.add_json("/api/kss-document-preview", {"data": []})
            products = self._local_client(server).fetch_products()
        self.assertEqual(products, [])

    def test_fetch_products_uses_read_only_workbook(self):
        """The workbook is opened through ``load_workbook(read_only=True)``."""
        excel_bytes, rows = build_minpromtorg_products_excel(count=2)
        file_name = "industrial_products_20260428.xlsx"
        with TestHTTPServer() as server:
            self._publish_registry_file(server, file_name, excel_bytes)
            client = self._local_client(server)
            # ``wraps`` keeps the real loader working while recording the call.
            with patch(
                "apps.parsers.clients.minpromtorg.products.load_workbook",
                wraps=openpyxl_load_workbook,
            ) as load_workbook_mock:
                products = client.fetch_products()
        self.assertEqual(len(products), len(rows))
        # Fail with an assertion (not AttributeError/KeyError) when the loader
        # was never invoked or ``read_only`` was not passed by keyword.
        load_workbook_mock.assert_called()
        self.assertTrue(load_workbook_mock.call_args.kwargs.get("read_only"))

    def test_fetch_products_supports_official_english_headers(self):
        """A sheet using the English registry headers is parsed correctly."""
        workbook = Workbook()
        worksheet = workbook.active
        worksheet.append(
            [
                "Nameoforg",
                "OGRN",
                "INN",
                "Orgaddr",
                "Productmanufaddress",
                "Regnumber",
                "Ektrudp",
                "Docdate",
                "Docvalidtill",
                "Enddate",
                "Registernumber",
                "Productname",
                "OKPD2",
                "TNVED",
            ]
        )
        worksheet.append(
            [
                "ООО Тест",
                "1027700032953",
                "7701256405",
                "-",
                "-",
                "4963\\2\\2023",
                "-",
                "2026-04-28",
                "2029-04-27",
                "-",
                "10092840",
                "Плата материнская",
                "26.20.30",
                "8471 80 000 0",
            ]
        )
        buffer = io.BytesIO()
        workbook.save(buffer)
        workbook.close()
        file_name = "industrial_products_20260428.xlsx"
        with TestHTTPServer() as server:
            self._publish_registry_file(server, file_name, buffer.getvalue())
            products = self._local_client(server).fetch_products()
        self.assertEqual(len(products), 1)
        product = products[0]
        self.assertEqual(product.full_organisation_name, "ООО Тест")
        self.assertEqual(product.registry_number, "10092840")
        self.assertEqual(product.product_name, "Плата материнская")
        self.assertEqual(product.okpd2_code, "26.20.30")
        self.assertEqual(product.tnved_code, "8471 80 000 0")

    def test_get_latest_file_url_falls_back_to_excel_file(self):
        """Without a date-stamped name the ``.xlsx`` entry is still selected."""
        client = IndustrialProductsClient()
        files = [
            {"name": "readme.txt", "url": "/files/readme.txt"},
            {"name": "registry.xlsx", "url": "/files/registry.xlsx"},
        ]
        url = client._get_latest_file_url(files)
        self.assertEqual(url, "https://minpromtorg.gov.ru/files/registry.xlsx")

    def test_parse_row_valid(self):
        """A complete row plus header map yields an ``IndustrialProduct``."""
        client = IndustrialProductsClient()
        header_map = {
            "full_organisation_name": 0,
            "ogrn": 1,
            "inn": 2,
            "registry_number": 3,
            "product_name": 4,
            "product_model": 5,
            "okpd2_code": 6,
            "tnved_code": 7,
            "regulatory_document": 8,
        }
        row = (
            fake.company(),
            _digits(13),
            _digits(10),
            f"MPP-{_digits(8)}",
            fake.sentence(nb_words=4),
            fake.bothify(text="MODEL-###"),
            "25.11",
            _digits(10),
            fake.sentence(nb_words=5),
        )
        result = client._parse_row(row, header_map)
        self.assertIsInstance(result, IndustrialProduct)
        self.assertEqual(result.registry_number, row[3])
        self.assertEqual(result.product_name, row[4])

    def test_parse_row_without_required_fields(self):
        """Empty registry number and product name make the row unparseable."""
        client = IndustrialProductsClient()
        header_map = {
            "full_organisation_name": 0,
            "ogrn": 1,
            "inn": 2,
            "registry_number": 3,
            "product_name": 4,
        }
        result = client._parse_row(
            (fake.company(), _digits(13), _digits(10), "", ""), header_map
        )
        self.assertIsNone(result)
@tag("integration", "slow")
class IndustrialProductionClientIntegrationTest(TestCase):
    """End-to-end certificate fetch against the in-process HTTP server."""

    def test_fetch_certificates_local_server(self):
        """Every generated certificate row is returned by the client."""
        workbook_bytes, expected_rows = build_minpromtorg_certificates_excel(count=3)
        published_on = fake.date_between(start_date="-30d", end_date="today")
        stamp = published_on.strftime("%Y%m%d")
        xlsx_name = f"data_resolutions_{stamp}.xlsx"
        with TestHTTPServer() as server:
            listing_entry = {
                "name": IndustrialProductionClient().query,
                "files": [{"name": xlsx_name, "url": f"/files/{xlsx_name}"}],
            }
            server.add_json("/api/kss-document-preview", {"data": [listing_entry]})
            server.add_bytes(f"/files/{xlsx_name}", workbook_bytes)
            api = IndustrialProductionClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = api.fetch_certificates()
        self.assertEqual(len(fetched), len(expected_rows))
@tag("integration", "slow")
class ManufacturesClientIntegrationTest(TestCase):
    """End-to-end manufacturer fetch against the in-process HTTP server."""

    def test_fetch_manufacturers_local_server(self):
        """Every generated manufacturer row is returned by the client."""
        workbook_bytes, expected_rows = build_minpromtorg_manufacturers_excel(count=3)
        published_on = fake.date_between(start_date="-30d", end_date="today")
        stamp = published_on.strftime("%Y%m%d")
        xlsx_name = f"data_orgs_{stamp}.xlsx"
        with TestHTTPServer() as server:
            listing_entry = {
                "name": ManufacturesClient().query,
                "files": [{"name": xlsx_name, "url": f"/files/{xlsx_name}"}],
            }
            server.add_json("/api/kss-document-preview", {"data": [listing_entry]})
            server.add_bytes(f"/files/{xlsx_name}", workbook_bytes)
            api = ManufacturesClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = api.fetch_manufacturers()
        self.assertEqual(len(fetched), len(expected_rows))
@tag("integration", "slow")
class IndustrialProductsClientIntegrationTest(TestCase):
    """End-to-end product fetch against the in-process HTTP server."""

    def test_fetch_products_local_server(self):
        """Every generated product row is returned by the client."""
        workbook_bytes, expected_rows = build_minpromtorg_products_excel(count=3)
        published_on = fake.date_between(start_date="-30d", end_date="today")
        stamp = published_on.strftime("%Y%m%d")
        xlsx_name = f"industrial_products_{stamp}.xlsx"
        with TestHTTPServer() as server:
            listing_entry = {
                "name": IndustrialProductsClient().query,
                "files": [{"name": xlsx_name, "url": f"/files/{xlsx_name}"}],
            }
            server.add_json("/api/kss-document-preview", {"data": [listing_entry]})
            server.add_bytes(f"/files/{xlsx_name}", workbook_bytes)
            api = IndustrialProductsClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                timeout=30,
                http_adapter=server.adapter,
            )
            fetched = api.fetch_products()
        self.assertEqual(len(fetched), len(expected_rows))
class ProverkiClientTest(TestCase):
    """Tests for ProverkiClient."""

    def test_client_initialization(self):
        """Default client has no proxies and targets proverki.gov.ru."""
        client = ProverkiClient()
        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "proverki.gov.ru")

    def test_client_with_proxies(self):
        """Proxies passed to the constructor are exposed unchanged."""
        proxies = [_proxy_address(), _proxy_address()]
        client = ProverkiClient(proxies=proxies)
        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """The client can be used as a context manager and yields itself."""
        with ProverkiClient() as client:
            self.assertIsInstance(client, ProverkiClient)

    def test_parse_xml_content_english_tags(self):
        """XML produced by the fixture builder parses into ``Inspection`` objects."""
        client = ProverkiClient()
        xml_content, rows = build_proverki_xml(count=2)
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), len(rows))
        self.assertIsInstance(inspections[0], Inspection)
        self.assertSetEqual(
            {i.registration_number for i in inspections},
            {r.registration_number for r in rows},
        )

    def test_parse_xml_content_russian_tags(self):
        """Records using Russian element names are parsed as well."""
        client = ProverkiClient()
        reg_num = _digits(12)
        inn = _digits(10)
        ogrn = _digits(13)
        org_name = fake.company()
        authority = fake.company()
        xml_content = (
            "<?xml version='1.0' encoding='utf-8'?>"
            "<Проверки>"
            "<КНМ>"
            f"<УчетныйНомер>{reg_num}</УчетныйНомер>"
            f"<ИНН>{inn}</ИНН>"
            f"<ОГРН>{ogrn}</ОГРН>"
            f"<Наименование>{org_name}</Наименование>"
            f"<КонтрольныйОрган>{authority}</КонтрольныйОрган>"
            "</КНМ>"
            "</Проверки>"
        ).encode()
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].registration_number, reg_num)
        self.assertEqual(inspections[0].inn, inn)
        self.assertEqual(inspections[0].control_authority, authority)

    def test_parse_xml_record_with_attributes(self):
        """Values carried as XML attributes are read into the record."""
        from defusedxml import ElementTree as ET

        row_inn = _digits(10)
        reg_num = _digits(12)
        markup = f'<inspection inn="{row_inn}" registration_number="{reg_num}" />'
        element = ET.fromstring(markup)  # noqa: S314
        client = ProverkiClient()
        result = client._parse_xml_record(element)
        self.assertIsNotNone(result)
        self.assertEqual(result.inn, row_inn)
        self.assertEqual(result.registration_number, reg_num)

    def test_parse_xml_record_invalid(self):
        """An element without any data yields ``None``."""
        from defusedxml import ElementTree as ET

        element = ET.fromstring("<empty_record></empty_record>")  # noqa: S314
        client = ProverkiClient()
        self.assertIsNone(client._parse_xml_record(element))

    def test_parse_windows_1251_encoding(self):
        """Content declared and encoded as windows-1251 is decoded correctly."""
        org_name = fake.company()
        inn = _digits(10)
        reg_num = _digits(12)
        xml_content = (
            '<?xml version="1.0" encoding="windows-1251"?>'
            "<inspections>"
            "<inspection>"
            f"<inn>{inn}</inn>"
            f"<registration_number>{reg_num}</registration_number>"
            f"<organisation_name>{org_name}</organisation_name>"
            "</inspection>"
            "</inspections>"
        ).encode("windows-1251")
        client = ProverkiClient()
        inspections = client._parse_xml_content(xml_content, None)
        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].organisation_name, org_name)

    def test_fetch_inspections_with_file_url(self):
        """An explicit ``file_url`` is downloaded and parsed."""
        xml_content, rows = build_proverki_xml(count=2)
        with TestHTTPServer() as server:
            server.add_bytes(
                "/files/inspections.xml", xml_content, content_type="text/xml"
            )
            client = ProverkiClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                use_playwright=False,
                http_adapter=server.adapter,
            )
            inspections = client.fetch_inspections(
                file_url=f"{server.base_url}/files/inspections.xml"
            )
        self.assertEqual(len(inspections), len(rows))

    def test_fetch_inspections_no_files(self):
        """Without Playwright and without a file URL nothing is returned."""
        client = ProverkiClient(use_playwright=False)
        inspections = client.fetch_inspections()
        self.assertEqual(inspections, [])
@tag("integration", "slow")
class ProverkiClientIntegrationTest(TestCase):
    """End-to-end inspection fetch against the in-process HTTP server."""

    def test_fetch_inspections_local_server(self):
        """Every generated inspection record is returned by the client."""
        xml_bytes, expected_rows = build_proverki_xml(count=3)
        with TestHTTPServer() as server:
            server.add_bytes(
                "/files/inspections.xml", xml_bytes, content_type="text/xml"
            )
            api = ProverkiClient(
                host=_host_from_base_url(server.base_url),
                scheme="http",
                use_playwright=False,
                http_adapter=server.adapter,
            )
            source_url = f"{server.base_url}/files/inspections.xml"
            fetched = api.fetch_inspections(file_url=source_url)
        self.assertEqual(len(fetched), len(expected_rows))