All checks were successful
CI/CD Pipeline / Code Quality Checks (pull_request) Successful in 1m6s
CI/CD Pipeline / Run Tests (pull_request) Successful in 1m18s
CI/CD Pipeline / Build Docker Images (pull_request) Has been skipped
CI/CD Pipeline / Push to Gitea Registry (pull_request) Has been skipped
1086 lines
43 KiB
Python
1086 lines
43 KiB
Python
"""Tests for parsers clients."""
|
||
|
||
import json
|
||
import zipfile
|
||
from io import BytesIO
|
||
from unittest.mock import Mock, patch
|
||
|
||
from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError
|
||
from apps.parsers.clients.common import (
|
||
GenericParserItem,
|
||
StructuredDataClient,
|
||
StructuredDataClientError,
|
||
)
|
||
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
|
||
from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient
|
||
from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer
|
||
from apps.parsers.clients.proverki import ProverkiClient
|
||
from apps.parsers.clients.proverki.schemas import Inspection
|
||
from apps.parsers.clients.trudvsem import TrudvsemClient
|
||
from apps.parsers.models import ParserLoadLog
|
||
from django.test import TestCase, tag
|
||
from faker import Faker
|
||
from openpyxl import Workbook
|
||
|
||
# Module-level Faker instance created with the "ru_RU" locale.
fake = Faker("ru_RU")
|
||
|
||
|
||
class BaseHTTPClientTest(TestCase):
    """Unit tests for BaseHTTPClient construction, proxy handling and downloads."""

    BASE_URL = "https://example.com"
    # Kept as a tuple so every test builds its own fresh list from it.
    PROXY_POOL = ("http://proxy1:8080", "http://proxy2:8080")

    @staticmethod
    def _streamed_response(content_length, chunks):
        """Return a Mock standing in for a successful streamed response.

        Args:
            content_length: Value served under the ``Content-Length`` header.
            chunks: Byte chunks returned by ``iter_content``.
        """
        return Mock(
            ok=True,
            headers={"Content-Length": content_length},
            close=Mock(),
            **{"iter_content.return_value": chunks},
        )

    def test_client_initialization(self):
        """A client built from a base URL alone has no proxies and timeout 30."""
        http_client = BaseHTTPClient(base_url=self.BASE_URL)

        self.assertEqual(http_client.base_url, "https://example.com")
        self.assertIsNone(http_client.proxies)
        self.assertEqual(http_client.timeout, 30)

    def test_client_with_proxies(self):
        """The proxy list handed to the constructor is exposed unchanged."""
        pool = list(self.PROXY_POOL)
        http_client = BaseHTTPClient(base_url=self.BASE_URL, proxies=pool)

        self.assertEqual(http_client.proxies, pool)

    def test_select_proxy_returns_none_without_proxies(self):
        """_select_proxy yields None when the client has no proxies."""
        chosen = BaseHTTPClient(base_url=self.BASE_URL)._select_proxy()

        self.assertIsNone(chosen)

    def test_select_proxy_returns_random_from_list(self):
        """_select_proxy yields one of the configured proxies."""
        pool = list(self.PROXY_POOL)
        chosen = BaseHTTPClient(base_url=self.BASE_URL, proxies=pool)._select_proxy()

        self.assertIn(chosen, pool)

    def test_current_proxy_property(self):
        """current_proxy is None at first and set once the session is accessed."""
        only_proxy = "http://proxy:8080"
        http_client = BaseHTTPClient(base_url=self.BASE_URL, proxies=[only_proxy])

        # Nothing has touched the session yet, so no proxy is reported.
        self.assertIsNone(http_client.current_proxy)

        # Accessing the session is what makes a proxy show up.
        _ = http_client.session
        self.assertEqual(http_client.current_proxy, only_proxy)

    def test_download_file_rejects_large_content_length_before_body_read(self):
        """An oversized Content-Length raises before any body chunk is read."""
        http_client = BaseHTTPClient(base_url=self.BASE_URL)
        oversized = self._streamed_response("10", [b"too-large"])
        http_client.session.get = Mock(return_value=oversized)

        with self.assertRaises(HTTPClientError):
            http_client.download_file("/data.csv", max_size_bytes=5)

        # The body stays unread and the response is closed exactly once.
        oversized.iter_content.assert_not_called()
        oversized.close.assert_called_once()

    def test_download_file_passes_ssl_verification_flag(self):
        """verify_ssl=False on the client reaches session.get as verify=False."""
        http_client = BaseHTTPClient(base_url=self.BASE_URL, verify_ssl=False)
        http_client.session.get = Mock(
            return_value=self._streamed_response("4", [b"data"]),
        )

        downloaded = http_client.download_file("/data.csv")

        self.assertEqual(downloaded, b"data")
        http_client.session.get.assert_called_once_with(
            "https://example.com/data.csv",
            stream=True,
            timeout=30,
            verify=False,
        )
|
||
|
||
|
||
class StructuredDataClientTest(TestCase):
    """Tests for StructuredDataClient.

    Most tests hand an inline fixture to ``fetch_records`` through ``content``
    and ``file_name``; two tests patch ``BaseHTTPClient`` and pass ``file_url``
    instead. Multi-line HTML fixtures are kept flush-left so the literals carry
    only the markup itself.
    """

    def test_parse_json_records(self):
        """Test JSON parsing and normalization."""
        payload = {
            "data": [
                {
                    "id": "FIN-1",
                    "inn": "1234567890",
                    "ogrn": "1234567890123",
                    "name": "Test Company",
                    "amount": "10 500,50",
                    "date": "2024",
                }
            ]
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        records = client.fetch_records(
            content=json.dumps(payload).encode("utf-8"),
            file_name="data.json",
        )

        self.assertEqual(len(records), 1)
        self.assertIsInstance(records[0], GenericParserItem)
        self.assertEqual(records[0].external_id, "FIN-1")
        self.assertEqual(records[0].inn, "1234567890")
        # The space-grouped, comma-decimal input must come back as 10500.50.
        self.assertEqual(str(records[0].amount), "10500.50")

    def test_parse_csv_records(self):
        """Test CSV parsing with Russian headers in a cp1251-encoded file."""
        content = (
            "реестровый номер;ИНН;наименование;сумма\n"
            "RN-1;1234567890;ООО Тест;1000.00\n"
        ).encode("cp1251")
        client = StructuredDataClient(source=ParserLoadLog.Source.UNFAIR_SUPPLIERS)

        records = client.fetch_records(content=content, file_name="data.csv")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "RN-1")
        self.assertEqual(records[0].organisation_name, "ООО Тест")

    def test_parse_xml_records_under_wrapper(self):
        """Test XML parser selects repeated nested record elements."""
        content = (
            "<root><items>"
            "<item><id>XML-1</id><inn>123</inn><name>ООО А</name></item>"
            "<item><id>XML-2</id><inn>456</inn><name>ООО Б</name></item>"
            "</items></root>"
        ).encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        records = client.fetch_records(content=content, file_name="data.xml")

        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].external_id, "XML-1")
        self.assertEqual(records[1].external_id, "XML-2")

    def test_json_payload_preserves_nested_objects(self):
        """Test payload keeps nested JSON structures machine-readable."""
        payload = {
            "data": [
                {
                    "id": "NESTED-1",
                    "company": {"inn": "123", "name": "ООО А"},
                    "amounts": [1, 2],
                }
            ]
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        record = client.fetch_records(
            content=json.dumps(payload).encode("utf-8"),
            file_name="data.json",
        )[0]

        self.assertEqual(record.payload["company"], {"inn": "123", "name": "ООО А"})
        self.assertEqual(record.payload["amounts"], [1, 2])

    def test_fallback_external_id_is_stable_after_reordering(self):
        """Test generated external_id does not depend on row position."""
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)
        first = client.fetch_records(
            content=("name;amount\nООО А;10\nООО Б;20\n").encode(),
            file_name="data.csv",
        )
        # Same two rows, swapped.
        second = client.fetch_records(
            content=("name;amount\nООО Б;20\nООО А;10\n").encode(),
            file_name="data.csv",
        )

        self.assertEqual(first[0].external_id, second[1].external_id)
        self.assertEqual(first[1].external_id, second[0].external_id)

    def test_zip_rejects_too_many_supported_files(self):
        """Test ZIP parser refuses archives with too many supported files."""
        archive_content = BytesIO()
        with zipfile.ZipFile(archive_content, "w") as archive:
            archive.writestr("one.csv", "id\n1\n")
            archive.writestr("two.csv", "id\n2\n")

        # Two CSV members against a limit of one.
        client = StructuredDataClient(
            source=ParserLoadLog.Source.FNS_FINANCIAL,
            max_zip_entries=1,
        )

        with self.assertRaises(StructuredDataClientError):
            client.fetch_records(
                content=archive_content.getvalue(),
                file_name="data.zip",
            )

    def test_html_without_table_returns_empty_records(self):
        """Test generic parser does not treat HTML pages as malformed XML."""
        content = b"<!doctype html><html><body><main>No table</main></body></html>"
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="")

        self.assertEqual(records, [])

    def test_html_table_after_long_head_is_detected(self):
        """Test HTML detection scans beyond the first kilobyte."""
        # 1500 spaces of padding push the <table> past the first 1024 bytes.
        content = (
            "<!doctype html><html><head>" + (" " * 1500) + "</head><body><table>"
            "<tr><th>id</th><th>inn</th></tr>"
            "<tr><td>HTML-1</td><td>1234567890</td></tr>"
            "</table></body></html>"
        ).encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "HTML-1")
        self.assertEqual(records[0].inn, "1234567890")

    def test_html_layout_table_without_headers_is_ignored(self):
        """Test layout/navigation tables are not imported as records."""
        content = (
            "<html><body><table>"
            "<tr><td>Картотека</td><td>Страж</td></tr>"
            "<tr><td>Календарь</td><td>Мой Арбитр</td></tr>"
            "</table></body></html>"
        ).encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="")

        self.assertEqual(records, [])

    @patch.object(BaseHTTPClient, "post_json")
    def test_mpt_products_page_uses_official_search_api(self, mock_post_json):
        """Test GISP product page uses the official paginated UI API."""
        mock_post_json.return_value = {
            "ok": True,
            "total_count": 1,
            "items": [
                {
                    "org_name": "ООО Производитель",
                    "org_inn": "7701000000",
                    "org_ogrn": "1027700000000",
                    "product_reg_number_2023": "10165413",
                    "product_name": "Средство дезинфицирующее",
                    "res_date": "2026-04-25",
                    "product_gisp_url": "https://gisp.gov.ru/goods/#/product/1",
                }
            ],
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.MPT_PRODUCTS)

        records = client.fetch_records(file_url="https://gisp.gov.ru/pp719v2/pub/prod/")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "10165413")
        self.assertEqual(records[0].inn, "7701000000")
        self.assertEqual(records[0].organisation_name, "ООО Производитель")
        self.assertEqual(records[0].title, "Средство дезинфицирующее")
        # The POST must go to the "b/" endpoint under the page URL.
        self.assertEqual(
            mock_post_json.call_args.args[0],
            "https://gisp.gov.ru/pp719v2/pub/prod/b/",
        )

    def test_zakupki_cards_are_parsed_as_records(self):
        """Test EIS search cards are parsed when there is no HTML table."""
        content = """
<html><body>
<div class="search-registry-entry-block">
<a href="/epz/order/notice/ea20/view/common-info.html?regNumber=0331">№ 0331</a>
<div>Работа комиссии</div>
<div>Объект закупки</div><div>Поставка оборудования</div>
<div>Заказчик</div><div>ГКУ Тест</div>
<div>Начальная цена</div><div>649 989,52 ₽</div>
<div>Размещено</div><div>20.04.2026</div>
</div>
</body></html>
""".encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.PROCUREMENTS_44FZ)

        records = client.fetch_records(content=content, file_name="search.html")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "0331")
        self.assertEqual(records[0].title, "Поставка оборудования")
        self.assertEqual(records[0].organisation_name, "ГКУ Тест")
        self.assertEqual(str(records[0].amount), "649989.52")
        self.assertEqual(records[0].record_date, "20.04.2026")

    def test_html_table_with_td_header_row_is_parsed(self):
        """Test registry tables without th still parse when first row is a header."""
        content = """
<html><body><table>
<tr><td>Номер реестровой записи</td><td>Информация о лице</td><td>ИНН</td></tr>
<tr><td>ГОЗ-1</td><td>ООО Оборона</td><td>7701000000</td></tr>
</table></body></html>
""".encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="fas.html")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "ГОЗ-1")
        self.assertEqual(records[0].inn, "7701000000")
        self.assertEqual(records[0].organisation_name, "ООО Оборона")

    def test_fas_goz_multirow_header_table_is_parsed(self):
        """Test FAS GOZ table skips multirow headers and column-number rows."""
        # Two header rows (rowspan/colspan), one row of column numbers, one data row.
        content = """
<html><body><table>
<tr><td rowspan="2">Номер реестровой записи</td><td rowspan="2">Орган</td>
<td colspan="3">Постановление</td><td colspan="3">Лицо</td></tr>
<tr><td>номер</td><td>дата</td><td>исполнение</td>
<td>полное наименование</td><td>адрес</td><td>ИНН</td></tr>
<tr><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td></tr>
<tr><td>1</td><td>Нижегородское УФАС России</td>
<td>№ 052/04/7.29.2-2965/2023 от 22.01.2024</td>
<td>28.10.2025</td><td>В стадии исполнения</td>
<td>АО УАПО</td><td>АО УАПО</td><td>г. Уфа</td><td>0275074279</td></tr>
</table></body></html>
""".encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.FAS_GOZ)

        records = client.fetch_records(content=content, file_name="fas.html")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "1")
        self.assertEqual(records[0].inn, "0275074279")
        self.assertEqual(records[0].organisation_name, "АО УАПО")
        self.assertEqual(records[0].record_date, "28.10.2025")
        self.assertEqual(records[0].status, "В стадии исполнения")

    def test_fns_nested_bfo_fields_are_normalized(self):
        """Test FNS JSON keeps nested payload and maps useful BFO fields."""
        payload = {
            "content": [
                {
                    "id": 6622458,
                    "inn": "<strong>7736050003</strong>",
                    "shortName": 'ПАО "ГАЗПРОМ"',
                    "ogrn": "1027700070518",
                    "statusCode": "ACTIVE",
                    "bfo": {
                        "period": "2025",
                        "actualBfoDate": "2026-03-16",
                        "gainSum": 5846351786,
                    },
                }
            ]
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        records = client.fetch_records(
            content=json.dumps(payload).encode("utf-8"),
            file_name="fns.json",
        )

        self.assertEqual(records[0].external_id, "6622458")
        # The <strong> markup around the INN must be gone.
        self.assertEqual(records[0].inn, "7736050003")
        self.assertEqual(records[0].organisation_name, 'ПАО "ГАЗПРОМ"')
        self.assertEqual(records[0].record_date, "2026-03-16")
        self.assertEqual(str(records[0].amount), "5846351786")
        self.assertEqual(records[0].status, "ACTIVE")

    @patch.object(BaseHTTPClient, "download_file")
    def test_fstec_page_discovers_csv_download(self, mock_download):
        """Test FSTEC registry page follows the official CSV download link."""
        # First download returns the HTML page with the link, second the CSV itself.
        mock_download.side_effect = [
            (
                '<html><a href="/reg3?option=com_rajax&module=rfiles&'
                'method=download&format=file&mod=209&file=1">Государственный '
                "реестр ССЗИ</a></html>"
            ).encode(),
            (
                '"№ сертификата","Дата внесения в реестр","Срок действия сертификата",'
                '"Наименование средства (шифр)","Заявитель"\n'
                '"17/1","2002-07-26","2020-08-01","ФСПК-100","ООО НПП ЭЛКОМ"\n'
            ).encode(),
        ]
        client = StructuredDataClient(source=ParserLoadLog.Source.FSTEC)

        records = client.fetch_records(file_url="https://reestr.fstec.ru/reg3")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "17/1")
        self.assertEqual(records[0].organisation_name, "ООО НПП ЭЛКОМ")
        self.assertEqual(records[0].title, "ФСПК-100")
        self.assertEqual(records[0].record_date, "2002-07-26")
        self.assertEqual(records[0].status, "2020-08-01")
|
||
|
||
|
||
class TrudvsemClientTest(TestCase):
    """Unit tests for TrudvsemClient with ``BaseHTTPClient.get_json`` patched."""

    @staticmethod
    def _api_page(*vacancies):
        """Wrap vacancy dicts in the ``results -> vacancies -> vacancy`` envelope."""
        return {"results": {"vacancies": [{"vacancy": item} for item in vacancies]}}

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_vacancies_success(self, mock_get_json):
        """A single-vacancy response yields one record with the mapped fields."""
        vacancy = {
            "id": "VAC-1",
            "job-name": "Инженер",
            "creation-date": "2026-01-01",
            "salary": {"from": 120000},
            "company": {
                "name": "ООО Тест",
                "inn": "1234567890",
                "ogrn": "1234567890123",
            },
            "vac_url": "https://trudvsem.ru/vacancy/VAC-1",
        }
        mock_get_json.return_value = self._api_page(vacancy)

        with TrudvsemClient() as client:
            found = client.fetch_vacancies(limit=1)

        self.assertEqual(len(found), 1)
        first = found[0]
        self.assertEqual(first.external_id, "VAC-1")
        self.assertEqual(first.source, ParserLoadLog.Source.TRUDVSEM)
        self.assertEqual(first.inn, "1234567890")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_vacancies_by_company_inn_scans_pages(self, mock_get_json):
        """Searching by company_inn moves on to the next page when the first has no match."""
        target_inn = "1234567890"
        mock_get_json.side_effect = [
            self._api_page({"id": "VAC-OTHER", "company": {"inn": "0000000000"}}),
            self._api_page({"id": "VAC-MATCH", "company": {"inn": target_inn}}),
        ]

        with TrudvsemClient(company_search_max_pages=2) as client:
            found = client.fetch_vacancies(limit=1, company_inn=target_inn)

        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].external_id, "VAC-MATCH")

        # The first two requests must carry offsets 0 and 1 respectively.
        page_calls = mock_get_json.call_args_list
        for page_index in (0, 1):
            self.assertEqual(
                page_calls[page_index].kwargs["params"]["offset"],
                page_index,
            )
|
||
|
||
|
||
def _create_test_excel_certificates() -> bytes:
    """Return the bytes of an in-memory workbook holding certificate fixtures.

    The active sheet gets one header row followed by five data rows whose
    certificate number, file URL, company name, INN and OGRN vary with the
    row index.
    """
    workbook = Workbook()
    sheet = workbook.active

    header = [
        "issue_date",
        "certificate_number",
        "expiry_date",
        "certificate_file_url",
        "organisation_name",
        "inn",
        "ogrn",
    ]
    sheet.append(header)

    for idx in range(5):
        data_row = [
            "2024-01-01",
            f"CERT-{idx:04d}",
            "2025-01-01",
            f"https://example.com/cert{idx}.pdf",
            f"Company {idx} LLC",
            f"123456789{idx}",
            f"123456789012{idx}",
        ]
        sheet.append(data_row)

    # Serialize into memory and hand back everything that was written.
    buffer = BytesIO()
    workbook.save(buffer)
    return buffer.getvalue()
|
||
|
||
|
||
def _create_test_excel_manufacturers() -> bytes:
    """Return the bytes of an in-memory workbook holding manufacturer fixtures.

    The active sheet gets a four-column header row followed by five data rows
    whose name, INN, OGRN and address vary with the row index.
    """
    workbook = Workbook()
    sheet = workbook.active

    header = ["full_legal_name", "inn", "ogrn", "address"]
    sheet.append(header)

    for idx in range(5):
        data_row = [
            f"Manufacturer {idx} LLC",
            f"123456789{idx}",
            f"123456789012{idx}",
            f"Address {idx}, City",
        ]
        sheet.append(data_row)

    # Serialize into memory and hand back everything that was written.
    buffer = BytesIO()
    workbook.save(buffer)
    return buffer.getvalue()
|
||
|
||
|
||
class IndustrialProductionClientTest(TestCase):
    """Unit tests for IndustrialProductionClient with HTTP access patched out."""

    DATASET_NAME = "Заключения о подтверждении производства промышленной продукции на территории Российской Федерации"

    @classmethod
    def _files_listing(cls, *files):
        """Build a files-list payload with one dataset entry.

        Args:
            *files: ``(name, url)`` pairs placed under the entry's ``files`` key.
        """
        return {
            "data": [
                {
                    "name": cls.DATASET_NAME,
                    "files": [{"name": name, "url": url} for name, url in files],
                }
            ]
        }

    def test_client_initialization(self):
        """A default client has no proxies and host minpromtorg.gov.ru."""
        industrial = IndustrialProductionClient()

        self.assertIsNone(industrial.proxies)
        self.assertEqual(industrial.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        pool = ["http://proxy1:8080", "http://proxy2:8080"]
        industrial = IndustrialProductionClient(proxies=pool)

        self.assertEqual(industrial.proxies, pool)

    def test_context_manager(self):
        """Entering the client as a context manager yields a client instance."""
        with IndustrialProductionClient() as entered:
            self.assertIsInstance(entered, IndustrialProductionClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_certificates_success(self, mock_download, mock_get_json):
        """A listed file plus a downloadable workbook yields five certificates."""
        # The listing call answers with a single dated workbook.
        mock_get_json.return_value = self._files_listing(
            ("data_resolutions_20240101.xlsx", "/files/test.xlsx"),
        )
        # The download call answers with the generated workbook bytes.
        mock_download.return_value = _create_test_excel_certificates()

        with IndustrialProductionClient() as client:
            loaded = client.fetch_certificates()

        self.assertEqual(len(loaded), 5)
        head = loaded[0]
        self.assertIsInstance(head, IndustrialCertificate)
        self.assertEqual(head.certificate_number, "CERT-0000")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_certificates_no_files(self, mock_get_json):
        """An empty listing produces an empty certificate list."""
        mock_get_json.return_value = {"data": []}

        with IndustrialProductionClient() as client:
            loaded = client.fetch_certificates()

        self.assertEqual(loaded, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """Among three dated files the one dated 20240315 is chosen."""
        mock_get_json.return_value = self._files_listing(
            ("data_resolutions_20240101.xlsx", "/files/old.xlsx"),
            ("data_resolutions_20240315.xlsx", "/files/new.xlsx"),
            ("data_resolutions_20240201.xlsx", "/files/mid.xlsx"),
        )

        industrial = IndustrialProductionClient()
        listing = industrial._fetch_files_list()
        latest_url = industrial._get_latest_file_url(listing)

        self.assertIn("new.xlsx", latest_url)

    def test_parse_row_valid(self):
        """A seven-column row parses into an IndustrialCertificate."""
        complete_row = (
            "2024-01-01",
            "CERT-123",
            "2025-01-01",
            "https://example.com/cert.pdf",
            "Test Company",
            "1234567890",
            "1234567890123",
        )

        parsed = IndustrialProductionClient()._parse_row(complete_row)

        self.assertIsInstance(parsed, IndustrialCertificate)
        self.assertEqual(parsed.certificate_number, "CERT-123")
        self.assertEqual(parsed.inn, "1234567890")

    def test_parse_row_invalid(self):
        """A row with too few columns parses to None."""
        short_row = ("only", "two")

        parsed = IndustrialProductionClient()._parse_row(short_row)

        self.assertIsNone(parsed)
|
||
|
||
|
||
class ManufacturesClientTest(TestCase):
    """Unit tests for ManufacturesClient with HTTP access patched out."""

    DATASET_NAME = "Производители промышленной продукции"

    @classmethod
    def _files_listing(cls, *files):
        """Build a files-list payload with one dataset entry.

        Args:
            *files: ``(name, url)`` pairs placed under the entry's ``files`` key.
        """
        return {
            "data": [
                {
                    "name": cls.DATASET_NAME,
                    "files": [{"name": name, "url": url} for name, url in files],
                }
            ]
        }

    def test_client_initialization(self):
        """A default client has no proxies and host minpromtorg.gov.ru."""
        manufactures = ManufacturesClient()

        self.assertIsNone(manufactures.proxies)
        self.assertEqual(manufactures.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        pool = ["http://proxy1:8080", "http://proxy2:8080"]
        manufactures = ManufacturesClient(proxies=pool)

        self.assertEqual(manufactures.proxies, pool)

    def test_context_manager(self):
        """Entering the client as a context manager yields a client instance."""
        with ManufacturesClient() as entered:
            self.assertIsInstance(entered, ManufacturesClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_manufacturers_success(self, mock_download, mock_get_json):
        """A listed file plus a downloadable workbook yields five manufacturers."""
        # The listing call answers with a single dated workbook.
        mock_get_json.return_value = self._files_listing(
            ("data_orgs_20240101.xlsx", "/files/test.xlsx"),
        )
        # The download call answers with the generated workbook bytes.
        mock_download.return_value = _create_test_excel_manufacturers()

        with ManufacturesClient() as client:
            loaded = client.fetch_manufacturers()

        self.assertEqual(len(loaded), 5)
        head = loaded[0]
        self.assertIsInstance(head, Manufacturer)
        self.assertEqual(head.full_legal_name, "Manufacturer 0 LLC")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_manufacturers_no_files(self, mock_get_json):
        """An empty listing produces an empty manufacturer list."""
        mock_get_json.return_value = {"data": []}

        with ManufacturesClient() as client:
            loaded = client.fetch_manufacturers()

        self.assertEqual(loaded, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """Among three dated files the one dated 20240315 is chosen."""
        mock_get_json.return_value = self._files_listing(
            ("data_orgs_20240101.xlsx", "/files/old.xlsx"),
            ("data_orgs_20240315.xlsx", "/files/new.xlsx"),
            ("data_orgs_20240201.xlsx", "/files/mid.xlsx"),
        )

        manufactures = ManufacturesClient()
        listing = manufactures._fetch_files_list()
        latest_url = manufactures._get_latest_file_url(listing)

        self.assertIn("new.xlsx", latest_url)

    def test_parse_row_valid(self):
        """A four-column row parses into a Manufacturer."""
        complete_row = ("Test Company LLC", "1234567890", "1234567890123", "Test Address")

        parsed = ManufacturesClient()._parse_row(complete_row)

        self.assertIsInstance(parsed, Manufacturer)
        self.assertEqual(parsed.full_legal_name, "Test Company LLC")
        self.assertEqual(parsed.inn, "1234567890")

    def test_parse_row_without_address(self):
        """A three-column row still parses, with address set to an empty string."""
        row_without_address = ("Test Company LLC", "1234567890", "1234567890123")

        parsed = ManufacturesClient()._parse_row(row_without_address)

        self.assertIsInstance(parsed, Manufacturer)
        self.assertEqual(parsed.address, "")
|
||
|
||
|
||
@tag("integration", "slow", "network")
class IndustrialProductionClientIntegrationTest(TestCase):
    """
    Integration tests that load real data.

    WARNING: these tests issue real HTTP requests to external servers.
    Run them by tag: python manage.py test --tag=integration
    """

    def test_fetch_certificates_real_data(self):
        """
        Integration test: fetch certificates over the network.

        The test:
        1. Opens an IndustrialProductionClient with timeout=120
        2. Calls fetch_certificates()
        3. Checks that a list comes back
        4. Checks the first item's structure when the list is not empty

        The test is skipped when the client raises HTTPClientError, so its
        outcome depends on the external server being reachable.

        NOTE(review): the original docstring named gisp.gov.ru as the source
        and described an Excel download step - confirm against the client.
        """
        try:
            with IndustrialProductionClient(timeout=120) as client:
                loaded = client.fetch_certificates()

            # A list must come back even when nothing was loaded.
            self.assertIsInstance(loaded, list)

            if not loaded:
                print("\n[INTEGRATION] No certificates found (API may be unavailable)")
                return

            # With data present, spot-check the structure of the first record.
            sample = loaded[0]
            self.assertIsInstance(sample, IndustrialCertificate)
            self.assertIsNotNone(sample.certificate_number)
            self.assertIsNotNone(sample.inn)
            self.assertIsNotNone(sample.organisation_name)

            # Informational output only.
            print(f"\n[INTEGRATION] Loaded {len(loaded)} certificates")
            print(f"[INTEGRATION] First certificate: {sample.certificate_number}")
            print(f"[INTEGRATION] Organisation: {sample.organisation_name}")

        except HTTPClientError as exc:
            # The API may be unavailable; for integration tests that is expected.
            self.skipTest(f"External API unavailable: {exc}")
|
||
|
||
|
||
@tag("integration", "slow", "network")
class ManufacturesClientIntegrationTest(TestCase):
    """
    Integration tests for the manufacturers client.

    WARNING: these tests issue real HTTP requests to external servers.
    Run them by tag: python manage.py test --tag=integration
    """

    def test_fetch_manufacturers_real_data(self):
        """
        Integration test: fetch manufacturers over the network.

        The test is skipped when the client raises HTTPClientError.

        NOTE(review): the original docstring named gisp.gov.ru as the source -
        confirm against the client.
        """
        try:
            with ManufacturesClient(timeout=120) as client:
                loaded = client.fetch_manufacturers()

            # A list must come back even when nothing was loaded.
            self.assertIsInstance(loaded, list)

            if not loaded:
                print("\n[INTEGRATION] No manufacturers found (API may be unavailable)")
                return

            # With data present, spot-check the structure of the first record.
            sample = loaded[0]
            self.assertIsInstance(sample, Manufacturer)
            self.assertIsNotNone(sample.full_legal_name)
            self.assertIsNotNone(sample.inn)

            # Informational output only.
            print(f"\n[INTEGRATION] Loaded {len(loaded)} manufacturers")
            print(f"[INTEGRATION] First manufacturer: {sample.full_legal_name}")
            print(f"[INTEGRATION] INN: {sample.inn}")

        except HTTPClientError as exc:
            # The API may be unavailable; for integration tests that is expected.
            self.skipTest(f"External API unavailable: {exc}")
|
||
|
||
|
||
def _create_test_xml_inspections() -> bytes:
    """Return a UTF-8 encoded XML fixture with two inspection records.

    The document has an ``<inspections>`` root holding two ``<inspection>``
    elements with English tag names. The XML declaration sits on the very
    first character of the literal, and the markup is kept flush-left so the
    string contains nothing but the document itself.

    Returns:
        The XML document encoded as UTF-8 bytes.
    """
    xml_content = """<?xml version="1.0" encoding="UTF-8"?>
<inspections>
<inspection>
<registration_number>772024000001</registration_number>
<inn>7701234567</inn>
<ogrn>1027700000001</ogrn>
<organisation_name>ООО "Тест Компания 1"</organisation_name>
<control_authority>Роспотребнадзор</control_authority>
<inspection_type>плановая</inspection_type>
<inspection_form>документарная</inspection_form>
<start_date>2024-01-15</start_date>
<end_date>2024-01-30</end_date>
<status>завершена</status>
<legal_basis>294-ФЗ</legal_basis>
<result>нарушения не выявлены</result>
</inspection>
<inspection>
<registration_number>772024000002</registration_number>
<inn>7702345678</inn>
<ogrn>1027700000002</ogrn>
<organisation_name>АО "Тест Компания 2"</organisation_name>
<control_authority>Ростехнадзор</control_authority>
<inspection_type>внеплановая</inspection_type>
<inspection_form>выездная</inspection_form>
<start_date>2024-02-01</start_date>
<end_date>2024-02-15</end_date>
<status>завершена</status>
<legal_basis>248-ФЗ</legal_basis>
<result>выявлены нарушения</result>
</inspection>
</inspections>"""
    return xml_content.encode("utf-8")
|
||
|
||
|
||
def _create_test_xml_inspections_russian_tags() -> bytes:
    """Return a UTF-8 encoded XML fixture whose tag names are in Russian.

    The document has a ``<Проверки>`` root holding a single ``<КНМ>`` record.
    Unlike the English-tag fixture it carries no result element. The XML
    declaration sits on the very first character of the literal, and the
    markup is kept flush-left so the string contains only the document.

    Returns:
        The XML document encoded as UTF-8 bytes.
    """
    xml_content = """<?xml version="1.0" encoding="UTF-8"?>
<Проверки>
<КНМ>
<УчетныйНомер>772024000003</УчетныйНомер>
<ИНН>7703456789</ИНН>
<ОГРН>1027700000003</ОГРН>
<Наименование>ПАО "Тест Компания 3"</Наименование>
<КонтрольныйОрган>МЧС России</КонтрольныйОрган>
<ТипПроверки>плановая</ТипПроверки>
<ФормаПроверки>документарная и выездная</ФормаПроверки>
<ДатаНачала>2024-03-01</ДатаНачала>
<ДатаОкончания>2024-03-20</ДатаОкончания>
<Статус>в процессе</Статус>
<ПравовоеОснование>294-ФЗ</ПравовоеОснование>
</КНМ>
</Проверки>"""
    return xml_content.encode("utf-8")
|
||
|
||
|
||
class ProverkiClientTest(TestCase):
    """Unit tests for ProverkiClient; downloads and file discovery are mocked."""

    def test_client_initialization(self):
        """A client built without arguments has no proxies and the expected host."""
        bare_client = ProverkiClient()

        self.assertIsNone(bare_client.proxies)
        self.assertEqual(bare_client.host, "proverki.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list given to the constructor is exposed on the client."""
        proxy_urls = ["http://proxy1:8080", "http://proxy2:8080"]
        proxied_client = ProverkiClient(proxies=proxy_urls)

        self.assertEqual(proxied_client.proxies, proxy_urls)

    def test_context_manager(self):
        """Entering the client with ``with`` yields a ProverkiClient instance."""
        with ProverkiClient() as entered:
            self.assertIsInstance(entered, ProverkiClient)

    def test_parse_xml_content_english_tags(self):
        """English-tagged XML produces two records; the first one's fields are checked."""
        parser = ProverkiClient()
        payload = _create_test_xml_inspections()

        parsed = parser._parse_xml_content(payload, None)

        self.assertEqual(len(parsed), 2)
        self.assertIsInstance(parsed[0], Inspection)
        expected_first = {
            "registration_number": "772024000001",
            "inn": "7701234567",
            "organisation_name": 'ООО "Тест Компания 1"',
            "control_authority": "Роспотребнадзор",
            "inspection_type": "плановая",
            "legal_basis": "294-ФЗ",
        }
        for attr_name, wanted in expected_first.items():
            self.assertEqual(getattr(parsed[0], attr_name), wanted)

    def test_parse_xml_content_russian_tags(self):
        """Russian-tagged XML produces a single record with the expected fields."""
        parser = ProverkiClient()
        payload = _create_test_xml_inspections_russian_tags()

        parsed = parser._parse_xml_content(payload, None)

        self.assertEqual(len(parsed), 1)
        self.assertIsInstance(parsed[0], Inspection)
        expected_only = {
            "registration_number": "772024000003",
            "inn": "7703456789",
            "control_authority": "МЧС России",
        }
        for attr_name, wanted in expected_only.items():
            self.assertEqual(getattr(parsed[0], attr_name), wanted)

    def test_parse_xml_record_with_attributes(self):
        """A record that carries its data in XML attributes is still parsed."""
        from xml.etree import ElementTree as ET

        parser = ProverkiClient()
        node = ET.fromstring(  # noqa: S314
            '<inspection inn="1234567890" registration_number="TEST123" organisation_name="Test Co"/>'
        )

        record = parser._parse_xml_record(node)

        self.assertIsNotNone(record)
        self.assertEqual(record.inn, "1234567890")
        self.assertEqual(record.registration_number, "TEST123")

    def test_parse_xml_record_invalid(self):
        """An element with neither children nor attributes parses to None."""
        from xml.etree import ElementTree as ET

        parser = ProverkiClient()
        node = ET.fromstring("<empty_record></empty_record>")  # noqa: S314

        self.assertIsNone(parser._parse_xml_record(node))

    def test_parse_windows_1251_encoding(self):
        """XML declared and encoded as Windows-1251 keeps its Cyrillic text."""
        parser = ProverkiClient()
        payload = (
            '<?xml version="1.0" encoding="windows-1251"?>\n'
            "<inspections>\n"
            "<inspection>\n"
            "<inn>1234567890</inn>\n"
            "<registration_number>TEST001</registration_number>\n"
            "<organisation_name>Компания</organisation_name>\n"
            "</inspection>\n"
            "</inspections>"
        ).encode("windows-1251")

        parsed = parser._parse_xml_content(payload, None)

        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].organisation_name, "Компания")

    @patch.object(BaseHTTPClient, "download_file")
    @patch.object(ProverkiClient, "_discover_data_files")
    def test_fetch_inspections_with_file_url(self, discover_mock, download_mock):
        """A direct file URL is downloaded and parsed without file discovery."""
        download_mock.return_value = _create_test_xml_inspections()
        direct_url = "https://proverki.gov.ru/opendata/test.xml"

        with ProverkiClient() as client:
            fetched = client.fetch_inspections(file_url=direct_url)

        self.assertEqual(len(fetched), 2)
        # Discovery must be bypassed when the caller supplies an explicit URL.
        discover_mock.assert_not_called()

    def test_fetch_inspections_no_files(self):
        """An empty discovery result makes fetch_inspections return an empty list."""
        no_files = patch.object(ProverkiClient, "_discover_data_files", return_value=[])

        with no_files, ProverkiClient() as client:
            fetched = client.fetch_inspections(year=2025)

        self.assertEqual(fetched, [])
|
||
|
||
|
||
@tag("integration", "slow", "network")
class ProverkiClientIntegrationTest(TestCase):
    """
    Integration tests for the proverki.gov.ru client.

    WARNING: these tests make real HTTP requests to external servers.
    Run them with the tag: python manage.py test --tag=integration
    """

    def test_fetch_inspections_real_data(self):
        """
        Integration test: load real inspections from proverki.gov.ru.
        """
        try:
            with ProverkiClient(timeout=120) as client:
                loaded = client.fetch_inspections(year=2025)

            # Check that data came back as a list, even if it is empty.
            self.assertIsInstance(loaded, list)

            if not loaded:
                print(
                    "\n[INTEGRATION] No inspections found "
                    "(API may be unavailable or data format changed)"
                )
                return

            # Data is present, so check the structure of the first record.
            first = loaded[0]
            self.assertIsInstance(first, Inspection)
            self.assertIsNotNone(first.registration_number)
            self.assertIsNotNone(first.inn)

            # Informational output only.
            print(f"\n[INTEGRATION] Loaded {len(loaded)} inspections")
            print(f"[INTEGRATION] First inspection: {first.registration_number}")
            print(f"[INTEGRATION] Organisation: {first.organisation_name}")
            print(f"[INTEGRATION] Control authority: {first.control_authority}")

        except HTTPClientError as exc:
            # The external API may be unavailable; skip instead of failing.
            self.skipTest(f"External API unavailable: {exc}")
|