Files
mostovik-backend/tests/apps/parsers/test_structured_data_client.py

107 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from apps.parsers.clients.base import HTTPClientError
from apps.parsers.clients.common.structured import StructuredDataClient
from django.test import SimpleTestCase
class _FakeHTTPClient:
def __init__(self, responses: dict[str, bytes | Exception]) -> None:
self.responses = responses
self.downloaded_urls: list[str] = []
def download_file(self, endpoint: str, **_kwargs) -> bytes:
self.downloaded_urls.append(endpoint)
response = self.responses[endpoint]
if isinstance(response, Exception):
raise response
return response
class StructuredDataClientEisCardEnrichmentTest(SimpleTestCase):
    """Tests for procurement cards parsed from a results page by
    ``StructuredDataClient``, where each card links to a detail page.

    The HTTP layer is replaced with ``_FakeHTTPClient`` so no network access
    happens. The HTML fixtures are written flush-left on purpose: the string
    contents are passed to the client as-is.
    """

    # Link placed in every listing fixture; also the only URL the fake serves.
    DETAIL_URL = (
        "https://zakupki.gov.ru/epz/order/notice/ea20/view/"
        "common-info.html?regNumber=0338100002026000022"
    )

    @staticmethod
    def _make_client(
        responses: dict[str, bytes | Exception],
    ) -> StructuredDataClient:
        """Build a ``procurements_44fz`` client backed by a fake HTTP client."""
        client = StructuredDataClient(source="procurements_44fz")
        client._http_client = _FakeHTTPClient(responses)
        return client

    def test_procurement_card_enriches_customer_identity_from_detail_page(self):
        """INN, OGRN and KPP shown on the detail page appear on the record."""
        detail_url = self.DETAIL_URL
        detail_page = """
<html>
<body>
<h2>Сведения о заказчике</h2>
<div>Полное наименование</div>
<div>ФЕДЕРАЛЬНОЕ ГБУ НАУКИ</div>
<div>ИНН / КПП</div>
<div>4101020011 / 410101001</div>
<div>ОГРН</div>
<div>1024101023456</div>
</body>
</html>
""".encode()
        client = self._make_client({detail_url: detail_page})

        records = client.fetch_records(
            content=f"""
<html>
<body>
<div class="search-registry-entry-block">
<a href="{detail_url}">№ 0338100002026000022</a>
<div>Работа комиссии</div>
<div>Объект закупки</div>
<div>Поставка оборудования</div>
<div>Заказчик</div>
<div>ФЕДЕРАЛЬНОЕ ГБУ НАУКИ</div>
<div>Начальная цена</div>
<div>1 000,00 ₽</div>
<div>Размещено</div>
<div>19.05.2026</div>
</div>
</body>
</html>
""".encode(),
            file_name="results.html",
        )

        self.assertEqual(len(records), 1)
        record = records[0]
        self.assertEqual(record.inn, "4101020011")
        self.assertEqual(record.ogrn, "1024101023456")
        self.assertEqual(record.payload["kpp"], "410101001")
        self.assertEqual(record.payload["detail_url"], detail_url)
        # Exactly one download, and it is the card's detail page.
        self.assertEqual(client._http_client.downloaded_urls, [detail_url])

    def test_procurement_card_keeps_record_when_detail_page_is_unavailable(self):
        """A failing detail download leaves the listing record in place."""
        detail_url = self.DETAIL_URL
        client = self._make_client(
            {detail_url: HTTPClientError("detail unavailable", url=detail_url)}
        )

        records = client.fetch_records(
            content=f"""
<html>
<body>
<div class="search-registry-entry-block">
<a href="{detail_url}">№ 0338100002026000022</a>
<div>Объект закупки</div>
<div>Поставка оборудования</div>
<div>Заказчик</div>
<div>ФЕДЕРАЛЬНОЕ ГБУ НАУКИ</div>
</div>
</body>
</html>
""".encode(),
            file_name="results.html",
        )

        # Guard against a vacuous pass: the failing detail download must
        # actually have been attempted for this test to cover the error path.
        self.assertIn(detail_url, client._http_client.downloaded_urls)
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].inn, "")
        self.assertEqual(records[0].ogrn, "")
        self.assertEqual(records[0].organisation_name, "ФЕДЕРАЛЬНОЕ ГБУ НАУКИ")