fix(parsers): support official product registry headers
All checks were successful
CI/CD Pipeline / Quality Gate (push) Successful in 20s
CI/CD Pipeline / Build and Push Images (push) Successful in 6s
CI/CD Pipeline / Internal Notify (push) Successful in 1s
CI/CD Pipeline / Deploy Dev in Dokploy (push) Successful in 1s

This commit is contained in:
2026-04-28 23:05:48 +02:00
parent 01387ae13b
commit b2355b0e63
2 changed files with 84 additions and 0 deletions

View File

@@ -31,6 +31,8 @@ HEADER_ALIASES = {
"наименование организации",
"организация",
"full_organisation_name",
"Nameoforg",
"Name of org",
},
"ogrn": {"огрн", "ogrn"},
"inn": {"инн", "inn"},
@@ -40,12 +42,16 @@ HEADER_ALIASES = {
"реестровый номер",
"номер реестровой записи",
"registry_number",
"Registernumber",
"Register number",
},
"product_name": {
"наименование продукции",
"наименование промышленной продукции",
"продукция",
"product_name",
"Productname",
"Product name",
},
"product_model": {
"модель или модификация",
@@ -57,12 +63,14 @@ HEADER_ALIASES = {
"код по окпд2",
"окпд2",
"okpd2_code",
"OKPD2",
},
"tnved_code": {
"код по тн вэд",
"тн вэд",
"тнвэд",
"tnved_code",
"TNVED",
},
"regulatory_document": {
"наименование нормативного документа",

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import io
from unittest.mock import patch
from urllib.parse import urlparse
@@ -23,6 +24,7 @@ from apps.parsers.clients.minpromtorg.schemas import (
from apps.parsers.clients.proverki import ProverkiClient
from apps.parsers.clients.proverki.schemas import Inspection
from django.test import TestCase, tag
from openpyxl import Workbook
from openpyxl import load_workbook as openpyxl_load_workbook
from requests.adapters import BaseAdapter
@@ -549,6 +551,80 @@ class IndustrialProductsClientTest(TestCase):
self.assertEqual(len(products), len(rows))
self.assertTrue(load_workbook_mock.call_args.kwargs["read_only"])
def test_fetch_products_supports_official_english_headers(self):
    """Check that a workbook with English column headers is parsed into one product.

    An in-memory XLSX file with one header row and one data row is served
    by a local test HTTP server, together with a JSON listing that points
    at that file. The client is then expected to return a single product
    whose organisation name, registry number, product name, OKPD2 code and
    TN VED code equal the values from the data row.
    """
    header_row = [
        "Nameoforg",
        "OGRN",
        "INN",
        "Orgaddr",
        "Productmanufaddress",
        "Regnumber",
        "Ektrudp",
        "Docdate",
        "Docvalidtill",
        "Enddate",
        "Registernumber",
        "Productname",
        "OKPD2",
        "TNVED",
    ]
    data_row = [
        "ООО Тест",
        "1027700032953",
        "7701256405",
        "-",
        "-",
        "4963\\2\\2023",
        "-",
        "2026-04-28",
        "2029-04-27",
        "-",
        "10092840",
        "Плата материнская",
        "26.20.30",
        "8471 80 000 0",
    ]

    # Serialise the two-row workbook into memory; no file touches the disk.
    book = Workbook()
    sheet = book.active
    for row in (header_row, data_row):
        sheet.append(row)
    stream = io.BytesIO()
    book.save(stream)
    book.close()
    xlsx_bytes = stream.getvalue()

    file_name = "industrial_products_20260428.xlsx"
    file_path = f"/files/{file_name}"

    with TestHTTPServer() as server:
        # Listing entry is named after the client's own query and links
        # to the workbook registered just below.
        listing = {
            "data": [
                {
                    "name": IndustrialProductsClient().query,
                    "files": [{"name": file_name, "url": file_path}],
                }
            ]
        }
        server.add_json("/api/kss-document-preview", listing)
        server.add_bytes(file_path, xlsx_bytes)

        client = IndustrialProductsClient(
            host=_host_from_base_url(server.base_url),
            scheme="http",
            http_adapter=server.adapter,
        )
        products = client.fetch_products()

        self.assertEqual(len(products), 1)
        product = products[0]
        self.assertEqual(product.full_organisation_name, "ООО Тест")
        self.assertEqual(product.registry_number, "10092840")
        self.assertEqual(product.product_name, "Плата материнская")
        self.assertEqual(product.okpd2_code, "26.20.30")
        self.assertEqual(product.tnved_code, "8471 80 000 0")
def test_get_latest_file_url_falls_back_to_excel_file(self):
client = IndustrialProductsClient()
files = [