feat: add parser source dashboard and scheduling
All checks were successful
CI/CD Pipeline / Code Quality Checks (pull_request) Successful in 1m6s
CI/CD Pipeline / Run Tests (pull_request) Successful in 1m18s
CI/CD Pipeline / Build Docker Images (pull_request) Has been skipped
CI/CD Pipeline / Push to Gitea Registry (pull_request) Has been skipped

This commit is contained in:
2026-04-27 23:36:28 +02:00
parent 199d871923
commit 44355deeb3
96 changed files with 15015 additions and 309 deletions

View File

@@ -1,12 +1,12 @@
"""Tests for parsers services."""
from django.test import TestCase
from faker import Faker
from decimal import Decimal
from apps.parsers.clients.common.schemas import GenericParserItem
from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer
from apps.parsers.clients.proverki.schemas import Inspection
from apps.parsers.models import (
GenericParserRecord,
IndustrialCertificateRecord,
InspectionRecord,
ManufacturerRecord,
@@ -14,14 +14,18 @@ from apps.parsers.models import (
Proxy,
)
from apps.parsers.services import (
GenericParserRecordService,
IndustrialCertificateService,
InspectionService,
ManufacturerService,
ParserLoadLogService,
ProxyService,
)
from django.test import TestCase
from faker import Faker
from .factories import (
GenericParserRecordFactory,
IndustrialCertificateRecordFactory,
InspectionRecordFactory,
ManufacturerRecordFactory,
@@ -147,7 +151,9 @@ class ParserLoadLogServiceTest(TestCase):
def test_get_next_batch_id_first(self):
    """Test getting first batch_id for new source.

    This test creates no load logs of its own before asking for the next
    batch id of the INDUSTRIAL source, and expects the answer to be ``1``.
    """
    batch_id = ParserLoadLogService.get_next_batch_id(
        ParserLoadLog.Source.INDUSTRIAL
    )
    self.assertEqual(batch_id, 1)
def test_get_next_batch_id_increment(self):
@@ -155,7 +161,9 @@ class ParserLoadLogServiceTest(TestCase):
ParserLoadLogFactory(batch_id=5, source=ParserLoadLog.Source.INDUSTRIAL)
ParserLoadLogFactory(batch_id=3, source=ParserLoadLog.Source.INDUSTRIAL)
batch_id = ParserLoadLogService.get_next_batch_id(ParserLoadLog.Source.INDUSTRIAL)
batch_id = ParserLoadLogService.get_next_batch_id(
ParserLoadLog.Source.INDUSTRIAL
)
self.assertEqual(batch_id, 6)
def test_get_next_batch_id_per_source(self):
@@ -173,6 +181,32 @@ class ParserLoadLogServiceTest(TestCase):
self.assertEqual(industrial_batch, 11)
self.assertEqual(manufactures_batch, 6)
def test_create_next_load_log_reserves_batch_ids(self):
    """Two consecutive allocations for one source get batch ids 1 and 2.

    Also checks that exactly two ParserLoadLog rows exist afterwards.
    """
    allocation_kwargs = {
        "source": ParserLoadLog.Source.INDUSTRIAL,
        "status": "in_progress",
    }

    # Allocate twice in a row with identical arguments.
    first_log, second_log = (
        ParserLoadLogService.create_next_load_log(**allocation_kwargs)
        for _ in range(2)
    )

    self.assertEqual(first_log.batch_id, 1)
    self.assertEqual(second_log.batch_id, 2)
    self.assertEqual(ParserLoadLog.objects.count(), 2)
def test_create_next_load_log_honors_existing_logs(self):
    """Allocation continues after an already stored log (batch 7 -> 8)."""
    industrial = ParserLoadLog.Source.INDUSTRIAL

    # Pre-existing log created through the factory, not the allocator.
    ParserLoadLogFactory(batch_id=7, source=industrial)

    allocated = ParserLoadLogService.create_next_load_log(
        source=industrial,
        status="in_progress",
    )

    self.assertEqual(allocated.batch_id, 8)
def test_create_load_log(self):
"""Test creating load log."""
log = ParserLoadLogService.create_load_log(
@@ -272,7 +306,9 @@ class IndustrialCertificateServiceTest(TestCase):
results = IndustrialCertificateService.find_by_inn("1111111111")
self.assertEqual(results.count(), 2)
results_batch1 = IndustrialCertificateService.find_by_inn("1111111111", batch_id=1)
results_batch1 = IndustrialCertificateService.find_by_inn(
"1111111111", batch_id=1
)
self.assertEqual(results_batch1.count(), 1)
def test_find_by_certificate_number(self):
@@ -314,6 +350,7 @@ class IndustrialCertificateServiceTest(TestCase):
)
]
count2 = IndustrialCertificateService.save_certificates(duplicate, batch_id=2)
self.assertEqual(count2, 0)
# Should still be 1 record (duplicate skipped)
self.assertEqual(IndustrialCertificateRecord.objects.count(), 1)
@@ -421,6 +458,7 @@ class ManufacturerServiceTest(TestCase):
)
]
count2 = ManufacturerService.save_manufacturers(duplicate, batch_id=2)
self.assertEqual(count2, 0)
# Should still be 1 record (duplicate skipped)
self.assertEqual(ManufacturerRecord.objects.count(), 1)
@@ -433,6 +471,147 @@ class ManufacturerServiceTest(TestCase):
self.assertEqual(record.load_batch, 1) # Original batch
class GenericParserRecordServiceTest(TestCase):
    """Exercise GenericParserRecordService: saving, de-duplication, lookup."""

    def test_save_records_empty(self):
        """An empty input list is reported as zero saved records."""
        saved = GenericParserRecordService.save_records(
            [],
            batch_id=1,
            source=ParserLoadLog.Source.FNS_FINANCIAL,
        )

        self.assertEqual(saved, 0)

    def test_save_records(self):
        """Five items with distinct external ids yield five stored rows."""
        items = []
        for i in range(5):
            items.append(
                GenericParserItem(
                    source=ParserLoadLog.Source.FNS_FINANCIAL,
                    external_id=f"FIN-{i}",
                    inn=f"123456789{i}",
                    ogrn=f"123456789012{i}",
                    organisation_name=f"Company {i}",
                    title="Выручка",
                    record_date="2024",
                    amount=Decimal("1000.00"),
                    status="active",
                    url=f"https://example.com/{i}",
                    payload={"external_id": f"FIN-{i}"},
                )
            )

        saved = GenericParserRecordService.save_records(
            items,
            batch_id=1,
            source=ParserLoadLog.Source.FNS_FINANCIAL,
        )

        self.assertEqual(saved, 5)
        self.assertEqual(GenericParserRecord.objects.count(), 5)

    def test_save_records_deduplication(self):
        """Saving the same item in a second batch adds nothing new."""
        trudvsem = ParserLoadLog.Source.TRUDVSEM
        item = GenericParserItem(
            source=trudvsem,
            external_id="VAC-1",
            inn="1234567890",
            organisation_name="Old Company",
            payload={"name": "old"},
        )

        # Same item, two different batches: only the first should count.
        first_pass = GenericParserRecordService.save_records(
            [item],
            batch_id=1,
            source=trudvsem,
        )
        second_pass = GenericParserRecordService.save_records(
            [item],
            batch_id=2,
            source=trudvsem,
        )

        self.assertEqual(first_pass, 1)
        self.assertEqual(second_pass, 0)
        self.assertEqual(GenericParserRecord.objects.count(), 1)

    def test_save_records_deduplicates_incoming_batch(self):
        """Two items sharing an external id in one call count as one save.

        The surviving row is expected to carry the first item's data.
        """
        trudvsem = ParserLoadLog.Source.TRUDVSEM
        first_item = GenericParserItem(
            source=trudvsem,
            external_id="VAC-SAME",
            inn="1234567890",
            organisation_name="First Company",
            payload={"name": "first"},
        )
        second_item = GenericParserItem(
            source=trudvsem,
            external_id="VAC-SAME",
            inn="9999999999",
            organisation_name="Second Company",
            payload={"name": "second"},
        )

        saved = GenericParserRecordService.save_records(
            [first_item, second_item],
            batch_id=1,
            source=trudvsem,
        )

        self.assertEqual(saved, 1)
        self.assertEqual(GenericParserRecord.objects.count(), 1)
        stored = GenericParserRecord.objects.get()
        self.assertEqual(stored.organisation_name, "First Company")

    def test_create_with_exact_count_returns_zero_for_conflict(self):
        """A colliding (source, external_id) instance is counted as 0 inserts."""
        trudvsem = ParserLoadLog.Source.TRUDVSEM

        # Row that already occupies the (source, external_id) pair.
        GenericParserRecordFactory(
            source=trudvsem,
            external_id="VAC-RACE",
        )
        conflicting = GenericParserRecord(
            load_batch=2,
            source=trudvsem,
            external_id="VAC-RACE",
            inn="1234567890",
            organisation_name="Concurrent Company",
            payload={"external_id": "VAC-RACE"},
        )

        inserted = GenericParserRecordService._create_with_exact_count(
            [conflicting],
            unique_fields=["source", "external_id"],
            chunk_size=500,
        )

        self.assertEqual(inserted, 0)
        self.assertEqual(GenericParserRecord.objects.count(), 1)

    def test_find_by_inn_with_source(self):
        """Lookup by INN narrowed to one source returns only that source's row."""
        trudvsem = ParserLoadLog.Source.TRUDVSEM

        # Same INN stored under two different sources.
        for record_source in (trudvsem, ParserLoadLog.Source.FNS_FINANCIAL):
            GenericParserRecordFactory(
                source=record_source,
                inn="1234567890",
            )

        matches = GenericParserRecordService.find_by_inn(
            "1234567890",
            source=trudvsem,
        )

        self.assertEqual(matches.count(), 1)
        self.assertEqual(matches.first().source, trudvsem)
class InspectionServiceTest(TestCase):
"""Tests for InspectionService."""
@@ -568,6 +747,7 @@ class InspectionServiceTest(TestCase):
)
]
count2 = InspectionService.save_inspections(duplicate, batch_id=2)
self.assertEqual(count2, 0)
# Should still be 1 record (duplicate skipped)
self.assertEqual(InspectionRecord.objects.count(), 1)
@@ -581,10 +761,9 @@ class InspectionServiceTest(TestCase):
self.assertEqual(record.load_batch, 1) # Original batch
from django.test import tag
from apps.parsers.clients.base import HTTPClientError
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from django.test import tag
@tag("integration", "slow", "network", "e2e")
@@ -674,4 +853,3 @@ class EndToEndIntegrationTest(TestCase):
except HTTPClientError as e:
self.skipTest(f"External API unavailable: {e}")