perf(organizations): speed up filtered API lists
All checks were successful
CI/CD Pipeline / Quality Gate (push) Successful in 28s
CI/CD Pipeline / Build and Push Images (push) Successful in 10s
CI/CD Pipeline / Internal Notify (push) Successful in 0s
CI/CD Pipeline / Deploy Dev in Dokploy (push) Successful in 1s

This commit is contained in:
2026-05-14 17:08:03 +02:00
parent df89e498cc
commit 19a7d5a91c
10 changed files with 360 additions and 62 deletions

View File

@@ -0,0 +1,119 @@
from django.db import migrations, models
# Snapshot source keys whose name in the API-facing ``data_sources`` JSON
# differs from the key stored in the snapshot payload.
API_DATA_SOURCE_ALIASES = {"trudvsem": "vacancies"}
def to_api_data_source(source):
    """Return the API-facing name for a snapshot source key.

    The key is coerced to ``str`` first. Keys listed in
    ``API_DATA_SOURCE_ALIASES`` are replaced by their alias; every other key
    is returned unchanged (as a string).
    """
    source_key = str(source)
    return API_DATA_SOURCE_ALIASES.get(source_key, source_key)
def data_source_summary(data):
    """Build the per-source record summary stored in ``data_source_counts``.

    Args:
        data: Snapshot payload mapping a source key to that source's value,
            or ``None`` when the snapshot has no payload.

    Returns:
        A list of ``{"source": <api name>, "count": <int>}`` dicts, ordered
        by the original (un-aliased) source key. Sources whose count is zero
        are omitted.
    """
    if data is None:
        # A snapshot without payload has no sources. The PostgreSQL backfill
        # in this migration also produces [] for a NULL payload, so the two
        # code paths stay in agreement instead of crashing in sorted(None).
        return []

    summary = []
    for source in sorted(data):
        value = data[source]
        # A list contributes one record per item; any other truthy value
        # counts as a single record and any falsy value as none.
        count = len(value) if isinstance(value, list) else int(bool(value))
        if count:
            summary.append({"source": to_api_data_source(source), "count": count})
    return summary
def backfill_data_source_counts_python(apps):
    """Recompute ``data_source_counts`` for every snapshot using the ORM.

    Rows are streamed from the database and written back with
    ``bulk_update`` in batches of 100, so the whole table is never held in
    memory at once.

    Args:
        apps: Historical app registry passed in by the migration framework.
    """
    batch_size = 100
    snapshot_model = apps.get_model("organizations", "OrganizationDataSnapshot")
    manager = snapshot_model.objects
    # Only the columns needed to compute and store the summary are loaded.
    rows = manager.only("organization_id", "data", "data_source_counts")

    pending = []
    for row in rows.iterator(chunk_size=batch_size):
        row.data_source_counts = data_source_summary(row.data)
        pending.append(row)
        if len(pending) < batch_size:
            continue
        manager.bulk_update(pending, ["data_source_counts"])
        pending = []

    # Flush the final, partially filled batch.
    if pending:
        manager.bulk_update(pending, ["data_source_counts"])
def backfill_data_source_counts(apps, schema_editor):
    """Populate ``data_source_counts`` for all existing snapshots.

    On PostgreSQL the summary is computed in a single ``UPDATE`` statement;
    on every other backend the ORM-based Python fallback is used. Both paths
    are meant to store the same JSON for the same payload.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Schema editor whose connection selects the code path
            and executes the SQL.
    """
    if schema_editor.connection.vendor != "postgresql":
        backfill_data_source_counts_python(apps)
        return

    with schema_editor.connection.cursor() as cursor:
        # Kept in line with the Python fallback in this migration:
        #   * entries are ordered by the ORIGINAL source key in byte order
        #     (COLLATE "C"), matching Python's sorted() on the raw keys --
        #     ordering by the aliased name or by the database collation
        #     would store a different list than the fallback does;
        #   * numeric zero is treated as "no records", matching Python
        #     truthiness of 0 / 0.0;
        #   * the CASE on the key mirrors API_DATA_SOURCE_ALIASES.
        cursor.execute(
            """
            UPDATE organizations_data_snapshot snapshot
            SET data_source_counts = COALESCE(
                (
                    SELECT jsonb_agg(
                        jsonb_build_object(
                            'source',
                            source_counts.source,
                            'count',
                            source_counts.record_count
                        )
                        ORDER BY source_counts.source_key COLLATE "C"
                    )
                    FROM (
                        SELECT source_items.key AS source_key,
                            CASE source_items.key
                                WHEN 'trudvsem' THEN 'vacancies'
                                ELSE source_items.key
                            END AS source,
                            CASE
                                WHEN jsonb_typeof(source_items.value) = 'array'
                                    THEN jsonb_array_length(source_items.value)
                                WHEN source_items.value IN (
                                    'null'::jsonb,
                                    'false'::jsonb,
                                    '0'::jsonb,
                                    '[]'::jsonb,
                                    '{}'::jsonb,
                                    '""'::jsonb
                                )
                                    THEN 0
                                ELSE 1
                            END AS record_count
                        FROM jsonb_each(snapshot.data) AS source_items
                    ) AS source_counts
                    WHERE source_counts.record_count > 0
                ),
                '[]'::jsonb
            )
            """
        )
def clear_data_source_counts(apps, schema_editor):
    """Reverse of the backfill: reset ``data_source_counts`` to ``[]`` on every row.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Unused; present because the migration framework
            passes it to every ``RunPython`` callable.
    """
    snapshots = apps.get_model("organizations", "OrganizationDataSnapshot").objects
    snapshots.update(data_source_counts=[])
class Migration(migrations.Migration):
    """Add ``OrganizationDataSnapshot.data_source_counts`` and backfill it."""

    dependencies = [("organizations", "0004_seed_daily_snapshot_refresh_schedule")]

    operations = [
        # Schema step: new JSON column that defaults to an empty list.
        migrations.AddField(
            model_name="organizationdatasnapshot",
            name="data_source_counts",
            field=models.JSONField(
                verbose_name="счетчики источников",
                help_text="Готовый JSON data_sources для API v2",
                default=list,
            ),
        ),
        # Data step: fill the column from each snapshot's payload; reversing
        # the migration resets the column to empty lists.
        migrations.RunPython(backfill_data_source_counts, clear_data_source_counts),
    ]