perf(organizations): speed up filtered API lists
This commit is contained in:
119
src/organizations/migrations/0005_snapshot_data_source_counts.py
Normal file
119
src/organizations/migrations/0005_snapshot_data_source_counts.py
Normal file
@@ -0,0 +1,119 @@
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
# Maps a stored data-source key to the name reported for it in
# ``data_source_counts``; keys that are not listed keep their own name.
API_DATA_SOURCE_ALIASES = {"trudvsem": "vacancies"}
|
||||
|
||||
|
||||
def to_api_data_source(source):
    """Return the aliased name for a data-source key.

    The key is coerced to ``str`` first. Keys that have no entry in
    ``API_DATA_SOURCE_ALIASES`` are returned unchanged (as a string).
    """
    key = str(source)
    if key in API_DATA_SOURCE_ALIASES:
        return API_DATA_SOURCE_ALIASES[key]
    return key
|
||||
|
||||
|
||||
def data_source_summary(data):
    """Build the per-source count list stored in ``data_source_counts``.

    Args:
        data: Snapshot payload, expected to be a dict mapping a source key to
            that source's payload. Anything that is not a dict (for example
            ``None``) is treated as containing no sources.

    Returns:
        A list of ``{"source": <aliased name>, "count": <int>}`` dicts,
        ordered by the original (un-aliased) source key. A list payload
        counts its items, any other truthy payload counts as one record, and
        sources whose count is zero are left out.
    """
    # A missing or malformed payload has nothing to summarise; returning an
    # empty list keeps a single bad row from aborting the whole backfill.
    if not isinstance(data, dict):
        return []

    summary = []
    for source in sorted(data):
        value = data[source]
        if isinstance(value, list):
            count = len(value)
        else:
            # Non-list payloads are a single record when truthy, none when
            # falsy (None, False, 0, "", {}).
            count = 1 if value else 0
        if count:
            summary.append({"source": to_api_data_source(source), "count": count})
    return summary
|
||||
|
||||
|
||||
def backfill_data_source_counts_python(apps):
    """Recompute ``data_source_counts`` for every snapshot through the ORM.

    Rows are streamed with ``iterator()`` and written back with
    ``bulk_update`` in batches of 100, so only one batch of modified
    instances is accumulated at a time.

    Args:
        apps: App registry used to look up the ``OrganizationDataSnapshot``
            model via ``apps.get_model``.
    """
    batch_size = 100
    Snapshot = apps.get_model("organizations", "OrganizationDataSnapshot")
    rows = Snapshot.objects.only(
        "organization_id",
        "data",
        "data_source_counts",
    )

    pending = []
    for row in rows.iterator(chunk_size=batch_size):
        row.data_source_counts = data_source_summary(row.data)
        pending.append(row)
        if len(pending) < batch_size:
            continue
        # Batch is full: write it out and start collecting the next one.
        Snapshot.objects.bulk_update(pending, ["data_source_counts"])
        pending = []

    # Write whatever is left over from the final, partially filled batch.
    if pending:
        Snapshot.objects.bulk_update(pending, ["data_source_counts"])
|
||||
|
||||
|
||||
def backfill_data_source_counts(apps, schema_editor):
    """Populate ``data_source_counts`` for all existing snapshots.

    On PostgreSQL the whole table is rewritten with a single ``UPDATE`` that
    derives the counts from the ``data`` JSON in the database. Every other
    backend goes through ``backfill_data_source_counts_python``.

    Args:
        apps: App registry, passed on to the Python fallback.
        schema_editor: Schema editor whose ``connection`` selects the backend
            and runs the SQL.
    """
    if schema_editor.connection.vendor != "postgresql":
        backfill_data_source_counts_python(apps)
        return

    # The SQL mirrors data_source_summary(): 'trudvsem' is reported as
    # 'vacancies', arrays count their elements, falsy payloads count as zero
    # (and are dropped), anything else counts as one record. Numeric zero is
    # included in the zero-count list so this path agrees with Python
    # truthiness in the fallback.
    # The table name is hard-coded; presumably it matches the model's
    # db_table -- TODO confirm.
    # NOTE(review): entries are ordered by the aliased source name here, while
    # the Python fallback orders by the original key. The two can differ when
    # an alias changes the sort position -- confirm which order is expected.
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            UPDATE organizations_data_snapshot snapshot
            SET data_source_counts = COALESCE(
                (
                    SELECT jsonb_agg(
                        jsonb_build_object(
                            'source',
                            source_counts.source,
                            'count',
                            source_counts.record_count
                        )
                        ORDER BY source_counts.source
                    )
                    FROM (
                        SELECT CASE source_items.key
                                WHEN 'trudvsem' THEN 'vacancies'
                                ELSE source_items.key
                            END AS source,
                            CASE
                                WHEN jsonb_typeof(source_items.value) = 'array'
                                    THEN jsonb_array_length(source_items.value)
                                WHEN source_items.value IN (
                                    'null'::jsonb,
                                    'false'::jsonb,
                                    '0'::jsonb,
                                    '[]'::jsonb,
                                    '{}'::jsonb,
                                    '""'::jsonb
                                )
                                    THEN 0
                                ELSE 1
                            END AS record_count
                        FROM jsonb_each(snapshot.data) AS source_items
                    ) AS source_counts
                    WHERE source_counts.record_count > 0
                ),
                '[]'::jsonb
            )
            """
        )
|
||||
|
||||
|
||||
def clear_data_source_counts(apps, schema_editor):
    """Reverse step: reset ``data_source_counts`` to ``[]`` on every snapshot.

    Args:
        apps: App registry used to look up the snapshot model.
        schema_editor: Unused; present to match the ``RunPython`` signature.
    """
    snapshots = apps.get_model("organizations", "OrganizationDataSnapshot").objects
    snapshots.update(data_source_counts=[])
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add ``data_source_counts`` to snapshots and backfill it from ``data``."""

    # Must run after migration 0004 of the same app.
    dependencies = [
        ("organizations", "0004_seed_daily_snapshot_refresh_schedule"),
    ]

    operations = [
        # Schema step: new JSON field whose default is an empty list.
        migrations.AddField(
            model_name="organizationdatasnapshot",
            name="data_source_counts",
            field=models.JSONField(
                default=list,
                # help_text (Russian): "Ready-made data_sources JSON for API v2".
                help_text="Готовый JSON data_sources для API v2",
                # verbose_name (Russian): "source counters".
                verbose_name="счетчики источников",
            ),
        ),
        # Data step: fill the new field for existing rows; the reverse
        # function resets it to an empty list.
        migrations.RunPython(
            backfill_data_source_counts,
            reverse_code=clear_data_source_counts,
        ),
    ]
|
||||
Reference in New Issue
Block a user