120 lines
4.2 KiB
Python
120 lines
4.2 KiB
Python
from django.db import migrations, models
|
|
|
|
|
|
# Snapshot source keys that are renamed on output; any key not listed here
# is passed through unchanged by to_api_data_source().
API_DATA_SOURCE_ALIASES = {"trudvsem": "vacancies"}
|
|
|
|
|
|
def to_api_data_source(source):
    """Return the output name for a snapshot source key.

    The key is coerced to ``str``. If it has an entry in
    ``API_DATA_SOURCE_ALIASES`` the alias is returned; otherwise the
    stringified key itself is returned.
    """
    key = str(source)
    return API_DATA_SOURCE_ALIASES.get(key, key)
|
|
|
|
|
|
def data_source_summary(data):
    """Summarise how many records each source contributes to a snapshot.

    Args:
        data: Mapping of source key -> payload (the snapshot's ``data``
            value). ``None`` or an empty mapping is accepted.

    Returns:
        A list of ``{"source": <name>, "count": <int>}`` dicts, one per
        source with a non-zero count, ordered by the original (pre-alias)
        source key. A list payload counts as its length; any other truthy
        payload counts as 1; falsy payloads are omitted. Returns ``[]``
        when ``data`` is empty or ``None``.
    """
    # A row without data must not abort the backfill: the SQL path yields
    # an empty list for NULL data, so the Python path does the same.
    if not data:
        return []

    summary = []
    for source in sorted(data):
        value = data[source]
        # Lists contribute one record per element; everything else is a
        # single record when truthy and nothing when falsy.
        count = len(value) if isinstance(value, list) else int(bool(value))
        if count:
            summary.append({"source": to_api_data_source(source), "count": count})
    return summary
|
|
|
|
|
|
def backfill_data_source_counts_python(apps):
    """Recompute ``data_source_counts`` for every snapshot using the ORM.

    Database-agnostic fallback used by ``backfill_data_source_counts`` when
    the connection is not PostgreSQL. Rows are streamed and written back in
    batches of 100 so the whole table is never held in memory.

    Args:
        apps: Historical app registry supplied by the migration runner.
    """
    batch_size = 100
    snapshot_cls = apps.get_model("organizations", "OrganizationDataSnapshot")
    # Load only the columns that are read or written here.
    rows = snapshot_cls.objects.only(
        "organization_id",
        "data",
        "data_source_counts",
    )

    pending = []
    # NOTE: rows are updated while the iterator is still open. The new value
    # depends only on ``row.data``, so recomputing a row twice is harmless if
    # a backend happens to yield it more than once.
    for row in rows.iterator(chunk_size=batch_size):
        row.data_source_counts = data_source_summary(row.data)
        pending.append(row)
        if len(pending) < batch_size:
            continue
        snapshot_cls.objects.bulk_update(pending, ["data_source_counts"])
        pending = []

    # Flush the final, partially filled batch.
    if pending:
        snapshot_cls.objects.bulk_update(pending, ["data_source_counts"])
|
|
|
|
|
|
def backfill_data_source_counts(apps, schema_editor):
    """Forward data migration: fill ``data_source_counts`` from ``data``.

    On PostgreSQL the whole table is updated with one set-based statement.
    On any other backend the work is delegated to
    ``backfill_data_source_counts_python``.

    The SQL must produce the same result as ``data_source_summary``:

    * array payloads count as their length;
    * every payload that Python treats as falsy (null, false, 0, empty
      array/object/string) counts as 0 and is dropped;
    * any other payload counts as 1;
    * entries are ordered by the ORIGINAL source key (before aliasing),
      compared byte-wise (``COLLATE "C"``) to match Python's ``sorted()``.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Schema editor whose connection is used to pick the
            implementation and to run the SQL.
    """
    if schema_editor.connection.vendor != "postgresql":
        backfill_data_source_counts_python(apps)
        return

    # NOTE(review): the table name is hard-coded; confirm it matches the
    # model's Meta.db_table.
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            UPDATE organizations_data_snapshot snapshot
            SET data_source_counts = COALESCE(
                (
                    SELECT jsonb_agg(
                        jsonb_build_object(
                            'source',
                            source_counts.source,
                            'count',
                            source_counts.record_count
                        )
                        ORDER BY source_counts.source_key COLLATE "C"
                    )
                    FROM (
                        SELECT source_items.key AS source_key,
                            CASE source_items.key
                                WHEN 'trudvsem' THEN 'vacancies'
                                ELSE source_items.key
                            END AS source,
                            CASE
                                WHEN jsonb_typeof(source_items.value) = 'array'
                                    THEN jsonb_array_length(source_items.value)
                                WHEN source_items.value IN (
                                    'null'::jsonb,
                                    'false'::jsonb,
                                    '0'::jsonb,
                                    '[]'::jsonb,
                                    '{}'::jsonb,
                                    '""'::jsonb
                                )
                                    THEN 0
                                ELSE 1
                            END AS record_count
                        FROM jsonb_each(snapshot.data) AS source_items
                    ) AS source_counts
                    WHERE source_counts.record_count > 0
                ),
                '[]'::jsonb
            )
            """
        )
|
|
|
|
|
|
def clear_data_source_counts(apps, schema_editor):
    """Reverse data migration: reset ``data_source_counts`` to ``[]`` on all rows.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Unused; present because RunPython callables receive it.
    """
    apps.get_model("organizations", "OrganizationDataSnapshot").objects.update(
        data_source_counts=[]
    )
|
|
|
|
|
|
class Migration(migrations.Migration):
    """Add ``data_source_counts`` to OrganizationDataSnapshot and backfill it."""

    dependencies = [("organizations", "0004_seed_daily_snapshot_refresh_schedule")]

    operations = [
        # Schema step: new JSON column defaulting to an empty list.
        # help_text ~ "Ready-made data_sources JSON for API v2",
        # verbose_name ~ "source counters".
        migrations.AddField(
            model_name="organizationdatasnapshot",
            name="data_source_counts",
            field=models.JSONField(
                default=list,
                help_text="Готовый JSON data_sources для API v2",
                verbose_name="счетчики источников",
            ),
        ),
        # Data step: forward fills the column from ``data``; the second
        # argument is the reverse callable, which resets it to [].
        migrations.RunPython(backfill_data_source_counts, clear_data_source_counts),
    ]
|