From 6b896b20cc87f9665ded7ed36d1ace32438ca9e3 Mon Sep 17 00:00:00 2001 From: Claire Date: Thu, 3 Aug 2023 11:12:52 +0200 Subject: [PATCH] Add primary key to preview_cards_statuses join table (includes deduplication migration) (#25243) --- app/services/fetch_link_card_service.rb | 10 +++-- ..._unique_index_on_preview_cards_statuses.rb | 39 +++++++++++++++++++ ...ey_to_preview_cards_statuses_join_table.rb | 20 ++++++++++ db/schema.rb | 4 +- lib/tasks/tests.rake | 25 ++++++++++++ 5 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 db/post_migrate/20230803082451_add_unique_index_on_preview_cards_statuses.rb create mode 100644 db/post_migrate/20230803112520_add_primary_key_to_preview_cards_statuses_join_table.rb diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 886582460..13775e63c 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -61,9 +61,13 @@ class FetchLinkCardService < BaseService end def attach_card - @status.preview_cards << @card - Rails.cache.delete(@status) - Trends.links.register(@status) + with_redis_lock("attach_card:#{@status.id}") do + return if @status.preview_cards.any? + + @status.preview_cards << @card + Rails.cache.delete(@status) + Trends.links.register(@status) + end end def parse_urls diff --git a/db/post_migrate/20230803082451_add_unique_index_on_preview_cards_statuses.rb b/db/post_migrate/20230803082451_add_unique_index_on_preview_cards_statuses.rb new file mode 100644 index 000000000..936d7840e --- /dev/null +++ b/db/post_migrate/20230803082451_add_unique_index_on_preview_cards_statuses.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +class AddUniqueIndexOnPreviewCardsStatuses < ActiveRecord::Migration[6.1] + disable_ddl_transaction! + + def up + add_index :preview_cards_statuses, [:status_id, :preview_card_id], name: :preview_cards_statuses_pkey, algorithm: :concurrently, unique: true + rescue ActiveRecord::RecordNotUnique + deduplicate_and_reindex! + end + + def down + remove_index :preview_cards_statuses, name: :preview_cards_statuses_pkey + end + + private + + def deduplicate_and_reindex! + deduplicate_preview_cards! + + safety_assured { execute 'REINDEX INDEX preview_cards_statuses_pkey' } + rescue ActiveRecord::RecordNotUnique + retry + end + + def deduplicate_preview_cards! + # Statuses should have only one preview card at most, even if that's not the database + # constraint we will end up with + duplicate_ids = select_all('SELECT status_id FROM preview_cards_statuses GROUP BY status_id HAVING count(*) > 1;').rows + + duplicate_ids.each_slice(1000) do |ids| + # This one is tricky: since we don't have primary keys to keep only one record, + # use the physical `ctid` + safety_assured do + execute "DELETE FROM preview_cards_statuses p WHERE p.status_id IN (#{ids.join(', ')}) AND p.ctid NOT IN (SELECT q.ctid FROM preview_cards_statuses q WHERE q.status_id = p.status_id LIMIT 1)" + end + end + end +end diff --git a/db/post_migrate/20230803112520_add_primary_key_to_preview_cards_statuses_join_table.rb b/db/post_migrate/20230803112520_add_primary_key_to_preview_cards_statuses_join_table.rb new file mode 100644 index 000000000..34877ac67 --- /dev/null +++ b/db/post_migrate/20230803112520_add_primary_key_to_preview_cards_statuses_join_table.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class AddPrimaryKeyToPreviewCardsStatusesJoinTable < ActiveRecord::Migration[6.1] + disable_ddl_transaction! + + def up + safety_assured do + execute 'ALTER TABLE preview_cards_statuses ADD PRIMARY KEY USING INDEX preview_cards_statuses_pkey' + end + end + + def down + safety_assured do + # I have found no way to demote the primary key to an index, instead, re-create the index + execute 'CREATE UNIQUE INDEX CONCURRENTLY preview_cards_statuses_pkey_tmp ON preview_cards_statuses (status_id, preview_card_id)' + execute 'ALTER TABLE preview_cards_statuses DROP CONSTRAINT preview_cards_statuses_pkey' + execute 'ALTER INDEX preview_cards_statuses_pkey_tmp RENAME TO preview_cards_statuses_pkey' + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 59ab9dbed..0c8ede383 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2023_07_24_160715) do +ActiveRecord::Schema[7.0].define(version: 2023_08_03_112520) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -805,7 +805,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_07_24_160715) do t.index ["url"], name: "index_preview_cards_on_url", unique: true end - create_table "preview_cards_statuses", id: false, force: :cascade do |t| + create_table "preview_cards_statuses", primary_key: ["status_id", "preview_card_id"], force: :cascade do |t| t.bigint "preview_card_id", null: false t.bigint "status_id", null: false t.index ["status_id", "preview_card_id"], name: "index_preview_cards_statuses_on_status_id_and_preview_card_id" diff --git a/lib/tasks/tests.rake b/lib/tasks/tests.rake index 3c88ce450..ef4d46fe0 100644 --- a/lib/tasks/tests.rake +++ b/lib/tasks/tests.rake @@ -63,6 +63,11 @@ namespace :tests do puts 'Account domains not properly normalized' exit(1) end + + unless Status.find(12).preview_cards.pluck(:url) == ['https://joinmastodon.org/'] + puts 'Preview cards not deduplicated as expected' + exit(1) + end end desc 'Populate the database with test data for 2.4.3' @@ -238,6 +243,11 @@ namespace :tests do (10, 2, '@admin hey!', NULL, 1, 3, now(), now()), (11, 1, '@user hey!', 10, 1, 3, now(), now()); + INSERT INTO "statuses" + (id, account_id, text, created_at, updated_at) + VALUES + (12, 1, 'check out https://joinmastodon.org/', now(), now()); + -- mentions (from previous statuses) INSERT INTO "mentions" @@ -326,6 +336,21 @@ namespace :tests do (1, 6, 2, 'Follow', 2, now(), now()), (2, 2, 1, 'Mention', 4, now(), now()), (3, 1, 2, 'Mention', 5, now(), now()); + + -- preview cards + + INSERT INTO "preview_cards" + (id, url, title, created_at, updated_at) + VALUES + (1, 'https://joinmastodon.org/', 'Mastodon - Decentralized social media', now(), now()); + + -- many-to-many association between preview cards and statuses + + INSERT INTO "preview_cards_statuses" + (status_id, preview_card_id) + VALUES + (12, 1), + (12, 1); SQL end end