statuses_cli.rb 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. # frozen_string_literal: true
  2. require_relative '../../config/boot'
  3. require_relative '../../config/environment'
  4. require_relative 'cli_helper'
  5. module Mastodon
  6. class StatusesCLI < Thor
  7. include CLIHelper
  8. include ActionView::Helpers::NumberHelper
  class << self
    # Tells Thor to terminate the process with a non-zero exit status when a
    # command fails, instead of only printing the error.
    def exit_on_failure?
      true
    end
  end
  option :days, type: :numeric, default: 90
  option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
  option :continue, type: :boolean, default: false, desc: 'If remove is not completed, execute from the previous continuation'
  option :clean_followed, type: :boolean, default: false, desc: 'Include the status of remote accounts that are followed by local accounts as candidates for remove'
  option :skip_status_remove, type: :boolean, default: false, desc: 'Skip status remove (run only cleanup tasks)'
  option :skip_media_remove, type: :boolean, default: false, desc: 'Skip remove orphaned media attachments'
  option :compress_database, type: :boolean, default: false, desc: 'Compress database and update the statistics. This option locks the table for a long time, so run it offline'
  desc 'remove', 'Remove unreferenced statuses'
  long_desc <<~LONG_DESC
    Remove statuses that are not referenced by local user activity, such as
    ones that came from relays, or belonging to users that were once followed
    by someone locally but no longer are.
    It also removes orphaned records and performs additional cleanup tasks
    such as updating statistics and recovering disk space.
    This is a computationally heavy procedure that creates extra database
    indices before commencing, and removes them afterward.
  LONG_DESC
  # Entry point of the `remove` command.
  #
  # Rejects a batch size below 1 (prints an error and exits with status 1),
  # then runs the cleanup stages in a fixed order: statuses, statistics for
  # statuses, orphaned media attachments, orphaned conversations, statistics
  # for conversations. The order matters: media and conversations only become
  # orphaned once the statuses referencing them are gone.
  def remove
    requested_batch_size = options[:batch_size]

    if requested_batch_size < 1
      say('Cannot run with this batch_size setting, must be at least 1', :red)
      exit(1)
    end

    # Stage 1: statuses, followed by a statistics refresh of that table.
    remove_statuses
    vacuum_and_analyze_statuses

    # Stage 2: records orphaned by stage 1.
    remove_orphans_media_attachments
    remove_orphans_conversations
    vacuum_and_analyze_conversations
  end
  40. private
  # Deletes remote statuses older than `options[:days]` days that the
  # extraction query finds unreferenced.
  #
  # Candidate ids are first written to the `statuses_to_be_deleted` table and
  # then deleted in batches of `options[:batch_size]`. When `--continue` is
  # given and that table still exists, the extraction is skipped and the
  # existing candidates are processed. The table is dropped after the last
  # batch.
  #
  # Temporary indices are created up front and always removed again, even
  # when the run fails part-way. When `--skip-status-remove` is given the
  # method returns immediately: nothing is printed and the database is not
  # touched (no indices were created, so there is nothing to clean up).
  def remove_statuses
    return if options[:skip_status_remove]

    connection = ActiveRecord::Base.connection

    begin
      say('Creating temporary database indices...')
      connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently, if_not_exists: true)

      max_id = Mastodon::Snowflake.id_at(options[:days].days.ago, with_random: false)

      # Monotonic clock: this step can run for a long time and the reported
      # duration must not be affected by wall-clock adjustments.
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      unless options[:continue] && connection.table_exists?('statuses_to_be_deleted')
        extract_statuses_to_be_deleted(connection, max_id)
      end

      say('Beginning statuses removal... This might take a while...')

      # Throwaway model mapped onto the candidate table so it can be walked
      # with `in_batches`.
      klass = Class.new(ApplicationRecord) do |c|
        c.table_name = 'statuses_to_be_deleted'
      end
      Object.const_set('StatusToBeDeleted', klass)

      scope     = StatusToBeDeleted
      processed = 0
      removed   = 0
      progress  = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil)

      scope.reorder(nil).in_batches(of: options[:batch_size]) do |relation|
        ids        = relation.pluck(:id)
        processed += ids.count
        removed   += Status.unscoped.where(id: ids).delete_all
        progress.increment
      end

      progress.stop
      connection.drop_table('statuses_to_be_deleted')

      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
      say("Done after #{elapsed}s, removed #{removed} out of #{processed} statuses.", :green)
    ensure
      # Runs on success and on failure so the temporary indices never outlive
      # the command; `if_exists` makes the calls safe when already removed.
      say('Removing temporary database indices to restore write performance...')
      connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
      connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
      connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url, if_exists: true)
    end
  end

  # (Re)creates `statuses_to_be_deleted` and fills it with the ids of remote
  # statuses below `max_id` that pass every NOT EXISTS filter in the query,
  # using two temporary indices that are dropped again once the query is done.
  def extract_statuses_to_be_deleted(connection, max_id)
    connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently, if_not_exists: true)
    connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently, if_not_exists: true)

    say('Extract the deletion target from statuses... This might take a while...')

    # `force: true` replaces any stale table left by an earlier run.
    connection.create_table('statuses_to_be_deleted', force: true)

    # Skip accounts followed by local accounts
    clean_followed_sql = 'AND NOT EXISTS (SELECT 1 FROM follows WHERE statuses.account_id = follows.target_account_id)' unless options[:clean_followed]

    connection.exec_insert(<<-SQL.squish, 'SQL', [[nil, max_id]])
      INSERT INTO statuses_to_be_deleted (id)
      SELECT statuses.id FROM statuses WHERE deleted_at IS NULL AND NOT local AND uri IS NOT NULL AND (id < $1)
      AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)
      AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))
      AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= $1))
      AND NOT EXISTS (SELECT 1 FROM status_pins WHERE statuses.id = status_id)
      AND NOT EXISTS (SELECT 1 FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
      AND NOT EXISTS (SELECT 1 FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
      AND NOT EXISTS (SELECT 1 FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
      #{clean_followed_sql}
    SQL

    say('Removing temporary database indices to restore write performance...')
    connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
    connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
  end
  # Destroys the media attachments returned by `MediaAttachment.unattached`
  # that were created more than `options[:days] - 1` days ago.
  #
  # Each record is destroyed individually with `destroy!`; a failure on one
  # record is logged to the progress bar and does not stop the run. Returns
  # immediately when `--skip-media-remove` is given.
  def remove_orphans_media_attachments
    return if options[:skip_media_remove]

    # Monotonic clock: the reported duration must not be affected by
    # wall-clock adjustments made while the step is running.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    say('Beginning removal of now-orphaned media attachments to free up disk space...')

    scope     = MediaAttachment.reorder(nil).unattached.where('created_at < ?', options[:days].pred.days.ago)
    processed = 0
    removed   = 0
    progress  = create_progress_bar(scope.count)

    scope.find_each do |media_attachment|
      media_attachment.destroy!
      removed += 1
    rescue => e
      progress.log pastel.red("Error processing #{media_attachment.id}: #{e}")
    ensure
      # Counted whether or not the record could be destroyed.
      progress.increment
      processed += 1
    end

    progress.stop

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    say("Done after #{elapsed}s, removed #{removed} out of #{processed} media_attachments.", :green)
  end
  # Deletes conversations that no status references any more.
  #
  # Candidate ids are first written to the `conversations_to_be_deleted`
  # table and then deleted in batches of `options[:batch_size]`. When
  # `--continue` is given and that table still exists, the extraction is
  # skipped and the existing candidates are processed. The table is dropped
  # after the last batch, and the temporary index on
  # `statuses.conversation_id` is always removed, even when the run fails.
  def remove_orphans_conversations
    # Monotonic clock: the reported duration must not be affected by
    # wall-clock adjustments made while the step is running.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    connection = ActiveRecord::Base.connection

    unless options[:continue] && connection.table_exists?('conversations_to_be_deleted')
      say('Creating temporary database indices...')
      connection.add_index(:statuses, :conversation_id, name: :index_statuses_conversation_id, algorithm: :concurrently, if_not_exists: true)

      say('Extract the deletion target from conversations... This might take a while...')

      # `force: true` replaces any stale table left by an earlier run.
      connection.create_table('conversations_to_be_deleted', force: true)

      connection.exec_insert(<<-SQL.squish, 'SQL')
        INSERT INTO conversations_to_be_deleted (id)
        SELECT id FROM conversations WHERE NOT EXISTS (SELECT 1 FROM statuses WHERE statuses.conversation_id = conversations.id)
      SQL

      say('Removing temporary database indices to restore write performance...')
      connection.remove_index(:statuses, name: :index_statuses_conversation_id, if_exists: true)
    end

    say('Beginning orphans removal... This might take a while...')

    # Throwaway model mapped onto the candidate table so it can be walked
    # with `in_batches`.
    klass = Class.new(ApplicationRecord) do |c|
      c.table_name = 'conversations_to_be_deleted'
    end
    Object.const_set('ConversationsToBeDeleted', klass)

    scope     = ConversationsToBeDeleted
    processed = 0
    removed   = 0
    progress  = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil)

    scope.in_batches(of: options[:batch_size]) do |relation|
      ids        = relation.pluck(:id)
      processed += ids.count
      removed   += Conversation.unscoped.where(id: ids).delete_all
      progress.increment
    end

    progress.stop
    connection.drop_table('conversations_to_be_deleted')

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    say("Done after #{elapsed}s, removed #{removed} out of #{processed} conversations.", :green)
  ensure
    # Looked up again rather than reusing the local, which is still nil if
    # obtaining the connection itself was what failed.
    say('Removing temporary database indices to restore write performance...')
    ActiveRecord::Base.connection.remove_index(:statuses, name: :index_statuses_conversation_id, if_exists: true)
  end
  # Post-removal maintenance of the `statuses` table.
  #
  # By default only `ANALYZE` is run. With `--compress-database` the table is
  # rewritten with `VACUUM FULL ANALYZE` and then reindexed instead; per the
  # option's own description this locks the table for a long time.
  def vacuum_and_analyze_statuses
    unless options[:compress_database]
      say('Run ANALYZE to statuses...')
      return ActiveRecord::Base.connection.execute('ANALYZE statuses')
    end

    say('Run VACUUM FULL ANALYZE to statuses...')
    ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE statuses')

    say('Run REINDEX to statuses...')
    ActiveRecord::Base.connection.execute('REINDEX TABLE statuses')
  end
  # Post-removal maintenance of the `conversations` table.
  #
  # By default only `ANALYZE` is run. With `--compress-database` the table is
  # rewritten with `VACUUM FULL ANALYZE` and then reindexed instead; per the
  # option's own description this locks the table for a long time.
  def vacuum_and_analyze_conversations
    unless options[:compress_database]
      say('Run ANALYZE to conversations...')
      return ActiveRecord::Base.connection.execute('ANALYZE conversations')
    end

    say('Run VACUUM FULL ANALYZE to conversations...')
    ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE conversations')

    say('Run REINDEX to conversations...')
    ActiveRecord::Base.connection.execute('REINDEX TABLE conversations')
  end
  172. end
  173. end