gem 'browser'
gem 'charlock_holmes', '~> 0.7.7'
gem 'iso-639'
-gem 'chewy', '~> 5.2'
+gem 'chewy', '~> 7.2'
gem 'cld3', '~> 3.4.2'
gem 'devise', '~> 4.8'
gem 'devise-two-factor', '~> 4.0'
activesupport
cbor (0.5.9.6)
charlock_holmes (0.7.7)
- chewy (5.2.0)
+ chewy (7.2.3)
activesupport (>= 5.2)
- elasticsearch (>= 2.0.0)
+ elasticsearch (>= 7.12.0, < 7.14.0)
elasticsearch-dsl
chunky_png (1.4.0)
cld3 (3.4.2)
railties (>= 3.2)
e2mmap (0.1.0)
ed25519 (1.2.4)
- elasticsearch (7.10.1)
- elasticsearch-api (= 7.10.1)
- elasticsearch-transport (= 7.10.1)
- elasticsearch-api (7.10.1)
+ elasticsearch (7.13.3)
+ elasticsearch-api (= 7.13.3)
+ elasticsearch-transport (= 7.13.3)
+ elasticsearch-api (7.13.3)
multi_json
- elasticsearch-dsl (0.1.9)
- elasticsearch-transport (7.10.1)
+ elasticsearch-dsl (0.1.10)
+ elasticsearch-transport (7.13.3)
faraday (~> 1)
multi_json
encryptor (3.0.0)
fabrication (2.22.0)
faker (2.19.0)
i18n (>= 1.6, < 2)
- faraday (1.3.0)
+ faraday (1.8.0)
+ faraday-em_http (~> 1.0)
+ faraday-em_synchrony (~> 1.0)
+ faraday-excon (~> 1.1)
+ faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0)
+ faraday-net_http_persistent (~> 1.1)
+ faraday-patron (~> 1.0)
+ faraday-rack (~> 1.0)
multipart-post (>= 1.2, < 3)
- ruby2_keywords
+ ruby2_keywords (>= 0.0.4)
+ faraday-em_http (1.0.0)
+ faraday-em_synchrony (1.0.0)
+ faraday-excon (1.1.0)
+ faraday-httpclient (1.0.1)
faraday-net_http (1.0.1)
+ faraday-net_http_persistent (1.2.0)
+ faraday-patron (1.0.0)
+ faraday-rack (1.0.0)
fast_blank (1.0.1)
fastimage (2.2.5)
ffi (1.15.4)
ruby-saml (1.13.0)
nokogiri (>= 1.10.5)
rexml
- ruby2_keywords (0.0.4)
+ ruby2_keywords (0.0.5)
rufus-scheduler (3.7.0)
fugit (~> 1.1, >= 1.1.6)
safety_net_attestation (0.4.0)
capistrano-yarn (~> 2.0)
capybara (~> 3.36)
charlock_holmes (~> 0.7.7)
- chewy (~> 5.2)
+ chewy (~> 7.2)
cld3 (~> 3.4.2)
climate_control (~> 0.2)
color_diff (~> 0.1)
},
}
- define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do
- root date_detection: false do
- field :id, type: 'long'
+ index_scope ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? }
- field :display_name, type: 'text', analyzer: 'content' do
- field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
- end
+ root date_detection: false do
+ field :id, type: 'long'
- field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
- field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
- end
+ field :display_name, type: 'text', analyzer: 'content' do
+ field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
+ end
- field :following_count, type: 'long', value: ->(account) { account.following.local.count }
- field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
- field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
+ field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
+ field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
+
+ field :following_count, type: 'long', value: ->(account) { account.following.local.count }
+ field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
+ field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
end
end
},
}
- define_type ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll) do
- crutch :mentions do |collection|
- data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
- data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
- end
+ index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll)
- crutch :favourites do |collection|
- data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
- data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
- end
+ crutch :mentions do |collection|
+ data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
+ data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+ end
- crutch :reblogs do |collection|
- data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
- data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
- end
+ crutch :favourites do |collection|
+ data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
+ data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+ end
- crutch :bookmarks do |collection|
- data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
- data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
- end
+ crutch :reblogs do |collection|
+ data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
+ data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+ end
- root date_detection: false do
- field :id, type: 'long'
- field :account_id, type: 'long'
+ crutch :bookmarks do |collection|
+ data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
+ data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+ end
- field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
- field :stemmed, type: 'text', analyzer: 'content'
- end
+ root date_detection: false do
+ field :id, type: 'long'
+ field :account_id, type: 'long'
- field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
+ field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
+ field :stemmed, type: 'text', analyzer: 'content'
end
+
+ field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
end
end
},
}
- define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do
- root date_detection: false do
- field :name, type: 'text', analyzer: 'content' do
- field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
- end
+ index_scope ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? }
- field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
- field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
- field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
+ root date_detection: false do
+ field :name, type: 'text', analyzer: 'content' do
+ field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
+
+ field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
+ field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
+ field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
end
end
delegate :chosen_languages, to: :user, prefix: false, allow_nil: true
- update_index('accounts#account', :self)
+ update_index('accounts', :self)
def local?
domain.nil?
belongs_to :account, inverse_of: :account_stat
- update_index('accounts#account', :account)
+ update_index('accounts', :account)
end
class Bookmark < ApplicationRecord
include Paginable
- update_index('statuses#status', :status) if Chewy.enabled?
+ update_index('statuses', :status) if Chewy.enabled?
belongs_to :account, inverse_of: :bookmarks
belongs_to :status, inverse_of: :bookmarks
class Favourite < ApplicationRecord
include Paginable
- update_index('statuses#status', :status)
+ update_index('statuses', :status)
belongs_to :account, inverse_of: :favourites
belongs_to :status, inverse_of: :favourites
# will be based on current time instead of `created_at`
attr_accessor :override_timestamps
- update_index('statuses#status', :proper)
+ update_index('statuses', :proper)
enum visibility: [:public, :unlisted, :private, :direct, :limited], _suffix: :visibility
scope :recently_used, ->(account) { joins(:statuses).where(statuses: { id: account.statuses.select(:id).limit(1000) }).group(:id).order(Arel.sql('count(*) desc')) }
scope :matches_name, ->(term) { where(arel_table[:name].lower.matches(arel_table.lower("#{sanitize_sql_like(Tag.normalize(term))}%"), nil, true)) } # Search with case-sensitive to use B-tree index
- update_index('tags#tag', :self)
+ update_index('tags', :self)
def to_param
name
# Since we skipped all callbacks, we also need to manually
# deindex the statuses
- Chewy.strategy.current.update(StatusesIndex::Status, statuses_and_reblogs) if Chewy.enabled?
+ Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled?
return if options[:skip_side_effects]
@account.favourites.in_batches do |favourites|
ids = favourites.pluck(:status_id)
StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)')
- Chewy.strategy.current.update(StatusesIndex::Status, ids) if Chewy.enabled?
+ Chewy.strategy.current.update(StatusesIndex, ids) if Chewy.enabled?
Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" })
favourites.delete_all
end
def purge_bookmarks!
@account.bookmarks.in_batches do |bookmarks|
- Chewy.strategy.current.update(StatusesIndex::Status, bookmarks.pluck(:status_id)) if Chewy.enabled?
+ Chewy.strategy.current.update(StatusesIndex, bookmarks.pluck(:status_id)) if Chewy.enabled?
bookmarks.delete_all
end
end
# Mastodon is run with hidden services enabled, because
# ElasticSearch is *not* supposed to be accessed through a proxy
Faraday.ignore_env_proxy = true
-
-# Elasticsearch 7.x workaround
-Elasticsearch::Transport::Client.prepend Module.new {
- def search(arguments = {})
- arguments[:rest_total_hits_as_int] = true
- super arguments
- end
-}
-
-Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
- def create(arguments = {})
- arguments[:include_type_name] = true
- super arguments
- end
-
- def put_mapping(arguments = {})
- arguments[:include_type_name] = true
- super arguments
- end
-}
progress.title = 'Estimating workload '
# Estimate the amount of data that has to be imported first
- indices.each do |index|
- index.types.each do |type|
- progress.total = (progress.total || 0) + type.adapter.default_scope.count
- end
- end
+ progress.total = indices.sum { |index| index.adapter.default_scope.count }
# Now import all the actual data. Mind that unlike chewy:sync, we don't
# fetch and compare all record IDs from the database and the index to
batch_size = 1_000
slice_size = (batch_size / options[:concurrency]).ceil
- index.types.each do |type|
- type.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
- futures = []
+ index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
+ futures = []
- batch.each_slice(slice_size) do |records|
- futures << Concurrent::Future.execute(executor: pool) do
- begin
- if !progress.total.nil? && progress.progress + records.size > progress.total
- # The number of items has changed between start and now,
- # since there is no good way to predict the final count from
- # here, just change the progress bar to an indeterminate one
+ batch.each_slice(slice_size) do |records|
+ futures << Concurrent::Future.execute(executor: pool) do
+ begin
+ if !progress.total.nil? && progress.progress + records.size > progress.total
+ # The number of items has changed between start and now,
+ # since there is no good way to predict the final count from
+ # here, just change the progress bar to an indeterminate one
- progress.total = nil
- end
+ progress.total = nil
+ end
- grouped_records = nil
- bulk_body = nil
- index_count = 0
- delete_count = 0
+ grouped_records = nil
+ bulk_body = nil
+ index_count = 0
+ delete_count = 0
- ActiveRecord::Base.connection_pool.with_connection do
- grouped_records = type.adapter.send(:grouped_objects, records)
- bulk_body = Chewy::Type::Import::BulkBuilder.new(type, **grouped_records).bulk_body
+ ActiveRecord::Base.connection_pool.with_connection do
+ grouped_records = records.to_a.group_by do |record|
+ index.adapter.send(:delete_from_index?, record) ? :delete : :to_index
end
- index_count = grouped_records[:index].size if grouped_records.key?(:index)
- delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
-
- # The following is an optimization for statuses specifically, since
- # we want to de-index statuses that cannot be searched by anybody,
- # but can't use Chewy's delete_if logic because it doesn't use
- # crutches and our searchable_by logic depends on them
- if type == StatusesIndex::Status
- bulk_body.map! do |entry|
- if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
- index_count -= 1
- delete_count += 1
-
- { delete: entry[:index].except(:data) }
- else
- entry
- end
+ bulk_body = Chewy::Index::Import::BulkBuilder.new(index, **grouped_records).bulk_body
+ end
+
+ index_count = grouped_records[:to_index].size if grouped_records.key?(:to_index)
+ delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
+
+ # The following is an optimization for statuses specifically, since
+ # we want to de-index statuses that cannot be searched by anybody,
+ # but can't use Chewy's delete_if logic because it doesn't use
+ # crutches and our searchable_by logic depends on them.
+ #
+ # Entries of the bulk body are keyed by their bulk action (:index or
+ # :delete). :to_index is only the keyword used to hand the grouped
+ # records to the BulkBuilder and never appears as an entry key, so
+ # the entries have to be matched on :index here
+ if index == StatusesIndex
+ bulk_body.map! do |entry|
+ if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
+ index_count -= 1
+ delete_count += 1
+
+ { delete: entry[:index].except(:data) }
+ else
+ entry
+ end
+ end
+ end
- Chewy::Type::Import::BulkRequest.new(type).perform(bulk_body)
+ Chewy::Index::Import::BulkRequest.new(index).perform(bulk_body)
- progress.progress += records.size
+ progress.progress += records.size
- added.increment(index_count)
- removed.increment(delete_count)
+ added.increment(index_count)
+ removed.increment(delete_count)
- sleep 1
- rescue => e
- progress.log pastel.red("Error importing #{index}: #{e}")
- end
+ sleep 1
+ rescue => e
+ progress.log pastel.red("Error importing #{index}: #{e}")
end
end
-
- futures.map(&:value)
end
+
+ futures.map(&:value)
end
end