]> cat aescling's git repositories - mastodon.git/commitdiff
Change search indexing to use batches to minimize resource usage (#18451)
author Eugen Rochko <eugen@zeonfederated.com>
Wed, 18 May 2022 21:29:14 +0000 (23:29 +0200)
committer single-right-quote <11325618-aescling@users.noreply.gitlab.com>
Fri, 27 May 2022 03:53:33 +0000 (23:53 -0400)
Gemfile
app/chewy/accounts_index.rb
app/chewy/statuses_index.rb
app/chewy/tags_index.rb
app/workers/scheduler/indexing_scheduler.rb [new file with mode: 0644]
config/application.rb
config/initializers/chewy.rb
config/sidekiq.yml
lib/chewy/strategy/custom_sidekiq.rb [deleted file]
lib/chewy/strategy/mastodon.rb [new file with mode: 0644]

diff --git a/Gemfile b/Gemfile
index 7f634f917908df58685891e0762e669187868cad..84a9ffa6e189644b251a5cdc0274957b90f4759f 100644 (file)
--- a/Gemfile
+++ b/Gemfile
@@ -81,7 +81,7 @@ gem 'scenic', '~> 1.6'
 gem 'sidekiq', '~> 6.4'
 gem 'sidekiq-scheduler', '~> 4.0'
 gem 'sidekiq-unique-jobs', '~> 7.1'
-gem 'sidekiq-bulk', '~>0.2.0'
+gem 'sidekiq-bulk', '~> 0.2.0'
 gem 'simple-navigation', '~> 4.3'
 gem 'simple_form', '~> 5.1'
 gem 'sprockets-rails', '~> 3.4', require: 'sprockets/railtie'
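diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb
index 6f9ea76e9a933b8fce8fddf1842aa024bcc84a55..763958a3f95363a1b8a0cfb00314ffb6dd1b03ff 100644 (file)
--- a/app/chewy/accounts_index.rb
+++ b/app/chewy/accounts_index.rb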
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 class AccountsIndex < Chewy::Index
-  settings index: { refresh_interval: '5m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     analyzer: {
       content: {
         tokenizer: 'whitespace',
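diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 1304aeedbdd36b097a190033b1df79caa8e8bc8e..c20009879972db0a6b7d3330557e83bd26fff045 100644 (file)
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb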
@@ -3,7 +3,7 @@
 class StatusesIndex < Chewy::Index
   include FormattingHelper
 
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     filter: {
       english_stop: {
         type: 'stop',
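diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb
index f9db2b03af172458a896fdfa721373b66365d617..a5b139bcaaceeb96e260351c0923888c1a901f37 100644 (file)
--- a/app/chewy/tags_index.rb
+++ b/app/chewy/tags_index.rb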
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 class TagsIndex < Chewy::Index
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     analyzer: {
       content: {
         tokenizer: 'keyword',
diff --git a/app/workers/scheduler/indexing_scheduler.rb b/app/workers/scheduler/indexing_scheduler.rb
new file mode 100644 (file)
index 0000000..3a6f47a
--- /dev/null
+++ b/app/workers/scheduler/indexing_scheduler.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+# Periodic Sidekiq job that flushes the queued search-index updates.
+#
+# For each index class it reads the IDs accumulated in the Redis set
+# "chewy:queue:<IndexName>", imports them, and removes the imported
+# IDs from the set.
+class Scheduler::IndexingScheduler
+  include Sidekiq::Worker
+  include Redisable
+
+  sidekiq_options retry: 0
+
+  # Upper bound on the number of IDs passed to a single import! call.
+  IMPORT_BATCH_SIZE = 1_000
+
+  # Drains every index queue. Does nothing when Chewy is disabled,
+  # matching the guard used when IDs are queued.
+  def perform
+    return unless Chewy.enabled?
+
+    indexes.each do |type|
+      queue_key = "chewy:queue:#{type.name}"
+
+      with_redis do |redis|
+        # An empty queue yields no slices, so nothing is imported.
+        redis.smembers(queue_key).each_slice(IMPORT_BATCH_SIZE) do |ids|
+          type.import!(ids)
+
+          # Reached only if import! returned without raising; the whole
+          # batch is removed with a single SREM call.
+          redis.srem(queue_key, ids)
+        end
+      end
+    end
+  end
+
+  # Index classes whose queues are drained, in processing order.
+  def indexes
+    [AccountsIndex, TagsIndex, StatusesIndex]
+  end
+end
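diff --git a/config/application.rb b/config/application.rb
index 64987cfe7b1a97041d713080ea8193170cf69e96..24fa2a978197ef4662de8959c0f518e7ecedc434 100644 (file)
--- a/config/application.rb
+++ b/config/application.rb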
@@ -38,7 +38,7 @@ require_relative '../lib/mastodon/version'
 require_relative '../lib/mastodon/rack_middleware'
 require_relative '../lib/devise/two_factor_ldap_authenticatable'
 require_relative '../lib/devise/two_factor_pam_authenticatable'
-require_relative '../lib/chewy/strategy/custom_sidekiq'
+require_relative '../lib/chewy/strategy/mastodon'
 require_relative '../lib/webpacker/manifest_extensions'
 require_relative '../lib/webpacker/helper_extensions'
 require_relative '../lib/rails/engine_extensions'
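diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb
index f303fc54d3a1a4a4990ac222100e681e2ea5576a..752fc3c6dfe55105bf9621b2203403a2236e16d9 100644 (file)
--- a/config/initializers/chewy.rb
+++ b/config/initializers/chewy.rb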
@@ -13,15 +13,14 @@ Chewy.settings = {
   journal: false,
   user: user,
   password: password,
-  sidekiq: { queue: 'pull' },
 }
 
 # We use our own async strategy even outside the request-response
 # cycle, which takes care of checking if Elasticsearch is enabled
 # or not. However, mind that for the Rails console, the :urgent
 # strategy is set automatically with no way to override it.
-Chewy.root_strategy              = :custom_sidekiq
-Chewy.request_strategy           = :custom_sidekiq
+Chewy.root_strategy              = :mastodon
+Chewy.request_strategy           = :mastodon
 Chewy.use_after_commit_callbacks = false
 
 module Chewy
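diff --git a/config/sidekiq.yml b/config/sidekiq.yml
index 26be263265963bb090a7bc4df663433374a177ec..2a3871468460f3a62a0294c0af42ca2cefc7a66d 100644 (file)
--- a/config/sidekiq.yml
+++ b/config/sidekiq.yml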
     every: '6h'
     class: Scheduler::Trends::ReviewNotificationsScheduler
     queue: scheduler
+  indexing_scheduler:
+    every: '5m'
+    class: Scheduler::IndexingScheduler
+    queue: scheduler
   media_cleanup_scheduler:
     cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
     class: Scheduler::MediaCleanupScheduler
diff --git a/lib/chewy/strategy/custom_sidekiq.rb b/lib/chewy/strategy/custom_sidekiq.rb
deleted file mode 100644 (file)
index 794ae4e..0000000
--- a/lib/chewy/strategy/custom_sidekiq.rb
+++ /dev/null
@@ -1,11 +0,0 @@
-# frozen_string_literal: true
-
-module Chewy
-  class Strategy
-    class CustomSidekiq < Sidekiq
-      def update(_type, _objects, _options = {})
-        super if Chewy.enabled?
-      end
-    end
-  end
-end
diff --git a/lib/chewy/strategy/mastodon.rb b/lib/chewy/strategy/mastodon.rb
new file mode 100644 (file)
index 0000000..ee8b921
--- /dev/null
+++ b/lib/chewy/strategy/mastodon.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module Chewy
+  class Strategy
+    # Chewy update strategy that defers indexing: IDs of changed
+    # objects are collected in memory per index and, when the strategy
+    # is left, added to the Redis set "chewy:queue:<IndexName>".
+    class Mastodon < Base
+      def initialize
+        super
+
+        # index class => array of pending IDs
+        @stash = Hash.new { |hash, key| hash[key] = [] }
+      end
+
+      # Records the IDs of +objects+ for +type+. No-op when Chewy is
+      # disabled or when there is nothing to record.
+      def update(type, objects, _options = {})
+        return unless Chewy.enabled?
+
+        ids = type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)
+        @stash[type].concat(ids) unless ids.empty?
+      end
+
+      # Pushes all stashed IDs to Redis in a single pipeline.
+      def leave
+        # Skip the Redis round trip entirely when nothing was stashed.
+        return if @stash.empty?
+
+        RedisConfiguration.with do |redis|
+          redis.pipelined do |pipeline|
+            @stash.each do |type, ids|
+              pipeline.sadd("chewy:queue:#{type.name}", ids)
+            end
+          end
+        end
+      end
+    end
+  end
+end