gem 'kaminari', '~> 1.2'
gem 'link_header', '~> 0.0'
gem 'mime-types', '~> 3.3.1', require: 'mime/types/columnar'
-gem 'nilsimsa', git: 'https://github.com/witgo/nilsimsa', ref: 'fd184883048b922b176939f851338d0a4971a532'
gem 'nokogiri', '~> 1.11'
gem 'nsa', git: 'https://github.com/Gargron/nsa', ref: 'd1079e0cdafdfed7f9f35478d13b9bdaa65965c0'
gem 'oj', '~> 3.11'
activerecord (>= 6.1.0)
activesupport (>= 6.1.0)
-GIT
- remote: https://github.com/witgo/nilsimsa
- revision: fd184883048b922b176939f851338d0a4971a532
- ref: fd184883048b922b176939f851338d0a4971a532
- specs:
- nilsimsa (1.1.2)
-
GEM
remote: https://rubygems.org/
specs:
microformats (~> 4.2)
mime-types (~> 3.3.1)
net-ldap (~> 0.17)
- nilsimsa!
nokogiri (~> 1.11)
nsa!
oj (~> 3.11)
@whitelist_enabled = whitelist_mode?
@profile_directory = Setting.profile_directory
@timeline_preview = Setting.timeline_preview
- @spam_check_enabled = Setting.spam_check_enabled
@trends_enabled = Setting.trends
end
resolve_thread(@status)
fetch_replies(@status)
- check_for_spam
distribute(@status)
forward_for_reply
end
Tombstone.exists?(uri: object_uri)
end
- def check_for_spam
- SpamCheck.perform(@status)
- end
-
def forward_for_reply
return unless @status.distributable? && @json['signature'].present? && reply_to_local?
+++ /dev/null
-# frozen_string_literal: true
-
-class SpamCheck
- include Redisable
- include ActionView::Helpers::TextHelper
-
- # Threshold over which two Nilsimsa values are considered
- # to refer to the same text
- NILSIMSA_COMPARE_THRESHOLD = 95
-
- # Nilsimsa doesn't work well on small inputs, so below
- # this size, we check only for exact matches with MD5
- NILSIMSA_MIN_SIZE = 10
-
- # How long to keep the trail of digests between updates,
- # there is no reason to store it forever
- EXPIRE_SET_AFTER = 1.week.seconds
-
- # How many digests to keep in an account's trail. If it's
- # too small, spam could rotate around different message templates
- MAX_TRAIL_SIZE = 10
-
- # How many detected duplicates to allow through before
- # considering the message as spam
- THRESHOLD = 5
-
- def initialize(status)
- @account = status.account
- @status = status
- end
-
- def skip?
- disabled? || already_flagged? || trusted? || no_unsolicited_mentions? || solicited_reply?
- end
-
- def spam?
- if insufficient_data?
- false
- elsif nilsimsa?
- digests_over_threshold?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
- else
- digests_over_threshold?('md5') { |_, other_digest| other_digest == digest }
- end
- end
-
- def flag!
- auto_report_status!
- end
-
- def remember!
- # The scores in sorted sets don't actually have enough bits to hold an exact
- # value of our snowflake IDs, so we use it only for its ordering property. To
- # get the correct status ID back, we have to save it in the string value
-
- redis.zadd(redis_key, @status.id, digest_with_algorithm)
- redis.zremrangebyrank(redis_key, 0, -(MAX_TRAIL_SIZE + 1))
- redis.expire(redis_key, EXPIRE_SET_AFTER)
- end
-
- def reset!
- redis.del(redis_key)
- end
-
- def hashable_text
- return @hashable_text if defined?(@hashable_text)
-
- @hashable_text = @status.text
- @hashable_text = remove_mentions(@hashable_text)
- @hashable_text = strip_tags(@hashable_text) unless @status.local?
- @hashable_text = normalize_unicode(@status.spoiler_text + ' ' + @hashable_text)
- @hashable_text = remove_whitespace(@hashable_text)
- end
-
- def insufficient_data?
- hashable_text.blank?
- end
-
- def digest
- @digest ||= begin
- if nilsimsa?
- Nilsimsa.new(hashable_text).hexdigest
- else
- Digest::MD5.hexdigest(hashable_text)
- end
- end
- end
-
- def digest_with_algorithm
- if nilsimsa?
- ['nilsimsa', digest, @status.id].join(':')
- else
- ['md5', digest, @status.id].join(':')
- end
- end
-
- class << self
- def perform(status)
- spam_check = new(status)
-
- return if spam_check.skip?
-
- if spam_check.spam?
- spam_check.flag!
- else
- spam_check.remember!
- end
- end
- end
-
- private
-
- def disabled?
- !Setting.spam_check_enabled
- end
-
- def remove_mentions(text)
- return text.gsub(Account::MENTION_RE, '') if @status.local?
-
- Nokogiri::HTML.fragment(text).tap do |html|
- mentions = @status.mentions.map { |mention| ActivityPub::TagManager.instance.url_for(mention.account) }
-
- html.traverse do |element|
- element.unlink if element.name == 'a' && mentions.include?(element['href'])
- end
- end.to_s
- end
-
- def normalize_unicode(text)
- text.unicode_normalize(:nfkc).downcase
- end
-
- def remove_whitespace(text)
- text.gsub(/\s+/, ' ').strip
- end
-
- def auto_report_status!
- status_ids = Status.where(visibility: %i(public unlisted)).where(id: matching_status_ids).pluck(:id) + [@status.id] if @status.distributable?
- ReportService.new.call(Account.representative, @account, status_ids: status_ids, comment: I18n.t('spam_check.spam_detected'))
- end
-
- def already_flagged?
- @account.silenced? || @account.targeted_reports.unresolved.where(account_id: -99).exists?
- end
-
- def trusted?
- @account.trust_level > Account::TRUST_LEVELS[:untrusted] || (@account.local? && @account.user_staff?)
- end
-
- def no_unsolicited_mentions?
- @status.mentions.all? { |mention| mention.silent? || (!@account.local? && !mention.account.local?) || mention.account.following?(@account) }
- end
-
- def solicited_reply?
- !@status.thread.nil? && @status.thread.mentions.where(account: @account).exists?
- end
-
- def nilsimsa_compare_value(first, second)
- first = [first].pack('H*')
- second = [second].pack('H*')
- bits = 0
-
- 0.upto(31) do |i|
- bits += Nilsimsa::POPC[255 & (first[i].ord ^ second[i].ord)].ord
- end
-
- 128 - bits # -128 <= Nilsimsa Compare Value <= 128
- end
-
- def nilsimsa?
- hashable_text.size > NILSIMSA_MIN_SIZE
- end
-
- def other_digests
- redis.zrange(redis_key, 0, -1)
- end
-
- def digests_over_threshold?(filter_algorithm)
- other_digests.select do |record|
- algorithm, other_digest, status_id = record.split(':')
-
- next unless algorithm == filter_algorithm
-
- yield algorithm, other_digest, status_id
- end.size >= THRESHOLD
- end
-
- def matching_status_ids
- if nilsimsa?
- other_digests.filter_map { |record| record.split(':')[2] if record.start_with?('nilsimsa') && nilsimsa_compare_value(digest, record.split(':')[1]) >= NILSIMSA_COMPARE_THRESHOLD }
- else
- other_digests.filter_map { |record| record.split(':')[2] if record.start_with?('md5') && record.split(':')[1] == digest }
- end
- end
-
- def redis_key
- @redis_key ||= "spam_check:#{@account.id}"
- end
-end
thumbnail
hero
mascot
- spam_check_enabled
trends
trendable_by_default
show_domain_blocks
show_known_fediverse_at_about_page
preview_sensitive_media
profile_directory
- spam_check_enabled
trends
trendable_by_default
noindex
end
status.save!
- check_for_spam(status)
mentions.each { |mention| create_notification(mention) }
end
def resolve_account_service
ResolveAccountService.new
end
-
- def check_for_spam(status)
- SpamCheck.perform(status)
- end
end
remove_from_hashtags
remove_from_public
remove_from_media if @status.media_attachments.any?
- remove_from_spam_check
remove_media
end
@status.media_attachments.destroy_all
end
- def remove_from_spam_check
- redis.zremrangebyscore("spam_check:#{@status.account_id}", @status.id, @status.id)
- end
-
def lock_options
{ redis: Redis.current, key: "distribute:#{@status.id}" }
end
= feature_hint(link_to(t('admin.dashboard.trends'), edit_admin_settings_path), @trends_enabled)
%li
= feature_hint(link_to(t('admin.dashboard.feature_relay'), admin_relays_path), @relay_enabled)
- %li
- = feature_hint(link_to(t('admin.dashboard.feature_spam_check'), edit_admin_settings_path), @spam_check_enabled)
.dashboard__widgets__versions
%div
.fields-group
= f.input :noindex, as: :boolean, wrapper: :with_label, label: t('admin.settings.default_noindex.title'), hint: t('admin.settings.default_noindex.desc_html')
- .fields-group
- = f.input :spam_check_enabled, as: :boolean, wrapper: :with_label, label: t('admin.settings.spam_check_enabled.title'), hint: t('admin.settings.spam_check_enabled.desc_html')
-
%hr.spacer/
.fields-group
feature_profile_directory: Profile directory
feature_registrations: Registrations
feature_relay: Federation relay
- feature_spam_check: Anti-spam
feature_timeline_preview: Timeline preview
features: Features
hidden_service: Federation with hidden services
desc_html: You can write your own privacy policy, terms of service or other legalese. You can use HTML tags
title: Custom terms of service
site_title: Server name
- spam_check_enabled:
- desc_html: Mastodon can auto-report accounts that send repeated unsolicited messages. There may be false positives.
- title: Anti-spam automation
thumbnail:
desc_html: Used for previews via OpenGraph and API. 1200x630px recommended
title: Server thumbnail
relationships: Follows and followers
two_factor_authentication: Two-factor Auth
webauthn_authentication: Security keys
- spam_check:
- spam_detected: This is an automated report. Spam has been detected.
statuses:
attached:
audio:
activity_api_enabled: true
peers_api_enabled: true
show_known_fediverse_at_about_page: true
- spam_check_enabled: true
show_domain_blocks: 'disabled'
show_domain_blocks_rationale: 'disabled'
require_invite_text: false
+++ /dev/null
-# frozen_string_literal: true
-
-require 'rails_helper'
-
-RSpec.describe SpamCheck do
- let!(:sender) { Fabricate(:account) }
- let!(:alice) { Fabricate(:account, username: 'alice') }
- let!(:bob) { Fabricate(:account, username: 'bob') }
-
- def status_with_html(text, options = {})
- status = PostStatusService.new.call(sender, { text: text }.merge(options))
- status.update_columns(text: Formatter.instance.format(status), local: false)
- status
- end
-
- describe '#hashable_text' do
- it 'removes mentions from HTML for remote statuses' do
- status = status_with_html('@alice Hello')
- expect(described_class.new(status).hashable_text).to eq 'hello'
- end
-
- it 'removes mentions from text for local statuses' do
- status = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?")
- expect(described_class.new(status).hashable_text).to eq 'hey , how are you?'
- end
- end
-
- describe '#insufficient_data?' do
- it 'returns true when there is no text' do
- status = status_with_html('@alice')
- expect(described_class.new(status).insufficient_data?).to be true
- end
-
- it 'returns false when there is text' do
- status = status_with_html('@alice h')
- expect(described_class.new(status).insufficient_data?).to be false
- end
- end
-
- describe '#digest' do
- it 'returns a string' do
- status = status_with_html('@alice Hello world')
- expect(described_class.new(status).digest).to be_a String
- end
- end
-
- describe '#spam?' do
- it 'returns false for a unique status' do
- status = status_with_html('@alice Hello')
- expect(described_class.new(status).spam?).to be false
- end
-
- it 'returns false for different statuses to the same recipient' do
- status1 = status_with_html('@alice Hello')
- described_class.new(status1).remember!
- status2 = status_with_html('@alice Are you available to talk?')
- expect(described_class.new(status2).spam?).to be false
- end
-
- it 'returns false for statuses with different content warnings' do
- status1 = status_with_html('@alice Are you available to talk?')
- described_class.new(status1).remember!
- status2 = status_with_html('@alice Are you available to talk?', spoiler_text: 'This is a completely different matter than what I was talking about previously, I swear!')
- expect(described_class.new(status2).spam?).to be false
- end
-
- it 'returns false for different statuses to different recipients' do
- status1 = status_with_html('@alice How is it going?')
- described_class.new(status1).remember!
- status2 = status_with_html('@bob Are you okay?')
- expect(described_class.new(status2).spam?).to be false
- end
-
- it 'returns false for very short different statuses to different recipients' do
- status1 = status_with_html('@alice 🙄')
- described_class.new(status1).remember!
- status2 = status_with_html('@bob Huh?')
- expect(described_class.new(status2).spam?).to be false
- end
-
- it 'returns false for statuses with no text' do
- status1 = status_with_html('@alice')
- described_class.new(status1).remember!
- status2 = status_with_html('@bob')
- expect(described_class.new(status2).spam?).to be false
- end
-
- it 'returns true for duplicate statuses to the same recipient' do
- described_class::THRESHOLD.times do
- status1 = status_with_html('@alice Hello')
- described_class.new(status1).remember!
- end
-
- status2 = status_with_html('@alice Hello')
- expect(described_class.new(status2).spam?).to be true
- end
-
- it 'returns true for duplicate statuses to different recipients' do
- described_class::THRESHOLD.times do
- status1 = status_with_html('@alice Hello')
- described_class.new(status1).remember!
- end
-
- status2 = status_with_html('@bob Hello')
- expect(described_class.new(status2).spam?).to be true
- end
-
- it 'returns true for nearly identical statuses with random numbers' do
- source_text = 'Sodium, atomic number 11, was first isolated by Humphry Davy in 1807. A chemical component of salt, he named it Na in honor of the saltiest region on earth, North America.'
-
- described_class::THRESHOLD.times do
- status1 = status_with_html('@alice ' + source_text + ' 1234')
- described_class.new(status1).remember!
- end
-
- status2 = status_with_html('@bob ' + source_text + ' 9568')
- expect(described_class.new(status2).spam?).to be true
- end
- end
-
- describe '#skip?' do
- it 'returns true when the sender is already silenced' do
- status = status_with_html('@alice Hello')
- sender.silence!
- expect(described_class.new(status).skip?).to be true
- end
-
- it 'returns true when the mentioned person follows the sender' do
- status = status_with_html('@alice Hello')
- alice.follow!(sender)
- expect(described_class.new(status).skip?).to be true
- end
-
- it 'returns false when even one mentioned person doesn\'t follow the sender' do
- status = status_with_html('@alice @bob Hello')
- alice.follow!(sender)
- expect(described_class.new(status).skip?).to be false
- end
-
- it 'returns true when the sender is replying to a status that mentions the sender' do
- parent = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?")
- status = status_with_html('@alice @bob Hello', thread: parent)
- expect(described_class.new(status).skip?).to be true
- end
- end
-
- describe '#remember!' do
- let(:status) { status_with_html('@alice') }
- let(:spam_check) { described_class.new(status) }
- let(:redis_key) { spam_check.send(:redis_key) }
-
- it 'remembers' do
- expect(Redis.current.exists?(redis_key)).to be true
- spam_check.remember!
- expect(Redis.current.exists?(redis_key)).to be true
- end
- end
-
- describe '#reset!' do
- let(:status) { status_with_html('@alice') }
- let(:spam_check) { described_class.new(status) }
- let(:redis_key) { spam_check.send(:redis_key) }
-
- before do
- spam_check.remember!
- end
-
- it 'resets' do
- expect(Redis.current.exists?(redis_key)).to be true
- spam_check.reset!
- expect(Redis.current.exists?(redis_key)).to be false
- end
- end
-
- describe '#flag!' do
- let!(:status1) { status_with_html('@alice General Kenobi you are a bold one') }
- let!(:status2) { status_with_html('@alice @bob General Kenobi, you are a bold one') }
-
- before do
- described_class.new(status1).remember!
- described_class.new(status2).flag!
- end
-
- it 'creates a report about the account' do
- expect(sender.targeted_reports.unresolved.count).to eq 1
- end
-
- it 'attaches both matching statuses to the report' do
- expect(sender.targeted_reports.first.status_ids).to include(status1.id, status2.id)
- end
- end
-end