gem 'kaminari', '~> 1.1'
gem 'link_header', '~> 0.0'
gem 'mime-types', '~> 3.2', require: 'mime/types/columnar'
+gem 'nilsimsa', git: 'https://github.com/witgo/nilsimsa', ref: 'fd184883048b922b176939f851338d0a4971a532'
gem 'nokogiri', '~> 1.10'
gem 'nsa', '~> 0.2'
gem 'oj', '~> 3.7'
specs:
http_parser.rb (0.6.1)
+GIT
+ remote: https://github.com/witgo/nilsimsa
+ revision: fd184883048b922b176939f851338d0a4971a532
+ ref: fd184883048b922b176939f851338d0a4971a532
+ specs:
+ nilsimsa (1.1.2)
+
GEM
remote: https://rubygems.org/
specs:
microformats (~> 4.1)
mime-types (~> 3.2)
net-ldap (~> 0.10)
+ nilsimsa!
nokogiri (~> 1.10)
nsa (~> 0.2)
oj (~> 3.7)
resolve_thread(@status)
fetch_replies(@status)
+ check_for_spam
distribute(@status)
forward_for_reply if @status.distributable?
end
Account.local.where(username: local_usernames).exists?
end
+ def check_for_spam
+ spam_check = SpamCheck.new(@status)
+
+ return if spam_check.skip?
+
+ if spam_check.spam?
+ spam_check.flag!
+ else
+ spam_check.remember!
+ end
+ end
+
def forward_for_reply
return unless @json['signature'].present? && reply_to_local?
ActivityPub::RawDistributionWorker.perform_async(Oj.dump(@json), replied_to_status.account_id, [@account.preferred_inbox_url])
--- /dev/null
+# frozen_string_literal: true
+
+class SpamCheck
+ include Redisable
+ include ActionView::Helpers::TextHelper
+
+ NILSIMSA_COMPARE_THRESHOLD = 95
+ NILSIMSA_MIN_SIZE = 10
+ EXPIRE_SET_AFTER = 1.week.seconds
+
+ def initialize(status)
+ @account = status.account
+ @status = status
+ end
+
+ def skip?
+ already_flagged? || trusted? || no_unsolicited_mentions? || solicited_reply?
+ end
+
+ def spam?
+ if insufficient_data?
+ false
+ elsif nilsimsa?
+ any_other_digest?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
+ else
+ any_other_digest?('md5') { |_, other_digest| other_digest == digest }
+ end
+ end
+
+ def flag!
+ auto_silence_account!
+ auto_report_status!
+ end
+
+ def remember!
+ # The scores in sorted sets don't actually have enough bits to hold an exact
+ # value of our snowflake IDs, so we use it only for its ordering property. To
+ # get the correct status ID back, we have to save it in the string value
+
+ redis.zadd(redis_key, @status.id, digest_with_algorithm)
+ redis.zremrangebyrank(redis_key, '0', '-10')
+ redis.expire(redis_key, EXPIRE_SET_AFTER)
+ end
+
+ def reset!
+ redis.del(redis_key)
+ end
+
+ def hashable_text
+ return @hashable_text if defined?(@hashable_text)
+
+ @hashable_text = @status.text
+ @hashable_text = remove_mentions(@hashable_text)
+ @hashable_text = strip_tags(@hashable_text) unless @status.local?
+ @hashable_text = normalize_unicode(@status.spoiler_text + ' ' + @hashable_text)
+ @hashable_text = remove_whitespace(@hashable_text)
+ end
+
+ def insufficient_data?
+ hashable_text.blank?
+ end
+
+ def digest
+ @digest ||= begin
+ if nilsimsa?
+ Nilsimsa.new(hashable_text).hexdigest
+ else
+ Digest::MD5.hexdigest(hashable_text)
+ end
+ end
+ end
+
+ def digest_with_algorithm
+ if nilsimsa?
+ ['nilsimsa', digest, @status.id].join(':')
+ else
+ ['md5', digest, @status.id].join(':')
+ end
+ end
+
+ private
+
+ def remove_mentions(text)
+ return text.gsub(Account::MENTION_RE, '') if @status.local?
+
+ Nokogiri::HTML.fragment(text).tap do |html|
+ mentions = @status.mentions.map { |mention| ActivityPub::TagManager.instance.url_for(mention.account) }
+
+ html.traverse do |element|
+ element.unlink if element.name == 'a' && mentions.include?(element['href'])
+ end
+ end.to_s
+ end
+
+ def normalize_unicode(text)
+ text.unicode_normalize(:nfkc).downcase
+ end
+
+ def remove_whitespace(text)
+ text.gsub(/\s+/, ' ').strip
+ end
+
+ def auto_silence_account!
+ @account.silence!
+ end
+
+ def auto_report_status!
+ status_ids = Status.where(visibility: %i(public unlisted)).where(id: matching_status_ids).pluck(:id) + [@status.id] if @status.distributable?
+ ReportService.new.call(Account.representative, @account, status_ids: status_ids, comment: I18n.t('spam_check.spam_detected_and_silenced'))
+ end
+
+ def already_flagged?
+ @account.silenced?
+ end
+
+ def trusted?
+ @account.trust_level > Account::TRUST_LEVELS[:untrusted]
+ end
+
+ def no_unsolicited_mentions?
+ @status.mentions.all? { |mention| mention.silent? || (!@account.local? && !mention.account.local?) || mention.account.following?(@account) }
+ end
+
+ def solicited_reply?
+ !@status.thread.nil? && @status.thread.mentions.where(account: @account).exists?
+ end
+
+ def nilsimsa_compare_value(first, second)
+ first = [first].pack('H*')
+ second = [second].pack('H*')
+ bits = 0
+
+ 0.upto(31) do |i|
+ bits += Nilsimsa::POPC[255 & (first[i].ord ^ second[i].ord)].ord
+ end
+
+ 128 - bits # -128 <= Nilsimsa Compare Value <= 128
+ end
+
+ def nilsimsa?
+ hashable_text.size > NILSIMSA_MIN_SIZE
+ end
+
+ def other_digests
+ redis.zrange(redis_key, 0, -1)
+ end
+
+ def any_other_digest?(filter_algorithm)
+ other_digests.any? do |record|
+ algorithm, other_digest, status_id = record.split(':')
+
+ next unless algorithm == filter_algorithm
+
+ yield algorithm, other_digest, status_id
+ end
+ end
+
+ def matching_status_ids
+ if nilsimsa?
+ other_digests.select { |record| record.start_with?('nilsimsa') && nilsimsa_compare_value(digest, record.split(':')[1]) >= NILSIMSA_COMPARE_THRESHOLD }.map { |record| record.split(':')[2] }.compact
+ else
+ other_digests.select { |record| record.start_with?('md5') && record.split(':')[1] == digest }.map { |record| record.split(':')[2] }.compact
+ end
+ end
+
+ def redis_key
+ @redis_key ||= "spam_check:#{@account.id}"
+ end
+end
# also_known_as :string is an Array
# silenced_at :datetime
# suspended_at :datetime
+# trust_level :integer
#
class Account < ApplicationRecord
include AccountCounters
include DomainNormalizable
+ TRUST_LEVELS = {
+ untrusted: 0,
+ trusted: 1,
+ }.freeze
+
enum protocol: [:ostatus, :activitypub]
validates :username, presence: true
last_webfingered_at.nil? || last_webfingered_at <= 1.day.ago
end
+ def trust_level
+ self[:trust_level] || 0
+ end
+
# Runs ResolveAccountService for this account's acct; does nothing for a
# local account.
def refresh!
  return if local?

  ResolveAccountService.new.call(acct)
end
silenced_at.present?
end
# Sets silenced_at to the given time, which defaults to the current UTC time.
# NOTE(review): with the default moved into the signature, an explicit nil
# argument is no longer replaced by the current time and is written to
# silenced_at as nil - confirm that no caller passes nil.
- def silence!(date = nil)
- date ||= Time.now.utc
+ def silence!(date = Time.now.utc)
update!(silenced_at: date)
end
# Clears silenced_at. In the added version an account at the untrusted level
# is promoted to trusted at the same time; any other level is kept as is.
def unsilence!
- update!(silenced_at: nil)
+ update!(silenced_at: nil, trust_level: trust_level == TRUST_LEVELS[:untrusted] ? TRUST_LEVELS[:trusted] : trust_level)
end
# Whether the account is suspended, i.e. suspended_at is set.
def suspended?
suspended_at.present?
end
- def suspend!(date = nil)
- date ||= Time.now.utc
+ def suspend!(date = Time.now.utc)
transaction do
user&.disable! if local?
update!(suspended_at: date)
remove_from_hashtags
remove_from_public
remove_from_media if status.media_attachments.any?
+ remove_from_spam_check
@status.destroy!
else
redis.publish('timeline:public:local:media', @payload) if @status.local?
end
+ def remove_from_spam_check
+ redis.zremrangebyscore("spam_check:#{@status.account_id}", @status.id, @status.id)
+ end
+
# Options hash made of the current Redis connection and a key derived from
# the status ID.
def lock_options
  {
    redis: Redis.current,
    key: "distribute:#{@status.id}",
  }
end
profile: Profile
relationships: Follows and followers
two_factor_authentication: Two-factor Auth
+ spam_check:
+ spam_detected_and_silenced: This is an automated report. Spam has been detected and the sender has been silenced automatically. If this is a mistake, please unsilence the account.
statuses:
attached:
description: 'Attached: %{attached}'
--- /dev/null
+class AddTrustLevelToAccounts < ActiveRecord::Migration[5.2]
+ def change
+ add_column :accounts, :trust_level, :integer
+ end
+end
t.string "also_known_as", array: true
t.datetime "silenced_at"
t.datetime "suspended_at"
+ t.integer "trust_level"
t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin
t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", unique: true
t.index ["moved_to_account_id"], name: "index_accounts_on_moved_to_account_id"
--- /dev/null
+require 'rails_helper'
+
+RSpec.describe SpamCheck do
+ let!(:sender) { Fabricate(:account) }
+ let!(:alice) { Fabricate(:account, username: 'alice') }
+ let!(:bob) { Fabricate(:account, username: 'bob') }
+
+ def status_with_html(text, options = {})
+ status = PostStatusService.new.call(sender, { text: text }.merge(options))
+ status.update_columns(text: Formatter.instance.format(status), local: false)
+ status
+ end
+
+ describe '#hashable_text' do
+ it 'removes mentions from HTML for remote statuses' do
+ status = status_with_html('@alice Hello')
+ expect(described_class.new(status).hashable_text).to eq 'hello'
+ end
+
+ it 'removes mentions from text for local statuses' do
+ status = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?")
+ expect(described_class.new(status).hashable_text).to eq 'hey , how are you?'
+ end
+ end
+
+ describe '#insufficient_data?' do
+ it 'returns true when there is no text' do
+ status = status_with_html('@alice')
+ expect(described_class.new(status).insufficient_data?).to be true
+ end
+
+ it 'returns false when there is text' do
+ status = status_with_html('@alice h')
+ expect(described_class.new(status).insufficient_data?).to be false
+ end
+ end
+
+ describe '#digest' do
+ it 'returns a string' do
+ status = status_with_html('@alice Hello world')
+ expect(described_class.new(status).digest).to be_a String
+ end
+ end
+
+ describe '#spam?' do
+ it 'returns false for a unique status' do
+ status = status_with_html('@alice Hello')
+ expect(described_class.new(status).spam?).to be false
+ end
+
+ it 'returns false for different statuses to the same recipient' do
+ status1 = status_with_html('@alice Hello')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@alice Are you available to talk?')
+ expect(described_class.new(status2).spam?).to be false
+ end
+
+ it 'returns false for statuses with different content warnings' do
+ status1 = status_with_html('@alice Are you available to talk?')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@alice Are you available to talk?', spoiler_text: 'This is a completely different matter than what I was talking about previously, I swear!')
+ expect(described_class.new(status2).spam?).to be false
+ end
+
+ it 'returns false for different statuses to different recipients' do
+ status1 = status_with_html('@alice How is it going?')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@bob Are you okay?')
+ expect(described_class.new(status2).spam?).to be false
+ end
+
+ it 'returns false for very short different statuses to different recipients' do
+ status1 = status_with_html('@alice 🙄')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@bob Huh?')
+ expect(described_class.new(status2).spam?).to be false
+ end
+
+ it 'returns false for statuses with no text' do
+ status1 = status_with_html('@alice')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@bob')
+ expect(described_class.new(status2).spam?).to be false
+ end
+
+ it 'returns true for duplicate statuses to the same recipient' do
+ status1 = status_with_html('@alice Hello')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@alice Hello')
+ expect(described_class.new(status2).spam?).to be true
+ end
+
+ it 'returns true for duplicate statuses to different recipients' do
+ status1 = status_with_html('@alice Hello')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@bob Hello')
+ expect(described_class.new(status2).spam?).to be true
+ end
+
+ it 'returns true for nearly identical statuses with random numbers' do
+ source_text = 'Sodium, atomic number 11, was first isolated by Humphry Davy in 1807. A chemical component of salt, he named it Na in honor of the saltiest region on earth, North America.'
+ status1 = status_with_html('@alice ' + source_text + ' 1234')
+ described_class.new(status1).remember!
+ status2 = status_with_html('@bob ' + source_text + ' 9568')
+ expect(described_class.new(status2).spam?).to be true
+ end
+ end
+
+ describe '#skip?' do
+ it 'returns true when the sender is already silenced' do
+ status = status_with_html('@alice Hello')
+ sender.silence!
+ expect(described_class.new(status).skip?).to be true
+ end
+
+ it 'returns true when the mentioned person follows the sender' do
+ status = status_with_html('@alice Hello')
+ alice.follow!(sender)
+ expect(described_class.new(status).skip?).to be true
+ end
+
+ it 'returns false when even one mentioned person doesn\'t follow the sender' do
+ status = status_with_html('@alice @bob Hello')
+ alice.follow!(sender)
+ expect(described_class.new(status).skip?).to be false
+ end
+
+ it 'returns true when the sender is replying to a status that mentions the sender' do
+ parent = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?")
+ status = status_with_html('@alice @bob Hello', thread: parent)
+ expect(described_class.new(status).skip?).to be true
+ end
+ end
+
+ describe '#remember!' do
+ pending
+ end
+
+ describe '#flag!' do
+ let!(:status1) { status_with_html('@alice General Kenobi you are a bold one') }
+ let!(:status2) { status_with_html('@alice @bob General Kenobi, you are a bold one') }
+
+ before do
+ described_class.new(status1).remember!
+ described_class.new(status2).flag!
+ end
+
+ it 'silences the account' do
+ expect(sender.silenced?).to be true
+ end
+
+ it 'creates a report about the account' do
+ expect(sender.targeted_reports.unresolved.count).to eq 1
+ end
+
+ it 'attaches both matching statuses to the report' do
+ expect(sender.targeted_reports.first.status_ids).to include(status1.id, status2.id)
+ end
+ end
+end