# frozen_string_literal: true
+ # Process-wide language detector: wraps one CLD3 identifier and maps its
+ # results to alpha2 language symbols, falling back to the account's locale.
class LanguageDetector
- attr_reader :text, :account
+ include Singleton
- def initialize(text, account = nil)
- @text = text
- @account = account
+ # Builds the CLD3 identifier that every #detect call on the singleton reuses.
+ def initialize
@identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
end
- def to_iso_s
- detected_language_code || default_locale
+ # Returns the language of +text+ as an alpha2 symbol when CLD3 reports a
+ # reliable result; otherwise the account's user_locale as a symbol, or nil.
+ # +account+ is optional, as it was for the constructor this method replaces.
+ def detect(text, account = nil)
+ detect_language_code(text) || default_locale(account)
end
- def prepared_text
- simplified_text.strip
+ # Distinct alpha2 symbols for every entry of CLD3's LANGUAGE_NAMES.
+ # Memoized with ||= so the ISO_639 lookups run once per process rather
+ # than on every call.
+ def language_names
+ @language_names ||=
+ CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym }
+ .uniq
end
private
- def detected_language_code
- iso6391(result.language).to_sym if detected_language_reliable?
+ # Text handed to CLD3: URLs and mentions removed, surrounding whitespace stripped.
+ def prepare_text(text)
+ simplify_text(text).strip
+ end
+
+ # Alpha2 symbol for the detected language, or nil when CLD3 does not
+ # consider the result reliable.
+ def detect_language_code(text)
+ result = @identifier.find_language(prepare_text(text))
+ iso6391(result.language.to_s).to_sym if result.reliable?
end
+ # Looks up the alpha2 code for a language tag such as "en" or "pt-BR".
def iso6391(bcp47)
+ # Only the primary subtag (the part before the first "-") is looked up.
+ iso639 = bcp47.split('-').first
ISO_639.find(iso639).alpha2
end
- def result
- @result ||= @identifier.find_language(prepared_text)
- end
-
- def detected_language_reliable?
- result.reliable?
- end
-
- def simplified_text
+ # Works on a copy so the caller's string is never mutated by gsub!.
+ def simplify_text(text)
text.dup.tap do |new_text|
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
new_text.gsub!(Account::MENTION_RE, '')
end
end
- def default_locale
- account&.user_locale&.to_sym || nil
+ # The account's user_locale as a symbol; nil when there is no account or
+ # the account has no locale.
+ def default_locale(account)
+ account&.user_locale&.to_sym
end
end
sensitive: options[:sensitive],
spoiler_text: options[:spoiler_text] || '',
visibility: options[:visibility] || account.user&.setting_default_privacy,
- language: detect_language_for(text, account),
+ language: LanguageDetector.instance.detect(text, account),
application: options[:application])
attach_media(status, media)
media.update(status_id: status.id)
end
- def detect_language_for(text, account)
- LanguageDetector.new(text, account).to_iso_s
- end
-
# Returns the ProcessMentionsService instance, creating it on first use and
# reusing it afterwards via the memoizing ||=.
def process_mentions_service
@process_mentions_service ||= ProcessMentionsService.new
end
require 'rails_helper'
describe LanguageDetector do
- describe 'prepared_text' do
+ describe 'prepare_text' do
# prepare_text is invoked through #send on the singleton instance because
# these examples call it directly rather than going through #detect.
it 'returns unmodified string without special cases' do
string = 'just a regular string'
- result = described_class.new(string).prepared_text
+ result = described_class.instance.send(:prepare_text, string)
expect(result).to eq string
end
it 'collapses spacing in strings' do
string = 'The formatting in this is very odd'
- result = described_class.new(string).prepared_text
+ result = described_class.instance.send(:prepare_text, string)
expect(result).to eq 'The formatting in this is very odd'
end
it 'strips usernames from strings before detection' do
string = '@username Yeah, very surreal...! also @friend'
- result = described_class.new(string).prepared_text
+ result = described_class.instance.send(:prepare_text, string)
expect(result).to eq 'Yeah, very surreal...! also'
end
it 'strips URLs from strings before detection' do
string = 'Our website is https://example.com and also http://localhost.dev'
- result = described_class.new(string).prepared_text
+ result = described_class.instance.send(:prepare_text, string)
expect(result).to eq 'Our website is and also'
end
it 'strips #hashtags from strings before detection' do
string = 'Hey look at all the #animals and #fish'
- result = described_class.new(string).prepared_text
+ result = described_class.instance.send(:prepare_text, string)
expect(result).to eq 'Hey look at all the and'
end
end
- describe 'to_iso_s' do
+ describe 'detect' do
+ let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
+
it 'detects english language for basic strings' do
strings = [
"Hello and welcome to mastodon how are you today?",
"a lot of people just want to feel righteous all the time and that's all that matters",
]
strings.each do |string|
- result = described_class.new(string).to_iso_s
+ result = described_class.instance.detect(string, account_without_user_locale)
expect(result).to eq(:en), string
end
it 'detects spanish language' do
string = 'Obtener un Hola y bienvenidos a Mastodon'
- result = described_class.new(string).to_iso_s
+ result = described_class.instance.detect(string, account_without_user_locale)
expect(result).to eq :es
end
describe 'when language can\'t be detected' do
it 'uses nil when sent an empty document' do
- result = described_class.new('').to_iso_s
+ result = described_class.instance.detect('', account_without_user_locale)
expect(result).to eq nil
end
cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
expect(cld_result).not_to eq :en
- result = described_class.new(string).to_iso_s
+ result = described_class.instance.detect(string, account_without_user_locale)
expect(result).to eq nil
end
describe 'with an account' do
it 'uses the account locale when present' do
account = double(user_locale: 'fr')
- result = described_class.new('', account).to_iso_s
+ result = described_class.instance.detect('', account)
expect(result).to eq :fr
end
it 'uses nil when account is present but has no locale' do
- account = double(user_locale: nil)
- result = described_class.new('', account).to_iso_s
+ result = described_class.instance.detect('', account_without_user_locale)
expect(result).to eq nil
end
describe 'with an `en` default locale' do
it 'uses nil for undetectable string' do
- string = ''
- result = described_class.new(string).to_iso_s
+ result = described_class.instance.detect('', account_without_user_locale)
expect(result).to eq nil
end
it 'uses nil for undetectable string' do
string = ''
- result = described_class.new(string).to_iso_s
+ result = described_class.instance.detect(string, account_without_user_locale)
expect(result).to eq nil
end