detected_language_code || default_locale.to_sym
end
+ def prepared_text # Text passed to the language identifier: simplified_text with leading/trailing whitespace removed.
+ simplified_text.strip
+ end
+
private
def detected_language_code
end
def result
- @result ||= @identifier.find_language(text_without_urls)
+ @result ||= @identifier.find_language(prepared_text) # memoized in @result, so find_language runs once per instance on the prepared text
end
def detected_language_reliable? # Delegates to the memoized detection result's own reliable? flag.
result.reliable?
end
- def text_without_urls
+ def simplified_text # Returns a modified copy of text (text itself is left untouched via dup).
text.dup.tap do |new_text|
URI.extract(new_text).each do |url|
new_text.gsub!(url, '')
end
+ new_text.gsub!(Account::MENTION_RE, '') # drop whatever the mention pattern matches
+ new_text.gsub!(Tag::HASHTAG_RE, '') # drop whatever the hashtag pattern matches
+ new_text.gsub!(/\s+/, ' ') # collapse each whitespace run (including gaps left by the removals above) to one space
end
end
# frozen_string_literal: true
+
require 'rails_helper'
describe LanguageDetector do
+ describe 'prepared_text' do
+ it 'returns unmodified string without special cases' do # baseline: input has no URL, mention, hashtag or repeated whitespace
+ string = 'just a regular string'
+ result = described_class.new(string).prepared_text
+
+ expect(result).to eq string
+ end
+
+ it 'collapses spacing in strings' do
+ string = 'The formatting in this is very odd'
+
+ result = described_class.new(string).prepared_text
+ expect(result).to eq 'The formatting in this is very odd'
+ end
+
+ it 'strips usernames from strings before detection' do # input has one mention at the very start and one at the very end
+ string = '@username Yeah, very surreal...! also @friend'
+
+ result = described_class.new(string).prepared_text
+ expect(result).to eq 'Yeah, very surreal...! also'
+ end
+
+ it 'strips URLs from strings before detection' do # covers an https:// URL mid-sentence and an http:// URL at the end
+ string = 'Our website is https://example.com and also http://localhost.dev'
+
+ result = described_class.new(string).prepared_text
+ expect(result).to eq 'Our website is and also'
+ end
+
+ it 'strips #hashtags from strings before detection' do # one hashtag mid-sentence and one at the end of the input
+ string = 'Hey look at all the #animals and #fish'
+
+ result = described_class.new(string).prepared_text
+ expect(result).to eq 'Hey look at all the and'
+ end
+ end
+
describe 'to_iso_s' do
it 'detects english language for basic strings' do
strings = [