# frozen_string_literal: true
module Extractor
+ MAX_DOMAIN_LENGTH = 253
+
extend Twitter::TwitterText::Extractor
module_function
after = $'
unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
+ _, domain = screen_name.split('@')
+
+ next if domain.present? && domain.length > MAX_DOMAIN_LENGTH
+
start_position = match_data.char_begin(1) - 1
end_position = match_data.char_end(1)
class StatusLengthValidator < ActiveModel::Validator
MAX_CHARS = 500
URL_PLACEHOLDER_CHARS = 23
- URL_PLACEHOLDER = "\1#{'x' * URL_PLACEHOLDER_CHARS}"
+ URL_PLACEHOLDER = 'x' * 23
def validate(status)
return unless status.local? && !status.reblog?
- @status = status
- status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?
+ status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?(status)
end
private
- def too_long?
- countable_length > MAX_CHARS
+ def too_long?(status)
+ countable_length(combined_text(status)) > MAX_CHARS
end
- def countable_length
- total_text.mb_chars.grapheme_length
+ def countable_length(str)
+ str.mb_chars.grapheme_length
end
- def total_text
- [@status.spoiler_text, countable_text].join
+ def combined_text(status)
+ [status.spoiler_text, countable_text(status.text)].join
end
- def countable_text
- return '' if @status.text.nil?
+ def countable_text(str)
+ return '' if str.blank?
- @status.text.dup.tap do |new_text|
- new_text.gsub!(FetchLinkCardService::URL_PATTERN, URL_PLACEHOLDER)
- new_text.gsub!(Account::MENTION_RE, '@\2')
+ # To ensure that we only give length concessions to entities that
+ # will be correctly parsed during formatting, we go through full
+ # entity extraction
+
+ entities = Extractor.remove_overlapping_entities(Extractor.extract_urls_with_indices(str, extract_url_without_protocol: false) + Extractor.extract_mentions_or_lists_with_indices(str))
+
+ rewrite_entities(str, entities) do |entity|
+ if entity[:url]
+ URL_PLACEHOLDER
+ elsif entity[:screen_name]
+ "@#{entity[:screen_name].split('@').first}"
+ end
end
end
+
+ def rewrite_entities(str, entities)
+ entities.sort_by! { |entity| entity[:indices].first }
+ result = ''.dup
+
+ last_index = entities.reduce(0) do |index, entity|
+ result << str[index...entity[:indices].first]
+ result << yield(entity)
+ entity[:indices].last
+ end
+
+ result << str[last_index..-1]
+ result
+ end
end
expect(status.errors).to have_received(:add)
end
+ it 'does not count overly long URLs as 23 characters flat' do
+ text = "http://example.com/valid?#{'#foo?' * 1000}"
+ status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
+ subject.validate(status)
+ expect(status.errors).to have_received(:add)
+ end
+
it 'counts only the front part of remote usernames' do
text = ('a' * 475) + " @alice@#{'b' * 30}.com"
status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
subject.validate(status)
expect(status.errors).to_not have_received(:add)
end
+
+ it 'does count both parts of remote usernames for overly long domains' do
+ text = "@alice@#{'b' * 500}.com"
+ status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
+
+ subject.validate(status)
+ expect(status.errors).to have_received(:add)
+ end
end
end