end
end
+ attr_reader :content_type
+
# @param [String] text
# @param [Hash] options
# @option options [Boolean] :multiline
# @option options [Array<Account>] :preloaded_accounts
# @option options [String] :content_type
def initialize(text, options = {})
- content_type = options.delete(:content_type)
+ @content_type = options.delete(:content_type)
super(text, options)
@text = format_markdown(text) if content_type == 'text/markdown'
end
- # Differs from TextFormatter by not messing with newline after parsing
+ # Differs from TextFormatter by operating on the parsed HTML tree ;)
+ #
+ # See +#tree+
def to_s
return ''.html_safe if text.blank?
- html = rewrite do |entity|
- if entity[:url]
- link_to_url(entity)
- elsif entity[:hashtag]
- link_to_hashtag(entity)
- elsif entity[:screen_name]
- link_to_mention(entity)
+ result = tree.dup
+ result.css('mastodon-entity').each do |entity|
+ case entity['kind']
+ when 'hashtag'
+ entity.replace(link_to_hashtag({ hashtag: entity['value'] }))
+ when 'link'
+ entity.replace(link_to_url({ url: entity['value'] }))
+ when 'mention'
+ entity.replace(link_to_mention({ screen_name: entity['value'] }))
end
end
-
- html.html_safe # rubocop:disable Rails/OutputSafety
+ result.to_html.html_safe # rubocop:disable Rails/OutputSafety
end
- # Differs from `TextFormatter` by skipping HTML tags and entities
- def entities
- @entities ||= begin
- gaps = []
- total_offset = 0
-
- escaped = text.gsub(/<[^>]*>|&#[0-9]+;/) do |match|
- total_offset += match.length - 1
- end_offset = Regexp.last_match.end(0)
- gaps << [end_offset - total_offset, total_offset]
- ' '
- end
-
- Extractor.extract_entities_with_indices(escaped, extract_url_without_protocol: false).map do |entity|
- start_pos, end_pos = entity[:indices]
- offset_idx = gaps.rindex { |gap| gap.first <= start_pos }
- offset = offset_idx.nil? ? 0 : gaps[offset_idx].last
- entity.merge(indices: [start_pos + offset, end_pos + offset])
+ ##
+ # Process the status into a Nokogiri document fragment, with entities
+ # replaced with +<mastodon-entity>+s.
+ #
+ # Since +<mastodon-entity>+ is not allowed by the sanitizer, any such
+ # elements in the output *must* have been produced by this algorithm.
+ #
+ # These elements will need to be replaced prior to serialization (see
+ # +#to_s+).
+ def tree
+ if @tree.nil?
+ src = text.gsub(Sanitize::REGEX_UNSUITABLE_CHARS, '')
+ @tree = Nokogiri::HTML5.fragment(src)
+ Sanitize.node!(@tree, Sanitize::Config::MASTODON_OUTGOING)
+ document = @tree.document
+
+ @tree.xpath('.//text()[not(ancestor::a)]').each do |text_node|
+ # Iterate over text elements and build up their replacements.
+ content = text_node.content
+ replacement = Nokogiri::XML::NodeSet.new(document)
+ processed_index = 0
+ Extractor.extract_entities_with_indices(
+ content,
+ extract_url_without_protocol: false
+ ) do |entity|
+ # Iterate over entities in this text node.
+ advance = entity[:indices].first - processed_index
+ if advance.positive?
+ # Text node for content which precedes entity.
+ replacement << Nokogiri::XML::Text.new(
+ content[processed_index, advance],
+ document
+ )
+ end
+ elt = Nokogiri::XML::Element.new('mastodon-entity', document)
+ if entity[:url]
+ elt['kind'] = 'link'
+ elt['value'] = entity[:url]
+ elsif entity[:hashtag]
+ elt['kind'] = 'hashtag'
+ elt['value'] = entity[:hashtag]
+ elsif entity[:screen_name]
+ elt['kind'] = 'mention'
+ elt['value'] = entity[:screen_name]
+ end
+ replacement << elt
+ processed_index = entity[:indices].last
+ end
+ if processed_index < content.size
+ # Text node for remaining content.
+ replacement << Nokogiri::XML::Text.new(
+ content[processed_index, content.size - processed_index],
+ document
+ )
+ end
+ text_node.replace(replacement)
end
end
+ @tree
end
private
- # Differs from `TextFormatter` in that it keeps HTML; but it sanitizes at the end to remain safe
- def rewrite
- entities.sort_by! do |entity|
- entity[:indices].first
- end
-
- result = ''.dup
-
- last_index = entities.reduce(0) do |index, entity|
- indices = entity[:indices]
- result << text[index...indices.first]
- result << yield(entity)
- indices.last
- end
-
- result << text[last_index..-1]
-
- Sanitize.fragment(result, Sanitize::Config::MASTODON_OUTGOING)
- end
-
def format_markdown(html)
html = markdown_formatter.render(html)
html.delete("\r").delete("\n")