]> cat aescling's git repositories - mastodon.git/commitdiff
Add support for linking XMPP URIs in toots (#12709)
author ThibG <thib@sitedethib.com>
Sat, 11 Jan 2020 01:15:25 +0000 (02:15 +0100)
committer Eugen Rochko <eugen@zeonfederated.com>
Sat, 11 Jan 2020 01:15:25 +0000 (02:15 +0100)
* Fix wrong grouping in Twitter valid_url regex

* Add support for xmpp URIs

Fixes #9776

The difficult part is autolinking, because Twitter-text's extractor does
some pretty ad-hoc stuff to find things that “look like” URLs, and XMPP
URIs do not really match the assumptions of that lib, so it doesn't sound
wise to try to shoehorn it into the existing regex.

This is why I used a specific regex (very close to the RFC, although slightly
more permissive), and a specific scan function (a simplified version of the
generalized one from Twitter).

* Remove leading “xmpp:” from auto-linked text

app/lib/formatter.rb
app/lib/sanitize_config.rb
config/initializers/twitter_regex.rb
spec/lib/formatter_spec.rb

index 6ba3276141a54bd9d4505bb2aa3d8571e7087f70..c771dcaaa0d4c178b075b0a708fae84a527ad501 100644 (file)
@@ -245,8 +245,9 @@ class Formatter
     end
 
     standard = Extractor.extract_entities_with_indices(text, options)
+    xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
 
-    Extractor.remove_overlapping_entities(special + standard)
+    Extractor.remove_overlapping_entities(special + standard + xmpp)
   end
 
   def link_to_url(entity, options = {})
@@ -284,7 +285,7 @@ class Formatter
 
   def link_html(url)
     url    = Addressable::URI.parse(url).to_s
-    prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s
+    prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
     text   = url[prefix.length, 30]
     suffix = url[prefix.length + 30..-1]
     cutoff = url[prefix.length..-1].length > 30
index 77045155e03149a256f1521584533894b57901de..e2480376e4ac50127dfccf4db14378e0f7c65771 100644 (file)
@@ -2,7 +2,7 @@
 
 class Sanitize
   module Config
-    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze
+    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
 
     CLASS_WHITELIST_TRANSFORMER = lambda do |env|
       node = env[:node]
index 0ddbbee9828bfdcfefb1e28964d27df3ab28bef9..87815d45801c6a7bb8127c2020b4257830511484 100644 (file)
@@ -29,7 +29,7 @@ module Twitter
       (                                                                                     #   $1 total match
         (#{REGEXEN[:valid_url_preceding_chars]})                                            #   $2 Preceding character
         (                                                                                   #   $3 URL
-          ((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)?                                    #   $4 Protocol (optional)
+          ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)?                                  #   $4 Protocol (optional)
           (#{REGEXEN[:valid_domain]})                                                       #   $5 Domain(s)
           (?::(#{REGEXEN[:valid_port_number]}))?                                            #   $6 Port number (optional)
           (/#{REGEXEN[:valid_url_path]}*)?                                                  #   $7 URL Path and anchor
@@ -37,5 +37,54 @@ module Twitter
         )
       )
     }iox
+    REGEXEN[:validate_nodeid] = /(?:
+      #{REGEXEN[:validate_url_unreserved]}|
+      #{REGEXEN[:validate_url_pct_encoded]}|
+      [!$()*+,;=]
+    )/iox
+    REGEXEN[:validate_resid] = /(?:
+      #{REGEXEN[:validate_url_unreserved]}|
+      #{REGEXEN[:validate_url_pct_encoded]}|
+      #{REGEXEN[:validate_url_sub_delims]}
+    )/iox
+    REGEXEN[:valid_xmpp_uri] = %r{
+      (                                                                                     #   $1 total match
+        (#{REGEXEN[:valid_url_preceding_chars]})                                            #   $2 Preceding character
+        (                                                                                   #   $3 URL
+          ((?:xmpp):)                                                                       #   $4 Protocol
+          (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)?                     #   $5 Authority (optional)
+          (#{REGEXEN[:validate_nodeid]}+@)?                                                 #   $6 Username in path (optional)
+          (#{REGEXEN[:valid_domain]})                                                       #   $7 Domain in path
+          (/#{REGEXEN[:validate_resid]}+)?                                                  #   $8 Resource in path (optional)
+          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? #   $9 Query String
+        )
+      )
+    }iox
+  end
+
+  module Extractor
+    # Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
+    # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
+    # XMPP URIs an empty array will be returned.
+    #
+    # If a block is given then it will be called for each XMPP URI.
+    def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
+      return [] unless text && text.index(":")
+      urls = []
+
+      text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
+        valid_uri_match_data = $~
+
+        start_position = valid_uri_match_data.char_begin(3)
+        end_position = valid_uri_match_data.char_end(3)
+
+        urls << {
+          :url => valid_uri_match_data[3],
+          :indices => [start_position, end_position]
+        }
+      end
+      urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
+      urls
+    end
   end
 end
index b8108a247716ced195b017e3aac3ffb2403ee37c..83be0a5883b904cfddefe3d4092a5ace14e7d98d 100644 (file)
@@ -242,6 +242,22 @@ RSpec.describe Formatter do
         is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
       end
     end
+
+    context 'given a stand-alone xmpp: URI' do
+      let(:text) { 'xmpp:user@instance.com' }
+
+      it 'matches the full URI' do
+        is_expected.to include 'href="xmpp:user@instance.com"'
+      end
+    end
+
+    context 'given a an xmpp: URI with a query-string' do
+      let(:text) { 'please join xmpp:muc@instance.com?join right now' }
+
+      it 'matches the full URI' do
+        is_expected.to include 'href="xmpp:muc@instance.com?join"'
+      end
+    end
   end
 
   describe '#format_spoiler' do