]> cat aescling's git repositories - mastodon.git/commitdiff
Fix #555 - Use a better URL parser
author Eugen Rochko <eugen@zeonfederated.com>
Wed, 22 Feb 2017 18:35:11 +0000 (19:35 +0100)
committer Eugen Rochko <eugen@zeonfederated.com>
Wed, 22 Feb 2017 18:35:11 +0000 (19:35 +0100)
Gemfile
Gemfile.lock
app/lib/formatter.rb
spec/lib/formatter_spec.rb

diff --git a/Gemfile b/Gemfile
index 55c1de69309b15c628164ac5fba497cc8ab7adef..c97f80bdeeb35b4b245e23035d121a97143fe99e 100644 (file)
--- a/Gemfile
+++ b/Gemfile
@@ -35,6 +35,7 @@ gem 'devise-two-factor'
 gem 'doorkeeper'
 gem 'rabl'
 gem 'rqrcode'
+gem 'twitter-text'
 gem 'oj'
 gem 'hiredis'
 gem 'redis', '~>3.2'
index f50edaf95684170b77e5d5bc95fe8594ddd2cfad..19d61103de8123205ac8fd24b853b6d9fa0e85f5 100644 (file)
@@ -422,6 +422,8 @@ GEM
     thread_safe (0.3.5)
     tilt (2.0.5)
     tins (1.12.0)
+    twitter-text (1.14.5)
+      unf (~> 0.1.0)
     tzinfo (1.2.2)
       thread_safe (~> 0.1)
     uglifier (3.0.1)
@@ -514,6 +516,7 @@ DEPENDENCIES
   simple_form
   simplecov
   statsd-instrument
+  twitter-text
   uglifier (>= 1.3.0)
   webmock
   will_paginate
index 044407a6c152a695e10ad636599c6db7c75cee36..073ab0784e944bc2b389e91e27c1e8b996dde539 100644 (file)
@@ -9,6 +9,8 @@ class Formatter
   include ActionView::Helpers::TextHelper
   include ActionView::Helpers::SanitizeHelper
 
+  AUTOLINK_RE = /https?:\/\/([\S]+\.[!#$&-;=?-[\]_a-z~]|%[\w\d]{2}]+[\w])/i
+
   def format(status)
     return reformat(status.content) unless status.local?
 
@@ -44,9 +46,9 @@ class Formatter
   end
 
   def link_urls(html)
-    html.gsub(URI.regexp(%w(http https))) do |match|
-      link_html(match)
-    end
+    Twitter::Autolink.auto_link_urls(html, url_target: '_blank',
+                                           link_attribute_block: lambda { |_, a| a[:rel] << ' noopener' },
+                                           link_text_block: lambda { |_, text| link_html(text) })
   end
 
   def link_mentions(html, mentions)
@@ -70,7 +72,7 @@ class Formatter
     suffix = url[prefix.length + 30..-1]
     cutoff = url[prefix.length..-1].length > 30
 
-    "<a rel=\"nofollow noopener\" target=\"_blank\" href=\"#{url}\"><span class=\"invisible\">#{prefix}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{text}</span><span class=\"invisible\">#{suffix}</span></a>"
+    "<span class=\"invisible\">#{prefix}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{text}</span><span class=\"invisible\">#{suffix}</span>"
   end
 
   def hashtag_html(match)
index 0db1634e941e2a957bd778be2b4a5973643fb08d..4b003b8e5092b20d90ac6cff96f152e83f028584 100644 (file)
@@ -17,8 +17,38 @@ RSpec.describe Formatter do
     end
 
     it 'contains a link' do
-      expect(subject).to match('<a rel="nofollow noopener" target="_blank" href="http://google.com"><span class="invisible">http://</span><span class="">google.com</span><span class="invisible"></span></a>')
+      expect(subject).to match('<a href="http://google.com" rel="nofollow noopener" target="_blank"><span class="invisible">http://</span><span class="">google.com</span><span class="invisible"></span></a>')
     end
+
+=begin
+    it 'matches a stand-alone medium URL' do
+      expect(subject.match('https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4')[0]).to eq 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4'
+    end
+
+    it 'matches a stand-alone google URL' do
+      expect(subject.match('http://google.com')[0]).to eq 'http://google.com'
+    end
+
+    it 'matches a URL without trailing period' do
+      expect(subject.match('http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ')[0]).to eq 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona'
+    end
+
+    it 'matches a URL without closing paranthesis' do
+      expect(subject.match('(http://google.com/)')[0]).to eq 'http://google.com'
+    end
+
+    it 'matches a URL without exclamation point' do
+      expect(subject.match('http://www.google.com! ')[0]).to eq 'http://www.google.com'
+    end
+
+    it 'matches a URL with a query string' do
+      expect(subject.match('https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink')[0]).to eq 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink'
+    end
+
+    it 'matches a URL with parenthesis in it' do
+      expect(subject.match('https://en.wikipedia.org/wiki/Diaspora_(software)')[0]).to eq 'https://en.wikipedia.org/wiki/Diaspora_(software)'
+    end
+=end
   end
 
   describe '#reformat' do