end
standard = Extractor.extract_entities_with_indices(text, options)
+ xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
- Extractor.remove_overlapping_entities(special + standard)
+ Extractor.remove_overlapping_entities(special + standard + xmpp)
end
def link_to_url(entity, options = {})
def link_html(url)
url = Addressable::URI.parse(url).to_s
- prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s
+ prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
text = url[prefix.length, 30]
suffix = url[prefix.length + 30..-1]
cutoff = url[prefix.length..-1].length > 30
class Sanitize
module Config
- HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze
+ HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node]
( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
- ((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
+ ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
)
)
}iox
+ REGEXEN[:validate_nodeid] = /(?:
+ #{REGEXEN[:validate_url_unreserved]}|
+ #{REGEXEN[:validate_url_pct_encoded]}|
+ [!$()*+,;=]
+ )/iox
+ REGEXEN[:validate_resid] = /(?:
+ #{REGEXEN[:validate_url_unreserved]}|
+ #{REGEXEN[:validate_url_pct_encoded]}|
+ #{REGEXEN[:validate_url_sub_delims]}
+ )/iox
+ REGEXEN[:valid_xmpp_uri] = %r{
+ ( # $1 total match
+ (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
+ ( # $3 URL
+ ((?:xmpp):) # $4 Protocol
+ (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
+ (#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
+ (#{REGEXEN[:valid_domain]}) # $7 Domain in path
+ (/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
+ (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
+ )
+ )
+ }iox
+ end
+
+ module Extractor
+ # Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
+ # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
+ # XMPP URIs an empty array will be returned.
+ #
+ # If a block is given then it will be called for each XMPP URI.
+ def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
+ return [] unless text && text.index(":")
+ urls = []
+
+ text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
+ valid_uri_match_data = $~
+
+ start_position = valid_uri_match_data.char_begin(3)
+ end_position = valid_uri_match_data.char_end(3)
+
+ urls << {
+ :url => valid_uri_match_data[3],
+ :indices => [start_position, end_position]
+ }
+ end
+ urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
+ urls
+ end
end
end
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
end
end
+
+ # The whole input is a single xmpp: URI; the output is expected to carry
+ # that exact URI in an href attribute.
+ context 'given a stand-alone xmpp: URI' do
+ let(:text) { 'xmpp:user@instance.com' }
+
+ it 'matches the full URI' do
+ is_expected.to include 'href="xmpp:user@instance.com"'
+ end
+ end
+
+ context 'given a an xmpp: URI with a query-string' do
+ let(:text) { 'please join xmpp:muc@instance.com?join right now' }
+
+ it 'matches the full URI' do
+ is_expected.to include 'href="xmpp:muc@instance.com?join"'
+ end
+ end
end
describe '#format_spoiler' do