]> cat aescling's git repositories - mastodon.git/commitdiff
OEmbed support for PreviewCard (#2337)
authorEugen Rochko <eugen@zeonfederated.com>
Thu, 27 Apr 2017 12:42:22 +0000 (14:42 +0200)
committerGitHub <noreply@github.com>
Thu, 27 Apr 2017 12:42:22 +0000 (14:42 +0200)
* OEmbed support for PreviewCard

* Improve ProviderDiscovery code failure treatment

* Do not crawl links if there is a content warning, since those
don't display a link card anyway

* Reset db schema

* Fresh migrate

* Fix rubocop style issues
Fix #1681 - return existing access token when applicable instead of creating new

* Fix test

* Extract http client to helper

* Improve oembed controller

22 files changed:
Gemfile
Gemfile.lock
app/assets/javascripts/components/actions/cards.jsx
app/assets/javascripts/components/features/status/components/card.jsx
app/assets/stylesheets/components.scss
app/controllers/api/oembed_controller.rb
app/helpers/http_helper.rb [new file with mode: 0644]
app/lib/formatter.rb
app/lib/provider_discovery.rb [new file with mode: 0644]
app/lib/sanitize_config.rb [new file with mode: 0644]
app/models/preview_card.rb
app/services/fetch_atom_service.rb
app/services/fetch_link_card_service.rb
app/services/follow_remote_account_service.rb
app/services/post_status_service.rb
app/views/api/v1/statuses/card.rabl
config/initializers/doorkeeper.rb
config/initializers/kaminari_config.rb
config/initializers/oembed.rb [new file with mode: 0644]
db/migrate/20170425202925_add_oembed_to_preview_cards.rb [new file with mode: 0644]
db/schema.rb
spec/services/fetch_link_card_service_spec.rb

diff --git a/Gemfile b/Gemfile
index 701c724ee240fb678cf5fde9bdcb2af05671d3a1..2909d8e4544e882f8d13558aa851cf02efeaa63a 100644 (file)
--- a/Gemfile
+++ b/Gemfile
@@ -49,6 +49,7 @@ gem 'rails-settings-cached'
 gem 'redis', '~>3.2', require: ['redis', 'redis/connection/hiredis']
 gem 'rqrcode'
 gem 'ruby-oembed', require: 'oembed'
+gem 'sanitize'
 gem 'sidekiq'
 gem 'sidekiq-unique-jobs'
 gem 'simple-navigation'
index 14567dc5a477cba6ddddf43411b5fcceefe78998..fc8d28104f7e92e27d56395098798bbea0377acf 100644 (file)
@@ -123,6 +123,7 @@ GEM
     connection_pool (2.2.1)
     crack (0.4.3)
       safe_yaml (~> 1.0.0)
+    crass (1.0.2)
     debug_inspector (0.0.2)
     devise (4.2.1)
       bcrypt (~> 3.0)
@@ -258,6 +259,8 @@ GEM
     nio4r (2.0.0)
     nokogiri (1.7.1)
       mini_portile2 (~> 2.1.0)
+    nokogumbo (1.4.10)
+      nokogiri
     oj (2.18.5)
     openssl (2.0.3)
     orm_adapter (0.5.0)
@@ -398,6 +401,10 @@ GEM
     ruby-oembed (0.12.0)
     ruby-progressbar (1.8.1)
     safe_yaml (1.0.4)
+    sanitize (4.4.0)
+      crass (~> 1.0.2)
+      nokogiri (>= 1.4.4)
+      nokogumbo (~> 1.4.1)
     sass (3.4.23)
     sass-rails (5.0.6)
       railties (>= 4.0.0, < 6)
@@ -540,6 +547,7 @@ DEPENDENCIES
   rspec-sidekiq
   rubocop
   ruby-oembed
+  sanitize
   sass-rails (~> 5.0)
   sidekiq
   sidekiq-unique-jobs
index d4c1eda601bf34ffb1ba4d6804ef059c330d943c..805be9709ba62b0212da7a09cd25dd5e464f014c 100644 (file)
@@ -13,7 +13,7 @@ export function fetchStatusCard(id) {
     dispatch(fetchStatusCardRequest(id));
 
     api(getState).get(`/api/v1/statuses/${id}/card`).then(response => {
-      if (!response.data.url || !response.data.title || !response.data.description) {
+      if (!response.data.url) {
         return;
       }
 
index 1b5722b6ae704fa515a60dc1f943750d92e756ee..a5ce7f08af5e4805bd2f63d26e0459372f42b61a 100644 (file)
@@ -14,14 +14,11 @@ const getHostname = url => {
 
 class Card extends React.PureComponent {
 
-  render () {
+  renderLink () {
     const { card } = this.props;
 
-    if (card === null) {
-      return null;
-    }
-
-    let image = '';
+    let image    = '';
+    let provider = card.get('provider_name');
 
     if (card.get('image')) {
       image = (
@@ -31,18 +28,64 @@ class Card extends React.PureComponent {
       );
     }
 
+    if (provider.length < 1) {
+      provider = getHostname(card.get('url'))
+    }
+
     return (
       <a href={card.get('url')} className='status-card' target='_blank' rel='noopener'>
         {image}
 
         <div className='status-card__content'>
           <strong className='status-card__title' title={card.get('title')}>{card.get('title')}</strong>
-          <p className='status-card__description'>{card.get('description').substring(0, 50)}</p>
-          <span className='status-card__host' style={hostStyle}>{getHostname(card.get('url'))}</span>
+          <p className='status-card__description'>{(card.get('description') || '').substring(0, 50)}</p>
+          <span className='status-card__host' style={hostStyle}>{provider}</span>
         </div>
       </a>
     );
   }
+
+  renderPhoto () {
+    const { card } = this.props;
+
+    return (
+      <a href={card.get('url')} className='status-card-photo' target='_blank' rel='noopener'>
+        <img src={card.get('url')} alt={card.get('title')} width={card.get('width')} height={card.get('height')} />
+      </a>
+    );
+  }
+
+  renderVideo () {
+    const { card } = this.props;
+    const content  = { __html: card.get('html') };
+
+    return (
+      <div
+        className='status-card-video'
+        dangerouslySetInnerHTML={content}
+      />
+    );
+  }
+
+  render () {
+    const { card } = this.props;
+
+    if (card === null) {
+      return null;
+    }
+
+    switch(card.get('type')) {
+    case 'link':
+      return this.renderLink();
+    case 'photo':
+      return this.renderPhoto();
+    case 'video':
+      return this.renderVideo();
+    case 'rich':
+    default:
+      return null;
+    }
+  }
 }
 
 Card.propTypes = {
index cbbe746c1b22cc41d0a48e60f5f74bdf7473bbfa..42257563976b86604be71b677b724947dc155741 100644 (file)
@@ -1734,6 +1734,28 @@ button.icon-button.active i.fa-retweet {
   }
 }
 
+.status-card-video, .status-card-rich, .status-card-photo {
+  margin-top: 14px;
+  overflow: hidden;
+
+  iframe {
+    width: 100%;
+    height: auto;
+  }
+}
+
+.status-card-photo {
+  display: block;
+  text-decoration: none;
+
+  img {
+    display: block;
+    width: 100%;
+    height: auto;
+    margin: 0;
+  }
+}
+
 .status-card__title {
   display: block;
   font-weight: 500;
index 2ea48229669418b310595dc4a3a244e117cfea3b..58d8207f60833f5cebc8f4df659fc8943738871c 100644 (file)
@@ -14,8 +14,20 @@ class Api::OEmbedController < ApiController
   def stream_entry_from_url(url)
     params = Rails.application.routes.recognize_path(url)
 
-    raise ActiveRecord::RecordNotFound unless params[:controller] == 'stream_entries' && params[:action] == 'show'
+    raise ActiveRecord::RecordNotFound unless recognized_stream_entry_url?(params)
 
-    StreamEntry.find(params[:id])
+    stream_entry(params)
+  end
+
+  def recognized_stream_entry_url?(params)
+    %w(stream_entries statuses).include?(params[:controller]) && params[:action] == 'show'
+  end
+
+  def stream_entry(params)
+    if params[:controller] == 'stream_entries'
+      StreamEntry.find(params[:id])
+    else
+      Status.find(params[:id]).stream_entry
+    end
   end
 end
diff --git a/app/helpers/http_helper.rb b/app/helpers/http_helper.rb
new file mode 100644 (file)
index 0000000..1e1ac82
--- /dev/null
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module HttpHelper
+  USER_AGENT = "#{HTTP::Request::USER_AGENT} (Mastodon/#{Mastodon::VERSION}; +http://#{Rails.configuration.x.local_domain}/)"
+
+  def http_client(options = {})
+    timeout = { write: 10, connect: 10, read: 10 }.merge(options)
+
+    HTTP.headers(user_agent: USER_AGENT)
+        .timeout(:per_operation, timeout)
+        .follow
+  end
+end
index 1d8e90d1ffa82b87f5df34ee2991b6aa7923090c..5ae6238d9db016c3bc97e961484e4b64ed4634f4 100644 (file)
@@ -1,13 +1,13 @@
 # frozen_string_literal: true
 
 require 'singleton'
+require_relative './sanitize_config'
 
 class Formatter
   include Singleton
   include RoutingHelper
 
   include ActionView::Helpers::TextHelper
-  include ActionView::Helpers::SanitizeHelper
 
   def format(status)
     return reformat(status.content) unless status.local?
@@ -23,7 +23,7 @@ class Formatter
   end
 
   def reformat(html)
-    sanitize(html, tags: %w(a br p span), attributes: %w(href rel class))
+    sanitize(html, Sanitize::Config::MASTODON_STRICT)
   end
 
   def plaintext(status)
@@ -43,6 +43,10 @@ class Formatter
     html.html_safe # rubocop:disable Rails/OutputSafety
   end
 
+  def sanitize(html, config)
+    Sanitize.fragment(html, config)
+  end
+
   private
 
   def encode(html)
diff --git a/app/lib/provider_discovery.rb b/app/lib/provider_discovery.rb
new file mode 100644 (file)
index 0000000..761ddae
--- /dev/null
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+class ProviderDiscovery < OEmbed::ProviderDiscovery
+  include HttpHelper
+
+  class << self
+    def discover_provider(url, options = {})
+      res    = http_client.get(url)
+      format = options[:format]
+
+      raise OEmbed::NotFound, url if res.code != 200 || res.mime_type != 'text/html'
+
+      html = Nokogiri::HTML(res.to_s)
+
+      if format.nil? || format == :json
+        provider_endpoint ||= html.at_xpath('//link[@type="application/json+oembed"]')&.attribute('href')&.value
+        format ||= :json if provider_endpoint
+      end
+
+      if format.nil? || format == :xml
+        provider_endpoint ||= html.at_xpath('//link[@type="application/xml+oembed"]')&.attribute('href')&.value
+        format ||= :xml if provider_endpoint
+      end
+
+      begin
+        provider_endpoint = Addressable::URI.parse(provider_endpoint)
+        provider_endpoint.query = nil
+        provider_endpoint = provider_endpoint.to_s
+      rescue Addressable::URI::InvalidURIError
+        raise OEmbed::NotFound, url
+      end
+
+      OEmbed::Provider.new(provider_endpoint, format || OEmbed::Formatter.default)
+    end
+  end
+end
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb
new file mode 100644 (file)
index 0000000..7cf1c30
--- /dev/null
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+class Sanitize
+  module Config
+    HTTP_PROTOCOLS ||= ['http', 'https', :relative].freeze
+
+    MASTODON_STRICT ||= freeze_config(
+      elements: %w(p br span a),
+
+      attributes: {
+        'a'    => %w(href),
+        'span' => %w(class),
+      },
+
+      protocols: {
+        'a' => { 'href' => HTTP_PROTOCOLS },
+      }
+    )
+
+    MASTODON_OEMBED ||= freeze_config merge(
+      RELAXED,
+      elements: RELAXED[:elements] + %w(audio embed iframe source video),
+
+      attributes: merge(
+        RELAXED[:attributes],
+        'audio'  => %w(controls),
+        'embed'  => %w(height src type width),
+        'iframe' => %w(allowfullscreen frameborder height scrolling src width),
+        'source' => %w(src type),
+        'video'  => %w(controls height loop width),
+        'div'    => [:data]
+      ),
+
+      protocols: merge(
+        RELAXED[:protocols],
+        'embed'  => { 'src' => HTTP_PROTOCOLS },
+        'iframe' => { 'src' => HTTP_PROTOCOLS },
+        'source' => { 'src' => HTTP_PROTOCOLS }
+      )
+    )
+  end
+end
index e59b05eb877e1b17dca712f2436554e265ff84b3..0aa771101227a8b3d8ac256b56133739d66c1dfa 100644 (file)
@@ -3,6 +3,10 @@
 class PreviewCard < ApplicationRecord
   IMAGE_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif'].freeze
 
+  self.inheritance_column = false
+
+  enum type: [:link, :photo, :video, :rich]
+
   belongs_to :status
 
   has_attached_file :image, styles: { original: '120x120#' }, convert_options: { all: '-quality 80 -strip' }
index c3dad1eb9315ceab17f7797615541dbfad52a213..9c514aa9f9767682353a722ffd3c56e02277660b 100644 (file)
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class FetchAtomService < BaseService
+  include HttpHelper
+
   def call(url)
     return if url.blank?
 
@@ -45,8 +47,4 @@ class FetchAtomService < BaseService
   def fetch(url)
     http_client.get(url).to_s
   end
-
-  def http_client
-    HTTP.timeout(:per_operation, write: 10, connect: 10, read: 10).follow
-  end
 end
index f74a0d34dc2ccc65fc93cfe727fc5035e392a0d7..416c5fdadf53ece8d30baab6691e5f1364801f4b 100644 (file)
@@ -1,8 +1,9 @@
 # frozen_string_literal: true
 
 class FetchLinkCardService < BaseService
+  include HttpHelper
+
   URL_PATTERN = %r{https?://\S+}
-  USER_AGENT = "#{HTTP::Request::USER_AGENT} (Mastodon/#{Mastodon::VERSION}; +http://#{Rails.configuration.x.local_domain}/)"
 
   def call(status)
     # Get first http/https URL that isn't local
@@ -10,13 +11,53 @@ class FetchLinkCardService < BaseService
 
     return if url.nil?
 
+    card = PreviewCard.where(status: status).first_or_initialize(status: status, url: url)
+    attempt_opengraph(card, url) unless attempt_oembed(card, url)
+  end
+
+  private
+
+  def attempt_oembed(card, url)
+    response = OEmbed::Providers.get(url)
+
+    card.type          = response.type
+    card.title         = response.respond_to?(:title)         ? response.title         : ''
+    card.author_name   = response.respond_to?(:author_name)   ? response.author_name   : ''
+    card.author_url    = response.respond_to?(:author_url)    ? response.author_url    : ''
+    card.provider_name = response.respond_to?(:provider_name) ? response.provider_name : ''
+    card.provider_url  = response.respond_to?(:provider_url)  ? response.provider_url  : ''
+    card.width         = 0
+    card.height        = 0
+
+    case card.type
+    when 'link'
+      card.image = URI.parse(response.thumbnail_url) if response.respond_to?(:thumbnail_url)
+    when 'photo'
+      card.url    = response.url
+      card.width  = response.width.presence  || 0
+      card.height = response.height.presence || 0
+    when 'video'
+      card.width  = response.width.presence  || 0
+      card.height = response.height.presence || 0
+      card.html   = Formatter.instance.sanitize(response.html, Sanitize::Config::MASTODON_OEMBED)
+    when 'rich'
+      # Most providers rely on <script> tags, which is a no-no
+      return false
+    end
+
+    card.save_with_optional_image!
+  rescue OEmbed::NotFound
+    false
+  end
+
+  def attempt_opengraph(card, url)
     response = http_client.get(url)
 
     return if response.code != 200 || response.mime_type != 'text/html'
 
     page = Nokogiri::HTML(response.to_s)
-    card = PreviewCard.where(status: status).first_or_initialize(status: status, url: url)
 
+    card.type        = :link
     card.title       = meta_property(page, 'og:title') || page.at_xpath('//title')&.content
     card.description = meta_property(page, 'og:description') || meta_property(page, 'description')
     card.image       = URI.parse(Addressable::URI.parse(meta_property(page, 'og:image')).normalize.to_s) if meta_property(page, 'og:image')
@@ -26,12 +67,6 @@ class FetchLinkCardService < BaseService
     card.save_with_optional_image!
   end
 
-  private
-
-  def http_client
-    HTTP.headers(user_agent: USER_AGENT).timeout(:per_operation, write: 10, connect: 10, read: 10).follow
-  end
-
   def meta_property(html, property)
     html.at_xpath("//meta[@property=\"#{property}\"]")&.attribute('content')&.value || html.at_xpath("//meta[@name=\"#{property}\"]")&.attribute('content')&.value
   end
index fbf983093ab0d916c81d30d957fe2df9b4e2baab..0b5abf58a20996ad4628e72b0b660390401b560c 100644 (file)
@@ -2,6 +2,7 @@
 
 class FollowRemoteAccountService < BaseService
   include OStatus2::MagicKey
+  include HttpHelper
 
   DFRN_NS = 'http://purl.org/macgirvin/dfrn/1.0'
 
@@ -73,7 +74,7 @@ class FollowRemoteAccountService < BaseService
   end
 
   def get_feed(url)
-    response = http_client.get(Addressable::URI.parse(url).normalize)
+    response = http_client(write: 20, connect: 20, read: 50).get(Addressable::URI.parse(url).normalize)
     [response.to_s, Nokogiri::XML(response)]
   end
 
@@ -98,8 +99,4 @@ class FollowRemoteAccountService < BaseService
   def get_profile(body, account)
     RemoteProfileUpdateWorker.perform_async(account.id, body.force_encoding('UTF-8'), false)
   end
-
-  def http_client
-    HTTP.timeout(:per_operation, write: 20, connect: 20, read: 50)
-  end
 end
index 30b8032ed9356895f26788eeb0c3f3a70dada04a..b665391e3f1bf41668b6045d25aa04865c1f55cc 100644 (file)
@@ -34,7 +34,7 @@ class PostStatusService < BaseService
     process_mentions_service.call(status)
     process_hashtags_service.call(status)
 
-    LinkCrawlWorker.perform_async(status.id)
+    LinkCrawlWorker.perform_async(status.id) unless status.spoiler_text.present?
     DistributionWorker.perform_async(status.id)
     Pubsubhubbub::DistributionWorker.perform_async(status.stream_entry.id)
 
index 8ba8dcbb1e6c8bbe6571cbdbdbe2bff3a832ace2..5d8d7af3ba5dd24158a6e2808d7369ce084eb070 100644 (file)
@@ -1,5 +1,7 @@
 object @card
 
-attributes :url, :title, :description
+attributes :url, :title, :description, :type,
+           :author_name, :author_url, :provider_name,
+           :provider_url, :html, :width, :height
 
 node(:image) { |card| card.image? ? full_asset_url(card.image.url(:original)) : nil }
index 2317733eb93abd1d23d67d9a2d096f3890ae2149..b618bf344d3accca32ed34b1a07276cf2a277bcc 100644 (file)
@@ -36,7 +36,7 @@ Doorkeeper.configure do
 
   # Reuse access token for the same resource owner within an application (disabled by default)
   # Rationale: https://github.com/doorkeeper-gem/doorkeeper/issues/383
-  reuse_access_token
+  reuse_access_token
 
   # Issue access tokens with refresh token (disabled by default)
   # use_refresh_token
index bd455f3827a065918116991a3eaf52b2048962bc..27b183eeb3c3d9415f93bd399163d31e7d3fcc54 100644 (file)
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 Kaminari.configure do |config|
   config.default_per_page = 40
   config.window = 1
diff --git a/config/initializers/oembed.rb b/config/initializers/oembed.rb
new file mode 100644 (file)
index 0000000..208e586
--- /dev/null
@@ -0,0 +1,4 @@
+# frozen_string_literal: true
+
+require_relative '../../app/lib/provider_discovery'
+OEmbed::Providers.register_fallback(ProviderDiscovery)
diff --git a/db/migrate/20170425202925_add_oembed_to_preview_cards.rb b/db/migrate/20170425202925_add_oembed_to_preview_cards.rb
new file mode 100644 (file)
index 0000000..6a932bb
--- /dev/null
@@ -0,0 +1,12 @@
+class AddOEmbedToPreviewCards < ActiveRecord::Migration[5.0]
+  def change
+    add_column :preview_cards, :type, :integer, default: 0, null: false
+    add_column :preview_cards, :html, :text, null: false, default: ''
+    add_column :preview_cards, :author_name, :string, null: false, default: ''
+    add_column :preview_cards, :author_url, :string, null: false, default: ''
+    add_column :preview_cards, :provider_name, :string, null: false, default: ''
+    add_column :preview_cards, :provider_url, :string, null: false, default: ''
+    add_column :preview_cards, :width, :integer, default: 0, null: false
+    add_column :preview_cards, :height, :integer, default: 0, null: false
+  end
+end
index cd31eb528bec50704283bd5011bb3faa7707f62d..66326f2e2089381e99a4116f99e0655dd0998679 100644 (file)
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 20170425131920) do
+ActiveRecord::Schema.define(version: 20170425202925) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -203,6 +203,14 @@ ActiveRecord::Schema.define(version: 20170425131920) do
     t.datetime "image_updated_at"
     t.datetime "created_at",                      null: false
     t.datetime "updated_at",                      null: false
+    t.integer  "type",               default: 0,  null: false
+    t.text     "html",               default: "", null: false
+    t.string   "author_name",        default: "", null: false
+    t.string   "author_url",         default: "", null: false
+    t.string   "provider_name",      default: "", null: false
+    t.string   "provider_url",       default: "", null: false
+    t.integer  "width",              default: 0,  null: false
+    t.integer  "height",             default: 0,  null: false
     t.index ["status_id"], name: "index_preview_cards_on_status_id", unique: true, using: :btree
   end
 
@@ -333,4 +341,4 @@ ActiveRecord::Schema.define(version: 20170425131920) do
   end
 
   add_foreign_key "statuses", "statuses", column: "reblog_of_id", on_delete: :cascade
-end
\ No newline at end of file
+end
index 5d72d40b6204e267e3f0a3e3627329760106d92d..46fec295d5d3a3718e6990c92d23704359d55b5f 100644 (file)
@@ -9,6 +9,6 @@ RSpec.describe FetchLinkCardService do
     status = Fabricate(:status, text: 'Check out http://example.中国')
 
     FetchLinkCardService.new.call(status)
-    expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made
+    expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.at_least_once
   end
 end