]> cat aescling's git repositories - mastodon.git/commitdiff
Fix some link previews being incorrectly generated from other prior links (#16885)
authorClaire <claire.github-309c@sitedethib.com>
Thu, 21 Oct 2021 18:39:35 +0000 (20:39 +0200)
committerGitHub <noreply@github.com>
Thu, 21 Oct 2021 18:39:35 +0000 (20:39 +0200)
* Add tests

* Fix some link previews being incorrectly generated from different prior links

PR #12403 added a cache to avoid redundant queries when the OEmbed endpoint can
be guessed from the URL. This caching mechanism is not perfectly correct as
there is no guarantee that all pages from a given domain share the same
OEmbed provider endpoint.

This PR prevents the FetchOEmbedService from caching OEmbed endpoint that
cannot be generalized by replacing a fully-qualified URL from the endpoint's
parameters, greatly reducing the number of incorrect cached generalizations.

app/services/fetch_oembed_service.rb
spec/fixtures/requests/oembed_youtube.html [new file with mode: 0644]
spec/services/fetch_oembed_service_spec.rb

index 60be9b9dc09e584c0da24b60ce1922901e1d4c08..4cbaa04c623ab263bcdbd4390464a0a3be607877 100644 (file)
@@ -2,6 +2,7 @@
 
 class FetchOEmbedService
   ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze
+  URL_REGEX                 = /(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i.freeze
 
   attr_reader :url, :options, :format, :endpoint_url
 
@@ -65,10 +66,12 @@ class FetchOEmbedService
   end
 
   def cache_endpoint!
+    return unless URL_REGEX.match?(@endpoint_url)
+
     url_domain = Addressable::URI.parse(@url).normalized_host
 
     endpoint_hash = {
-      endpoint: @endpoint_url.gsub(/(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i, '={url}'),
+      endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'),
       format: @format,
     }
 
diff --git a/spec/fixtures/requests/oembed_youtube.html b/spec/fixtures/requests/oembed_youtube.html
new file mode 100644 (file)
index 0000000..1508e4d
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <link rel="alternate" type="application/json+oembed" href="https://www.youtube.com/oembed?format=json&amp;url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE" title="What is Mastodon?">
+  </head>
+  <body></body>
+</html>
index a4262b040853f1cbe0f992c29f770d6a826eb75a..88f0113eddfd7f1d9244671e283e1600fb9ab7f4 100644 (file)
@@ -13,6 +13,32 @@ describe FetchOEmbedService, type: :service do
 
   describe 'discover_provider' do
     context 'when status code is 200 and MIME type is text/html' do
+      context 'when OEmbed endpoint contains URL as parameter' do
+        before do
+          stub_request(:get, 'https://www.youtube.com/watch?v=IPSbNdBmWKE').to_return(
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+            body: request_fixture('oembed_youtube.html'),
+          )
+          stub_request(:get, 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE').to_return(
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+            body: request_fixture('oembed_json_empty.html')
+          )
+        end
+
+        it 'returns new OEmbed::Provider for JSON provider' do
+          subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE')
+          expect(subject.endpoint_url).to eq 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE'
+          expect(subject.format).to eq :json
+        end
+
+        it 'stores URL template' do
+          subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE')
+          expect(Rails.cache.read('oembed_endpoint:www.youtube.com')[:endpoint]).to eq 'https://www.youtube.com/oembed?format=json&url={url}'
+        end
+      end
+
       context 'Both of JSON and XML provider are discoverable' do
         before do
           stub_request(:get, 'https://host.test/oembed.html').to_return(
@@ -33,6 +59,11 @@ describe FetchOEmbedService, type: :service do
           expect(subject.endpoint_url).to eq 'https://host.test/provider.xml'
           expect(subject.format).to eq :xml
         end
+
+        it 'does not cache OEmbed endpoint' do
+          subject.call('https://host.test/oembed.html', format: :xml)
+          expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
+        end
       end
 
       context 'JSON provider is discoverable while XML provider is not' do
@@ -49,6 +80,11 @@ describe FetchOEmbedService, type: :service do
           expect(subject.endpoint_url).to eq 'https://host.test/provider.json'
           expect(subject.format).to eq :json
         end
+
+        it 'does not cache OEmbed endpoint' do
+          subject.call('https://host.test/oembed.html')
+          expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
+        end
       end
 
       context 'XML provider is discoverable while JSON provider is not' do
@@ -65,6 +101,11 @@ describe FetchOEmbedService, type: :service do
           expect(subject.endpoint_url).to eq 'https://host.test/provider.xml'
           expect(subject.format).to eq :xml
         end
+
+        it 'does not cache OEmbed endpoint' do
+          subject.call('https://host.test/oembed.html')
+          expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
+        end
       end
 
       context 'Invalid XML provider is discoverable while JSON provider is not' do