- g++-6
- libprotobuf-dev
- protobuf-compiler
+ - libicu-dev
rvm:
- 2.3.4
ffmpeg
libxdamage1
libxfixes3
+libicu-dev
ffmpeg \
file \
git \
+ icu-dev \
imagemagick@edge \
libpq \
libxml2 \
gem 'addressable', '~> 2.5'
gem 'bootsnap'
gem 'browser'
+gem 'charlock_holmes', '~> 0.7.3'
gem 'cld3', '~> 3.1'
gem 'devise', '~> 4.2'
gem 'devise-two-factor', '~> 3.0'
rack (>= 1.0.0)
rack-test (>= 0.5.4)
xpath (~> 2.0)
+ charlock_holmes (0.7.3)
case_transform (0.2)
activesupport
chunky_png (1.3.8)
capistrano-rbenv (~> 2.1)
capistrano-yarn (~> 2.0)
capybara (~> 2.14)
+ charlock_holmes (~> 0.7.3)
cld3 (~> 3.1)
climate_control (~> 0.2)
devise (~> 4.2)
yarn \
libprotobuf-dev \
libreadline-dev \
+ libicu-dev \
-y
# Install rvm
# frozen_string_literal: true
-require 'nkf'
class FetchLinkCardService < BaseService
include HttpHelper
return if response.code != 200 || response.mime_type != 'text/html'
html = response.to_s
- page = Nokogiri::HTML(html, nil, NKF.guess(html).to_s)
+
+ detector = CharlockHolmes::EncodingDetector.new
+ detector.strip_tags = true
+
+ guess = detector.detect(html, response.charset)
+ page = Nokogiri::HTML(html, nil, guess&.fetch(:encoding))
card.type = :link
card.title = meta_property(page, 'og:title') || page.at_xpath('//title')&.content
--- /dev/null
+HTTP/1.1 200 OK
+Server: nginx/1.11.10
+Date: Tue, 04 Jul 2017 16:43:39 GMT
+Content-Type: text/html
+Content-Length: 273
+Connection: keep-alive
+Last-Modified: Tue, 04 Jul 2017 16:41:34 GMT
+Accept-Ranges: bytes
+
+<HTML>
+<HEAD>
+ <META NAME="GENERATOR" CONTENT="Adobe PageMill 3.0J Mac">
+ <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=koi8-r">
+ <TITLE>������� ��������� ������ �� XVI ��. ���������� ������� ������������.</TITLE>
+</HEAD>
+<BODY>
+<P><CENTER><B><FONT SIZE="+2">������� ��������� ������ �� XVI ��. ���������� ������� ������������.</FONT></B><BR>
+<HR><BR>
+</BODY>
+</HTML>
<HEAD>
<META NAME="GENERATOR" CONTENT="Adobe PageMill 3.0J Mac">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
- <TITLE>JSIS�̃y�[�W</TITLE>
+ <TITLE>SJIS�̃y�[�W</TITLE>
</HEAD>
<BODY>
-<P><CENTER><B><FONT SIZE="+2">SJIS�̃y�[�W</FONT></B><BR>
+<P><CENTER><B><FONT SIZE="+2">�����N�܂��Ă�����L�O�l��Ă�̂̎��ł�����ł��B�����ԂɈӖ��҂͐������ǂ�Ȕ���܂����܂ł��\���グ����������邽�ɂ͎Q�l�A�邽��������A�����ɂ������܂��Ȃ��B���炢���Ȃ��̂͂ǂ���㌎��ł��邾�����������B������ĉ��c����ɔ��R�K�������ɉ]���ł����͂����������͂��Ȃ����w�}���Ƃ������o���Ȃ�����Ȃ���āA���͎̐̂�����͉A��{�炩��A�v������̂�̂���̂��\82�������ɂ���]�ƌ����man�ɂ������֎Q��悤�ɓ����ɂ��������łȂ�̂ŁA�������\���ɕς���Ă���ł����ōl�������B������Ⴆ���������Ƃǂ܂��̂��ۂނ�݂Ƃ���ł��āA���̎����ł͐\����ĂƂ��Đ��Ԃɕ�ׂ̂ɍs���Ȃ���ȁB</FONT></B><BR>
<HR><BR>
</BODY>
</HTML>
--- /dev/null
+HTTP/1.1 200 OK
+Server: nginx/1.11.10
+Date: Tue, 04 Jul 2017 16:43:39 GMT
+Content-Type: text/html; charset=utf-8
+Content-Length: 273
+Connection: keep-alive
+Last-Modified: Tue, 04 Jul 2017 16:41:34 GMT
+Accept-Ranges: bytes
+
+<HTML>
+<HEAD>
+ <META NAME="GENERATOR" CONTENT="Adobe PageMill 3.0J Mac">
+ <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
+ <TITLE>SJIS�̃y�[�W</TITLE>
+</HEAD>
+<BODY>
+<P><CENTER><B><FONT SIZE="+2">�����N�܂��Ă�����L�O�l��Ă�̂̎��ł�����ł��B�����ԂɈӖ��҂͐������ǂ�Ȕ���܂����܂ł��\���グ����������邽�ɂ͎Q�l�A�邽��������A�����ɂ������܂��Ȃ��B���炢���Ȃ��̂͂ǂ���㌎��ł��邾�����������B������ĉ��c����ɔ��R�K�������ɉ]���ł����͂����������͂��Ȃ����w�}���Ƃ������o���Ȃ�����Ȃ���āA���͎̐̂�����͉A��{�炩��A�v������̂�̂���̂��\82�������ɂ���]�ƌ����man�ɂ������֎Q��悤�ɓ����ɂ��������łȂ�̂ŁA�������\���ɕς���Ă���ł����ōl�������B������Ⴆ���������Ƃǂ܂��̂��ۂނ�݂Ƃ���ł��āA���̎����ł͐\����ĂƂ��Đ��Ԃɕ�ׂ̂ɍs���Ȃ���ȁB</FONT></B><BR>
+<HR><BR>
+</BODY>
+</HTML>
stub_request(:get, 'http://example.xn--fiqs8s/').to_return(request_fixture('idn.txt'))
stub_request(:head, 'http://example.com/sjis').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
stub_request(:get, 'http://example.com/sjis').to_return(request_fixture('sjis.txt'))
+ stub_request(:head, 'http://example.com/sjis_with_wrong_charset').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
+ stub_request(:get, 'http://example.com/sjis_with_wrong_charset').to_return(request_fixture('sjis_with_wrong_charset.txt'))
+ stub_request(:head, 'http://example.com/koi8-r').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
+ stub_request(:get, 'http://example.com/koi8-r').to_return(request_fixture('koi8-r.txt'))
stub_request(:head, 'https://github.com/qbi/WannaCry').to_return(status: 404)
subject.call(status)
it 'works with SJIS' do
expect(a_request(:get, 'http://example.com/sjis')).to have_been_made.at_least_once
+ expect(status.preview_card.title).to eq("SJISのページ")
+ end
+ end
+
+ context do
+ let(:status) { Fabricate(:status, text: 'Check out http://example.com/sjis_with_wrong_charset') }
+
+ it 'works with SJIS even with wrong charset header' do
+ expect(a_request(:get, 'http://example.com/sjis_with_wrong_charset')).to have_been_made.at_least_once
+ expect(status.preview_card.title).to eq("SJISのページ")
+ end
+ end
+
+ context do
+ let(:status) { Fabricate(:status, text: 'Check out http://example.com/koi8-r') }
+
+ it 'works with koi8-r' do
+ expect(a_request(:get, 'http://example.com/koi8-r')).to have_been_made.at_least_once
+ expect(status.preview_card.title).to eq("Московя начинаетъ только въ XVI ст. привлекать внимане иностранцевъ.")
end
end
end