Do not pass unknown encoding names to nokogiri. (#30987)
This commit is contained in:
parent
36592d10aa
commit
2ea9336b68
3 changed files with 35 additions and 1 deletions
|
@ -274,7 +274,7 @@ class LinkDetailsExtractor
|
|||
end
|
||||
|
||||
def detect_encoding_and_parse_document
|
||||
[detect_encoding, nil, @html_charset].uniq.each do |encoding|
|
||||
[detect_encoding, nil, header_encoding].uniq.each do |encoding|
|
||||
document = Nokogiri::HTML(@html, nil, encoding)
|
||||
return document if document.to_s.valid_encoding?
|
||||
end
|
||||
|
@ -286,6 +286,13 @@ class LinkDetailsExtractor
|
|||
guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil
|
||||
end
|
||||
|
||||
# Resolves the charset announced by the remote server (`@html_charset`) to the
# canonical name of an encoding Ruby knows about, so that only recognised
# encoding names are handed to the HTML parser.
#
# Returns the canonical encoding name as a String, or nil when no charset was
# announced, when Ruby does not recognise it, or when it is one of Ruby's
# process-level pseudo-names.
def header_encoding
  return unless @html_charset

  # `Encoding.find` also resolves these special aliases to whatever this Ruby
  # process is configured with. They say nothing about the remote document, so
  # a server sending e.g. `charset=locale` must not be honoured.
  return if %w(locale external filesystem internal).include?(@html_charset.to_s.downcase)

  # Safe navigation guards against lookups that return nil instead of raising.
  Encoding.find(@html_charset)&.name
rescue ArgumentError
  # Encoding from HTTP header is not recognized by ruby
  nil
end
|
||||
|
||||
def detector
|
||||
@detector ||= CharlockHolmes::EncodingDetector.new.tap do |detector|
|
||||
detector.strip_tags = true
|
||||
|
|
18
spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
vendored
Normal file
18
spec/fixtures/requests/alternative_utf8_spelling_in_header.txt
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
HTTP/1.1 200 OK
|
||||
server: nginx
|
||||
date: Thu, 13 Jun 2024 14:33:13 GMT
|
||||
content-type: text/html; charset=utf8
|
||||
content-length: 192
|
||||
accept-ranges: bytes
|
||||
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Webserver Configs R Us</title>
|
||||
</head>
|
||||
<body>
|
||||
<h2>Welcome</h2>
|
||||
<p>Sneaky non-UTF character: á</p>
|
||||
</body>
|
||||
</html>
|
|
@ -32,6 +32,7 @@ RSpec.describe FetchLinkCardService do
|
|||
stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt'))
|
||||
stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt'))
|
||||
stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt'))
|
||||
stub_request(:get, 'http://example.com/alternative_utf8_spelling_in_header').to_return(request_fixture('alternative_utf8_spelling_in_header.txt'))
|
||||
|
||||
Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache
|
||||
|
||||
|
@ -292,6 +293,14 @@ RSpec.describe FetchLinkCardService do
|
|||
expect(status.preview_card).to be_nil
|
||||
end
|
||||
end
|
||||
|
||||
# The fixture served for this URL declares `charset=utf8` (no hyphen) in its
# `Content-Type` header. The card must still be built and its title read from
# the page.
context 'with a URL where the `Content-Type` header uses `utf8` instead of `utf-8`' do
  let(:status) { Fabricate(:status, text: 'test http://example.com/alternative_utf8_spelling_in_header') }

  it 'creates a preview card with the page title' do
    expect(status.preview_card.title).to eq 'Webserver Configs R Us'
  end
end
|
||||
end
|
||||
|
||||
context 'with a remote status' do
|
||||
|
|
Loading…
Reference in a new issue