Unescape HTML entities (#24019)

This commit is contained in:
Christian Schmidt 2023-03-08 19:56:41 +01:00 committed by GitHub
parent 9dfe2dbd3f
commit 684a970b3c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 61 additions and 6 deletions

View file

@ -18,7 +18,7 @@ class PlainTextFormatter
if local? if local?
text text
else else
strip_tags(insert_newlines).chomp html_entities.decode(strip_tags(insert_newlines)).chomp
end end
end end
@ -27,4 +27,8 @@ class PlainTextFormatter
def insert_newlines def insert_newlines
text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" } text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" }
end end
def html_entities
HTMLEntities.new
end
end end

View file

@ -6,7 +6,7 @@ RSpec.describe PlainTextFormatter do
describe '#to_s' do describe '#to_s' do
subject { described_class.new(status.text, status.local?).to_s } subject { described_class.new(status.text, status.local?).to_s }
context 'given a post with local status' do context 'when status is local' do
let(:status) { Fabricate(:status, text: '<p>a text by a nerd who uses an HTML tag in text</p>', uri: nil) } let(:status) { Fabricate(:status, text: '<p>a text by a nerd who uses an HTML tag in text</p>', uri: nil) }
it 'returns the raw text' do it 'returns the raw text' do
@ -14,12 +14,63 @@ RSpec.describe PlainTextFormatter do
end end
end end
context 'given a post with remote status' do context 'when status is remote' do
let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') } let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') }
let(:status) { Fabricate(:status, account: remote_account, text: '<p>Hello</p><script>alert("Hello")</script>') }
it 'returns tag-stripped text' do context 'when text contains inline HTML tags' do
expect(subject).to eq 'Hello' let(:status) { Fabricate(:status, account: remote_account, text: '<b>Lorem</b> <em>ipsum</em>') }
it 'strips the tags' do
expect(subject).to eq 'Lorem ipsum'
end
end
context 'when text contains <p> tags' do
let(:status) { Fabricate(:status, account: remote_account, text: '<p>Lorem</p><p>ipsum</p>') }
it 'inserts a newline' do
expect(subject).to eq "Lorem\nipsum"
end
end
context 'when text contains a single <br> tag' do
let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem<br>ipsum') }
it 'inserts a newline' do
expect(subject).to eq "Lorem\nipsum"
end
end
context 'when text contains consecutive <br> tag' do
let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem<br><br><br>ipsum') }
it 'inserts a single newline' do
expect(subject).to eq "Lorem\nipsum"
end
end
context 'when text contains HTML entity' do
let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem &amp; ipsum &#x2764;') }
it 'unescapes the entity' do
expect(subject).to eq 'Lorem & ipsum ❤'
end
end
context 'when text contains <script> tag' do
let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem <script> alert("Booh!") </script>ipsum') }
it 'strips the tag and its contents' do
expect(subject).to eq 'Lorem ipsum'
end
end
context 'when text contains an HTML comment tags' do
let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem <!-- Booh! -->ipsum') }
it 'strips the comment' do
expect(subject).to eq 'Lorem ipsum'
end
end end
end end
end end