Browse Source

Fix remote files not using Content-Type header, streaming (#14184)

Eugen Rochko 3 years ago
parent
commit
7aaf2b44ec

+ 10 - 0
app/lib/response_with_limit.rb

@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+class ResponseWithLimit
+  def initialize(response, limit)
+    @response = response
+    @limit = limit
+  end
+
+  attr_reader :response, :limit
+end

+ 13 - 4
app/models/concerns/attachmentable.rb

@@ -8,6 +8,17 @@ module Attachmentable
   MAX_MATRIX_LIMIT = 16_777_216 # 4096x4096px or approx. 16MB
   GIF_MATRIX_LIMIT = 921_600    # 1280x720px
 
+  # For some file extensions, there exist different content
+  # type variants, and browsers often send the wrong one,
+  # for example, sending an audio .ogg file as video/ogg,
+  # likewise, MimeMagic also misreports them as such. For
+  # those files, it is necessary to use the output of the
+  # `file` utility instead
+  INCORRECT_CONTENT_TYPES = %w(
+    video/ogg
+    video/webm
+  ).freeze
+
   included do
     before_post_process :obfuscate_file_name
     before_post_process :set_file_extensions
@@ -21,7 +32,7 @@ module Attachmentable
     self.class.attachment_definitions.each_key do |attachment_name|
       attachment = send(attachment_name)
 
-      next if attachment.blank? || attachment.queued_for_write[:original].blank?
+      next if attachment.blank? || attachment.queued_for_write[:original].blank? || !INCORRECT_CONTENT_TYPES.include?(attachment.instance_read(:content_type))
 
       attachment.instance_write :content_type, calculated_content_type(attachment)
     end
@@ -63,9 +74,7 @@ module Attachmentable
   end
 
   def calculated_content_type(attachment)
-    content_type = Paperclip.run('file', '-b --mime :file', file: attachment.queued_for_write[:original].path).split(/[:;\s]+/).first.chomp
-    content_type = 'video/mp4' if content_type == 'video/x-m4v'
-    content_type
+    Paperclip.run('file', '-b --mime :file', file: attachment.queued_for_write[:original].path).split(/[:;\s]+/).first.chomp
   rescue Terrapin::CommandLineError
     ''
   end

+ 3 - 37
app/models/concerns/remotable.rb

@@ -24,28 +24,16 @@ module Remotable
           Request.new(:get, url).perform do |response|
             raise Mastodon::UnexpectedResponseError, response unless (200...300).cover?(response.code)
 
-            content_type = parse_content_type(response.headers.get('content-type').last)
-            extname      = detect_extname_from_content_type(content_type)
-
-            if extname.nil?
-              disposition = response.headers.get('content-disposition').last
-              matches     = disposition&.match(/filename="([^"]*)"/)
-              filename    = matches.nil? ? parsed_url.path.split('/').last : matches[1]
-              extname     = filename.nil? ? '' : File.extname(filename)
-            end
-
-            basename = SecureRandom.hex(8)
-
-            public_send("#{attachment_name}_file_name=", basename + extname)
-            public_send("#{attachment_name}=", StringIO.new(response.body_with_limit(limit)))
+            public_send("#{attachment_name}=", ResponseWithLimit.new(response, limit))
           end
         rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError => e
           Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
           raise e unless suppress_errors
         rescue Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Paperclip::Error, Mastodon::DimensionsValidationError => e
           Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
-          nil
         end
+
+        nil
       end
 
       define_method("#{attribute_name}=") do |url|
@@ -59,26 +47,4 @@ module Remotable
       alias_method("reset_#{attachment_name}!", "download_#{attachment_name}!")
     end
   end
-
-  private
-
-  def detect_extname_from_content_type(content_type)
-    return if content_type.nil?
-
-    type = MIME::Types[content_type].first
-
-    return if type.nil?
-
-    extname = type.extensions.first
-
-    return if extname.nil?
-
-    ".#{extname}"
-  end
-
-  def parse_content_type(content_type)
-    return if content_type.nil?
-
-    content_type.split(/\s*;\s*/).first
-  end
 end

+ 2 - 0
config/application.rb

@@ -9,10 +9,12 @@ Bundler.require(*Rails.groups)
 require_relative '../app/lib/exceptions'
 require_relative '../lib/paperclip/url_generator_extensions'
 require_relative '../lib/paperclip/attachment_extensions'
+require_relative '../lib/paperclip/media_type_spoof_detector_extensions'
 require_relative '../lib/paperclip/lazy_thumbnail'
 require_relative '../lib/paperclip/gif_transcoder'
 require_relative '../lib/paperclip/video_transcoder'
 require_relative '../lib/paperclip/type_corrector'
+require_relative '../lib/paperclip/response_with_limit_adapter'
 require_relative '../lib/mastodon/snowflake'
 require_relative '../lib/mastodon/version'
 require_relative '../lib/devise/two_factor_ldap_authenticatable'

+ 1 - 0
config/initializers/paperclip.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 Paperclip::DataUriAdapter.register
+Paperclip::ResponseWithLimitAdapter.register
 
 Paperclip.interpolates :filename do |attachment, style|
   if style == :original

+ 25 - 20
lib/paperclip/image_extractor.rb

@@ -4,28 +4,10 @@ require 'mime/types/columnar'
 
 module Paperclip
   class ImageExtractor < Paperclip::Processor
-    IMAGE_EXTRACTION_OPTIONS = {
-      convert_options: {
-        output: {
-          'loglevel' => 'fatal',
-          vf: 'scale=\'min(400\, iw):min(400\, ih)\':force_original_aspect_ratio=decrease',
-        }.freeze,
-      }.freeze,
-      format: 'png',
-      time: -1,
-      file_geometry_parser: FastGeometryParser,
-    }.freeze
-
     def make
       return @file unless options[:style] == :original
 
-      image = begin
-        begin
-          Paperclip::Transcoder.make(file, IMAGE_EXTRACTION_OPTIONS.dup, attachment)
-        rescue Paperclip::Error, ::Av::CommandError
-          nil
-        end
-      end
+      image = extract_image_from_file!
 
       unless image.nil?
         begin
@@ -36,7 +18,7 @@ module Paperclip
           # to make sure it's cleaned up
 
           begin
-            FileUtils.rm(image)
+            image.close(true)
           rescue Errno::ENOENT
             nil
           end
@@ -45,5 +27,28 @@ module Paperclip
 
       @file
     end
+
+    private
+
+    def extract_image_from_file!
+      ::Av.logger = Paperclip.logger
+
+      cli = ::Av.cli
+      dst = Tempfile.new([File.basename(@file.path, '.*'), '.png'])
+      dst.binmode
+
+      cli.add_source(@file.path)
+      cli.add_destination(dst.path)
+      cli.add_output_param loglevel: 'fatal'
+
+      begin
+        cli.run
+      rescue Cocaine::ExitStatusError
+        dst.close(true)
+        return nil
+      end
+
+      dst
+    end
   end
 end

+ 27 - 0
lib/paperclip/media_type_spoof_detector_extensions.rb

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Paperclip
+  module MediaTypeSpoofDetectorExtensions
+    def calculated_content_type
+      @calculated_content_type ||= type_from_mime_magic || type_from_file_command
+    end
+
+    def type_from_mime_magic
+      @type_from_mime_magic ||= begin
+        begin
+          File.open(@file.path) do |file|
+            MimeMagic.by_magic(file)&.type
+          end
+        rescue Errno::ENOENT
+          ''
+        end
+      end
+    end
+
+    def type_from_file_command
+      @type_from_file_command ||= FileCommandContentTypeDetector.new(@file.path).detect
+    end
+  end
+end
+
+Paperclip::MediaTypeSpoofDetector.prepend(Paperclip::MediaTypeSpoofDetectorExtensions)

+ 55 - 0
lib/paperclip/response_with_limit_adapter.rb

@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Paperclip
+  class ResponseWithLimitAdapter < AbstractAdapter
+    def self.register
+      Paperclip.io_adapters.register self do |target|
+        target.is_a?(ResponseWithLimit)
+      end
+    end
+
+    def initialize(target, options = {})
+      super
+      cache_current_values
+    end
+
+    private
+
+    def cache_current_values
+      @original_filename = filename_from_content_disposition || filename_from_path || 'data'
+      @size = @target.response.content_length
+      @tempfile = copy_to_tempfile(@target)
+      @content_type = @target.response.mime_type || ContentTypeDetector.new(@tempfile.path).detect
+    end
+
+    def copy_to_tempfile(source)
+      bytes_read = 0
+
+      source.response.body.each do |chunk|
+        bytes_read += chunk.bytesize
+
+        destination.write(chunk)
+        chunk.clear
+
+        raise Mastodon::LengthValidationError if bytes_read > source.limit
+      end
+
+      destination.rewind
+      destination
+    rescue Mastodon::LengthValidationError
+      destination.close(true)
+      raise
+    ensure
+      source.response.connection.close
+    end
+
+    def filename_from_content_disposition
+      disposition = @target.response.headers['content-disposition']
+      disposition&.match(/filename="([^"]*)"/)&.captures&.first
+    end
+
+    def filename_from_path
+      @target.response.uri.path.split('/').last
+    end
+  end
+end

+ 3 - 7
spec/models/concerns/remotable_spec.rb

@@ -162,19 +162,15 @@ RSpec.describe Remotable do
           let(:headers)   { { 'content-disposition' => file } }
 
           it 'assigns file' do
-            string_io = StringIO.new('')
-            extname   = '.txt'
-            basename  = '0123456789abcdef'
+            response_with_limit = ResponseWithLimit.new(nil, 0)
 
-            allow(SecureRandom).to receive(:hex).and_return(basename)
-            allow(StringIO).to receive(:new).with(anything).and_return(string_io)
+            allow(ResponseWithLimit).to receive(:new).with(anything, anything).and_return(response_with_limit)
 
             expect(foo).to receive(:public_send).with("download_#{hoge}!", url)
 
             foo.hoge_remote_url = url
 
-            expect(foo).to receive(:public_send).with("#{hoge}=", string_io)
-            expect(foo).to receive(:public_send).with("#{hoge}_file_name=", basename + extname)
+            expect(foo).to receive(:public_send).with("#{hoge}=", response_with_limit)
 
             foo.download_hoge!(url)
           end