// Copyright 2015 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Note: ported from Chromium commit head: 85fdf90 #ifndef V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ #define V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ #include <stddef.h> #include <stdint.h> #include <memory> #include <queue> #include <utility> #include <vector> #include "base/macros.h" #include "base/memory/linked_ptr.h" #include "base/memory/ref_counted.h" #include "base/memory/weak_ptr.h" #include "base/synchronization/waitable_event.h" #include "base/threading/thread.h" #include "h264_decoder.h" #include "v4l2_device.h" #include "video_decode_accelerator.h" #include "videodev2.h" #include "vp8_decoder.h" #include "vp9_decoder.h" namespace media { // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice // level codec API for decoding. The slice level API provides only a low-level // decoding functionality and requires userspace to provide support for parsing // the input stream and managing decoder state across frames. class V4L2SliceVideoDecodeAccelerator : public VideoDecodeAccelerator { public: class V4L2DecodeSurface; V4L2SliceVideoDecodeAccelerator( const scoped_refptr<V4L2Device>& device); ~V4L2SliceVideoDecodeAccelerator() override; // VideoDecodeAccelerator implementation. bool Initialize(const Config& config, Client* client) override; void Decode(const BitstreamBuffer& bitstream_buffer) override; void AssignPictureBuffers(const std::vector<PictureBuffer>& buffers) override; void ImportBufferForPicture( int32_t picture_buffer_id, VideoPixelFormat pixel_format, const NativePixmapHandle& native_pixmap_handle) override; void ReusePictureBuffer(int32_t picture_buffer_id) override; void Flush() override; void Reset() override; void Destroy() override; bool TryToSetupDecodeOnSeparateThread( const base::WeakPtr<Client>& decode_client, const scoped_refptr<base::SingleThreadTaskRunner>& decode_task_runner) override; static VideoDecodeAccelerator::SupportedProfiles GetSupportedProfiles(); private: class V4L2H264Accelerator; class V4L2VP8Accelerator; class V4L2VP9Accelerator; // Record for input buffers. struct InputRecord { InputRecord(); int32_t input_id; void* address; size_t length; size_t bytes_used; bool at_device; }; // Record for output buffers. struct OutputRecord { OutputRecord(); OutputRecord(OutputRecord&&) = default; bool at_device; bool at_client; int32_t picture_id; std::vector<base::ScopedFD> dmabuf_fds; bool cleared; }; // See http://crbug.com/255116. // Input bitstream buffer size for up to 1080p streams. const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024; // Input bitstream buffer size for up to 4k streams. const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p; const size_t kNumInputBuffers = 16; // Input format V4L2 fourccs this class supports. static const uint32_t supported_input_fourccs_[]; // // Below methods are used by accelerator implementations. // // Append slice data in |data| of size |size| to pending hardware // input buffer with |index|. This buffer will be submitted for decode // on the next DecodeSurface(). Return true on success. bool SubmitSlice(int index, const uint8_t* data, size_t size); // Submit controls in |ext_ctrls| to hardware. Return true on success. bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls); // Gets current control values for controls in |ext_ctrls| from the driver. // Return true on success. bool GetExtControls(struct v4l2_ext_controls* ext_ctrls); // Return true if the driver exposes V4L2 control |ctrl_id|, false otherwise. bool IsCtrlExposed(uint32_t ctrl_id); // Decode of |dec_surface| is ready to be submitted and all codec-specific // settings are set in hardware. void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface); // |dec_surface| is ready to be outputted once decode is finished. // This can be called before decode is actually done in hardware, and this // method is responsible for maintaining the ordering, i.e. the surfaces will // be outputted in the same order as SurfaceReady calls. To do so, the // surfaces are put on decoder_display_queue_ and sent to output in that // order once all preceding surfaces are sent. void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface); // // Internal methods of this class. // // Recycle a V4L2 input buffer with |index| after dequeuing from device. void ReuseInputBuffer(int index); // Recycle V4L2 output buffer with |index|. Used as surface release callback. void ReuseOutputBuffer(int index); // Queue a |dec_surface| to device for decoding. void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface); // Dequeue any V4L2 buffers available and process. void Dequeue(); // V4L2 QBUF helpers. bool EnqueueInputRecord(int index, uint32_t config_store); bool EnqueueOutputRecord(int index); // Set input and output formats in hardware. bool SetupFormats(); // Create input and output buffers. bool CreateInputBuffers(); bool CreateOutputBuffers(); // Destroy input buffers. void DestroyInputBuffers(); // Destroy output buffers. If |dismiss| is true, also dismissing the // associated PictureBuffers. bool DestroyOutputs(bool dismiss); // Used by DestroyOutputs. bool DestroyOutputBuffers(); // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer() // and signal |done| after finishing. void DismissPictures(const std::vector<int32_t>& picture_buffer_ids, base::WaitableEvent* done); // Task to finish initialization on decoder_thread_. void InitializeTask(); void NotifyError(Error error); void DestroyTask(); // Sets the state to kError and notifies client if needed. void SetErrorState(Error error); // Event handling. Events include flush, reset and resolution change and are // processed while in kIdle state. // Surface set change (resolution change) flow. // If we have no surfaces allocated, start it immediately, otherwise mark // ourselves as pending for surface set change. void InitiateSurfaceSetChange(); // If a surface set change is pending and we are ready, stop the device, // destroy outputs, releasing resources and dismissing pictures as required, // followed by starting the flow to allocate a new set for the current // resolution/DPB size, as provided by decoder. bool FinishSurfaceSetChange(); // Flush flow when requested by client. // When Flush() is called, it posts a FlushTask, which checks the input queue. // If nothing is pending for decode on decoder_input_queue_, we call // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef // onto the decoder_input_queue_ to schedule a flush. When we reach it later // on, we call InitiateFlush() to perform it at the correct time. void FlushTask(); // Tell the decoder to flush all frames, reset it and mark us as scheduled // for flush, so that we can finish it once all pending decodes are finished. void InitiateFlush(); // To be called if decoder_flushing_ is true. If not all pending frames are // decoded, return false, requesting the caller to try again later. // Otherwise perform flush by sending all pending pictures to the client, // notify it that flush is finished and return true, informing the caller // that further progress can be made. bool FinishFlush(); // Reset flow when requested by client. // Drop all inputs, reset the decoder and mark us as pending for reset. void ResetTask(); // To be called if decoder_resetting_ is true. If not all pending frames are // decoded, return false, requesting the caller to try again later. // Otherwise perform reset by dropping all pending outputs (client is not // interested anymore), notifying it that reset is finished, and return true, // informing the caller that further progress can be made. bool FinishReset(); // Called when a new event is pended. Transitions us into kIdle state (if not // already in it), if possible. Also starts processing events. void NewEventPending(); // Called after all events are processed successfully (i.e. all Finish*() // methods return true) to return to decoding state. bool FinishEventProcessing(); // Process pending events, if any. void ProcessPendingEventsIfNeeded(); // Allocate V4L2 buffers and assign them to |buffers| provided by the client // via AssignPictureBuffers() on decoder thread. void AssignPictureBuffersTask(const std::vector<PictureBuffer>& buffers); // Use buffer backed by dmabuf file descriptors in |passed_dmabuf_fds| for the // OutputRecord associated with |picture_buffer_id|, taking ownership of the // file descriptors. void ImportBufferForPictureTask( int32_t picture_buffer_id, // TODO(posciak): (https://crbug.com/561749) we should normally be able to // pass the vector by itself via std::move, but it's not possible to do // this if this method is used as a callback. std::unique_ptr<std::vector<base::ScopedFD>> passed_dmabuf_fds); // Performed on decoder_thread_ as a consequence of poll() on decoder_thread_ // returning an event. void ServiceDeviceTask(); // Schedule poll if we have any buffers queued and the poll thread // is not stopped (on surface set change). void SchedulePollIfNeeded(); // Attempt to start/stop device_poll_thread_. bool StartDevicePoll(); bool StopDevicePoll(bool keep_input_state); // Ran on device_poll_thread_ to wait for device events. void DevicePollTask(bool poll_device); enum State { // We are in this state until Initialize() returns successfully. // We can't post errors to the client in this state yet. kUninitialized, // Initialize() returned successfully. kInitialized, // This state allows making progress decoding more input stream. kDecoding, // Transitional state when we are not decoding any more stream, but are // performing flush, reset, resolution change or are destroying ourselves. kIdle, // Requested new PictureBuffers via ProvidePictureBuffers(), awaiting // AssignPictureBuffers(). kAwaitingPictureBuffers, // Error state, set when sending NotifyError to client. kError, }; // Buffer id for flush buffer, queued by FlushTask(). const int kFlushBufferId = -2; // Handler for Decode() on decoder_thread_. void DecodeTask(const BitstreamBuffer& bitstream_buffer); // Schedule a new DecodeBufferTask if we are decoding. void ScheduleDecodeBufferTaskIfNeeded(); // Main decoder loop. Keep decoding the current buffer in decoder_, asking // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so, // and handle other returns from it appropriately. void DecodeBufferTask(); // Check decoder_input_queue_ for any available buffers to decode and // set the decoder_current_bitstream_buffer_ to the next buffer if one is // available, taking it off the queue. Also set the current stream pointer // in decoder_, and return true. // Return false if no buffers are pending on decoder_input_queue_. bool TrySetNewBistreamBuffer(); // Auto-destruction reference for EGLSync (for message-passing). void ReusePictureBufferTask(int32_t picture_buffer_id); // Called to actually send |dec_surface| to the client, after it is decoded // preserving the order in which it was scheduled via SurfaceReady(). void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface); // Goes over the |decoder_display_queue_| and sends all buffers from the // front of the queue that are already decoded to the client, in order. void TryOutputSurfaces(); // Creates a new decode surface or returns nullptr if one is not available. scoped_refptr<V4L2DecodeSurface> CreateSurface(); // Send decoded pictures to PictureReady. void SendPictureReady(); // Callback that indicates a picture has been cleared. void PictureCleared(); size_t input_planes_count_; size_t output_planes_count_; // GPU Child thread task runner. const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_; // Task runner Decode() and PictureReady() run on. scoped_refptr<base::SingleThreadTaskRunner> decode_task_runner_; // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or // device worker threads back to the child thread. base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_; // To expose client callbacks from VideoDecodeAccelerator. // NOTE: all calls to these objects *MUST* be executed on // child_task_runner_. std::unique_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>> client_ptr_factory_; base::WeakPtr<VideoDecodeAccelerator::Client> client_; // Callbacks to |decode_client_| must be executed on |decode_task_runner_|. base::WeakPtr<Client> decode_client_; // V4L2 device in use. scoped_refptr<V4L2Device> device_; // Thread to communicate with the device on. base::Thread decoder_thread_; scoped_refptr<base::SingleThreadTaskRunner> decoder_thread_task_runner_; // Thread used to poll the device for events. base::Thread device_poll_thread_; // Input queue state. bool input_streamon_; // Number of input buffers enqueued to the device. int input_buffer_queued_count_; // Input buffers ready to use; LIFO since we don't care about ordering. std::list<int> free_input_buffers_; // Mapping of int index to an input buffer record. std::vector<InputRecord> input_buffer_map_; // Output queue state. bool output_streamon_; // Number of output buffers enqueued to the device. int output_buffer_queued_count_; // Output buffers ready to use. std::list<int> free_output_buffers_; // Mapping of int index to an output buffer record. std::vector<OutputRecord> output_buffer_map_; VideoCodecProfile video_profile_; uint32_t input_format_fourcc_; uint32_t output_format_fourcc_; Size coded_size_; struct BitstreamBufferRef; // Input queue of stream buffers coming from the client. std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_; // BitstreamBuffer currently being processed. std::unique_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_; // Queue storing decode surfaces ready to be output as soon as they are // decoded. The surfaces must be output in order they are queued. std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_; // Decoder state. State state_; Config::OutputMode output_mode_; // If any of these are true, we are waiting for the device to finish decoding // all previously-queued frames, so we can finish the flush/reset/surface // change flows. These can stack. bool decoder_flushing_; bool decoder_resetting_; bool surface_set_change_pending_; // Hardware accelerators. // TODO(posciak): Try to have a superclass here if possible. std::unique_ptr<V4L2H264Accelerator> h264_accelerator_; std::unique_ptr<V4L2VP8Accelerator> vp8_accelerator_; std::unique_ptr<V4L2VP9Accelerator> vp9_accelerator_; // Codec-specific software decoder in use. std::unique_ptr<AcceleratedVideoDecoder> decoder_; // Surfaces queued to device to keep references to them while decoded. using V4L2DecodeSurfaceByOutputId = std::map<int, scoped_refptr<V4L2DecodeSurface>>; V4L2DecodeSurfaceByOutputId surfaces_at_device_; // Surfaces sent to client to keep references to them while displayed. using V4L2DecodeSurfaceByPictureBufferId = std::map<int32_t, scoped_refptr<V4L2DecodeSurface>>; V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_; // Record for decoded pictures that can be sent to PictureReady. struct PictureRecord { PictureRecord(bool cleared, const Picture& picture); ~PictureRecord(); bool cleared; // Whether the texture is cleared and safe to render from. Picture picture; // The decoded picture. }; // Pictures that are ready but not sent to PictureReady yet. std::queue<PictureRecord> pending_picture_ready_; // The number of pictures that are sent to PictureReady and will be cleared. int picture_clearing_count_; // The WeakPtrFactory for |weak_this_|. base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_; DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator); }; } // namespace media #endif // V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_