88#include < utils/logger.h>
99#include < utils/Utils.h>
1010#include < fstream>
11+ #include < queue>
1112#include " Video.h"
1213#include " ui/ImageView.h"
1314
@@ -47,9 +48,20 @@ struct deleteAVFrame
4748 }
4849};
4950
51+ struct deleteAVFrameRef
52+ {
53+ void operator ()(AVFrame *ptr)
54+ {
55+ if (ptr)
56+ av_frame_unref (ptr);
57+ deleteAVFrame{}(ptr);
58+ }
59+ };
60+
5061using AVCodecContextPtr = std::unique_ptr<AVCodecContext, deleteAVCodecContext>;
5162using AVFormatContextPtr = std::unique_ptr<AVFormatContext, deleteAVFormatContext>;
5263using AVFramePtr = std::unique_ptr<AVFrame, deleteAVFrame>;
64+ using AVFrameRefPtr = std::unique_ptr<AVFrame, deleteAVFrameRef>;
5365
5466}
5567
@@ -69,14 +81,13 @@ namespace Media {
6981 engine.getRootUIView ().addChild (view);
7082 }
7183
72- bool init ( const uint16_t width, const uint16_t height )
84+ ~Video ( )
7385 {
74- frame.reset (av_frame_alloc ());
75- if (!frame) {
76- LogWarn () << " Could not allocate frame" ;
77- return false ;
78- }
86+ view->setHidden (true );
87+ }
7988
89+ bool init (const uint16_t width, const uint16_t height)
90+ {
8091 av_init_packet (&packet);
8192 packet.data = nullptr ;
8293 packet.size = 0 ;
@@ -102,24 +113,33 @@ namespace Media {
102113 return false ;
103114 }
104115
105- if (!openCodecContext (videoStreamIndex, videoCodecContext, AVMEDIA_TYPE_VIDEO)) {
116+ if (!openCodecContext (videoStreamIndex, videoCodecContext, videoTimeBase, AVMEDIA_TYPE_VIDEO)) {
106117 LogWarn () << " Could not open video context" ;
107118 return false ;
108119 }
109120
110- if (!openCodecContext (audioStreamIndex, audioCodecContext, AVMEDIA_TYPE_AUDIO)) {
121+ if (!openCodecContext (audioStreamIndex, audioCodecContext, audioTimeBase, AVMEDIA_TYPE_AUDIO)) {
111122 LogWarn () << " Could not open audio context" ;
112123 return false ;
113124 }
114125
115126 av_dump_format (formatContext.get (), 0 , fileFullPath.c_str (), 0 ); // TODO: dump to logger
116127
128+ if (videoCodecContext->pix_fmt != AV_PIX_FMT_YUV420P) {
129+ LogWarn () << " Unknown pixel format" ;
130+ return false ;
131+ }
132+
133+ // load first frame
134+ if (nextVideoFrame () != Outcome::FrameVideo)
135+ return false ;
136+
117137 initialized_ = true ;
118138 return true ;
119139 }
120140
121141 private:
122- bool openCodecContext (int &streamIndex, AVCodecContextPtr &codecContext, AVMediaType mediaType)
142+ bool openCodecContext (int &streamIndex, AVCodecContextPtr &codecContext, double &timeBase, AVMediaType mediaType)
123143 {
124144 int index = av_find_best_stream (formatContext.get (), mediaType, -1 , -1 , nullptr , 0 );
125145 if (index < 0 ) {
@@ -147,104 +167,150 @@ namespace Media {
147167 }
148168
149169 AVDictionary *opts = nullptr ;
150- av_dict_set (&opts, " refcounted_frames" , " 0 " , 0 );
170+ av_dict_set (&opts, " refcounted_frames" , " 1 " , 0 );
151171 if (avcodec_open2 (context.get (), codec, &opts) < 0 ) {
152172 LogWarn () << " Could not open codec for stream" << av_get_media_type_string (mediaType);
153173 return false ;
154174 }
155175
156176 streamIndex = index;
157177 std::swap (codecContext, context);
178+ timeBase = av_q2d (stream->time_base );
158179 return true ;
159180 }
160181
161- public:
162- void nextFrame ()
/// Result of pulling one packet from the container (see nextFrame()).
enum class Outcome {
    End,        ///< av_read_frame() failed: no more packets or a read error
    Error,      ///< decoder failure or a packet from an unexpected stream
    FrameVideo, ///< a packet of the video stream was processed
    FrameAudio  ///< a packet of the audio stream was seen
};
185+
186+ Outcome nextVideoFrame ()
187+ {
188+ while (true ) {
189+ Outcome r = nextFrame ();
190+ if (r == Outcome::End || r == Outcome::Error)
191+ return r;
192+
193+ if (r == Outcome::FrameVideo)
194+ return r;
195+ }
196+ }
197+
198+ Outcome nextFrame ()
163199 {
164200 int r = av_read_frame (formatContext.get (), &packet);
165201 if (r < 0 ) {// no more frames or error
166- std::cout << " nextframe abort 1 " << std::endl ;
167- ::abort () ;
202+ LogInfo () << " No more frames or cannot read them " ;
203+ return Outcome::End ;
168204 }
169205
170206 if (packet.stream_index == videoStreamIndex) {
171207 r = avcodec_send_packet (videoCodecContext.get (), &packet);
172208 if (r < 0 || r == AVERROR (EAGAIN) || r == AVERROR_EOF) {
173- std::cout << " avcodec_send_packet: " << r << std::endl;
174- ::abort () ;
209+ LogWarn () << " avcodec_send_packet: " << r << std::endl;
210+ return Outcome::Error ;
175211 }
176212
177213 while (r >= 0 ) {
214+ AVFrameRefPtr frame{av_frame_alloc ()};
178215 r = avcodec_receive_frame (videoCodecContext.get (), frame.get ());
179216 if (r == AVERROR (EAGAIN) || r == AVERROR_EOF) {
180217 std::cout << " avcodec_receive_frame: " << r << std::endl;
181218 break ;
182219 }
183- std::cout << " video frame: " << videoCodecContext->frame_number << std::endl;
184220
185- view->setHidden (false );
186- view->setSize (Math::float2 (1 ,1 ));
221+ std::cout << " video frame: " << videoCodecContext->frame_number << " , pts: " << frame->pts << std::endl;
187222
188- if (videoCodecContext->pix_fmt != AV_PIX_FMT_YUV420P) {
189- LogWarn () << " Unknown pixel format" ;
190- return ;
191- }
192-
193- Textures::TextureAllocator& alloc = engine.getEngineTextureAlloc ();
194-
195- std::vector<uint8_t > data;
196- int count = videoCodecContext->width * videoCodecContext->height ;
197- auto clamp = [](int x){ return std::min (255 , std::max (0 , x)); };
198-
199- // conversion from YUV420p to RGBA
200- // TODO: this can be probably accelerated by doing conversion within the fragment shader
201-
202- uint8_t *src = frame->data [0 ];
203- int linesize = frame->linesize [0 ];
204- int w = videoCodecContext->width ;
205- int h = videoCodecContext->height ;
206-
207- uint8_t *dataY = frame->data [0 ];
208- int linesizeY = frame->linesize [0 ];
223+ videoFrames.push (std::move (frame));
224+ }
225+ return Outcome::FrameVideo;
226+ } else if (packet.stream_index == audioStreamIndex) {
227+ std::cout << " An audio frame" << std::endl;
228+ return Outcome::FrameAudio;
229+ } else {
230+ LogWarn () << " Invalid stream index" ;
231+ return Outcome::Error;
232+ }
233+ }
209234
210- uint8_t *dataU = frame->data [1 ];
211- int linesizeU = frame->linesize [1 ];
235+ std::vector<uint8_t > yuv420pToRGBA (const AVFrameRefPtr &frame)
236+ {
237+ std::vector<uint8_t > data;
238+ int count = videoCodecContext->width * videoCodecContext->height ;
239+ auto clamp = [](int x){ return std::min (255 , std::max (0 , x)); };
240+
241+ uint8_t *src = frame->data [0 ];
242+ int linesize = frame->linesize [0 ];
243+ int w = videoCodecContext->width ;
244+ int h = videoCodecContext->height ;
245+
246+ uint8_t *dataY = frame->data [0 ];
247+ int linesizeY = frame->linesize [0 ];
248+
249+ uint8_t *dataU = frame->data [1 ];
250+ int linesizeU = frame->linesize [1 ];
251+
252+ uint8_t *dataV = frame->data [2 ];
253+ int linesizeV = frame->linesize [2 ];
254+
255+ for (int y = 0 ; y < h; ++y)
256+ for (int x = 0 ; x < w; ++x) {
257+ int Y = dataY[y*linesizeY+x];
258+ int U = dataU[y/2 *linesizeU+x/2 ];
259+ int V = dataV[y/2 *linesizeV+x/2 ];
260+
261+ int C = Y - 16 ;
262+ int D = U - 128 ;
263+ int E = V - 128 ;
264+
265+ int R = clamp ( (298 *C + 409 *E + 128 ) >> 8 );
266+ int G = clamp ( (298 *C - 100 *D - 208 *E + 128 ) >> 8 );
267+ int B = clamp ( (298 *C + 516 *D + 128 ) >> 8 );
268+
269+ data.push_back (R);
270+ data.push_back (G);
271+ data.push_back (B);
272+ data.push_back (255 );
273+ }
212274
213- uint8_t *dataV = frame-> data [ 2 ] ;
214- int linesizeV = frame-> linesize [ 2 ];
275+ return data;
276+ }
215277
216- for (int y = 0 ; y < h; ++y)
217- for (int x = 0 ; x < w; ++x) {
218- int Y = dataY[y*linesizeY+x];
219- int U = dataU[y/2 *linesizeU+x/2 ];
220- int V = dataV[y/2 *linesizeV+x/2 ];
278+ void displayVideoFrame (const AVFrameRefPtr &frame)
279+ {
280+ // TODO: this can be probably accelerated by doing conversion within the fragment shader
281+ auto data = yuv420pToRGBA (frame);
221282
222- int C = Y - 16 ;
223- int D = U - 128 ;
224- int E = V - 128 ;
283+ view->setHidden (false );
284+ view->setSize (Math::float2 (1 ,1 ));
225285
226- int R = clamp ( (298 *C + 409 *E + 128 ) >> 8 );
227- int G = clamp ( (298 *C - 100 *D - 208 *E + 128 ) >> 8 );
228- int B = clamp ( (298 *C + 516 *D + 128 ) >> 8 );
286+ Textures::TextureAllocator& alloc = engine.getEngineTextureAlloc ();
229287
230- data.push_back (R);
231- data.push_back (G);
232- data.push_back (B);
233- data.push_back (255 );
234- }
288+ if (!texture.isValid ())
289+ texture = alloc.loadTextureRGBA8 (data, videoCodecContext->width , videoCodecContext->height );
290+ else
291+ std::swap (alloc.getTexture (texture).imageData , data);
292+ alloc.asyncFinalizeLoad (texture);
293+ view->setImage (texture, videoCodecContext->width , videoCodecContext->height );
294+ }
235295
236- if (!texture.isValid ())
237- texture = alloc.loadTextureRGBA8 (data, videoCodecContext->width , videoCodecContext->height );
238- else
239- std::swap (alloc.getTexture (texture).imageData , data);
240- alloc.asyncFinalizeLoad (texture);
241- view->setImage (texture, videoCodecContext->width , videoCodecContext->height );
242- }
243- } else if (packet.stream_index == audioStreamIndex) {
244- std::cout << " An audio frame" << std::endl;
245- } else {
246- LogWarn () << " Invalid stream index" ;
296+ public:
297+ bool update (double dt)
298+ {
299+ currentTime += dt/videoTimeBase;
300+ assert (!videoFrames.empty ());
301+ assert (videoFrames.front ());
302+ if (currentTime >= videoFrames.front ()->pts ) {
303+ AVFrameRefPtr frame;
304+ videoFrames.front ().swap (frame);
305+ videoFrames.pop ();
306+ std::cout << " Displaying frame " << frame->display_picture_number << " " << frame->pts << std::endl;
307+
308+ displayVideoFrame (frame);
309+
310+ if (nextVideoFrame () != Outcome::FrameVideo)
311+ return false ;
247312 }
313+ return true ;
248314 }
249315
250316 bool initialized () const {
@@ -258,7 +324,8 @@ namespace Media {
258324 AVFormatContextPtr formatContext;
259325 int videoStreamIndex, audioStreamIndex;
260326 AVCodecContextPtr videoCodecContext, audioCodecContext;
261- AVFramePtr frame;
327+ std::queue<AVFrameRefPtr> videoFrames;
328+ double videoTimeBase = 0 , audioTimeBase = 0 , currentTime = 0 ;
262329 AVPacket packet;
263330 Handle::TextureHandle texture;
264331 UI::ImageView *view;
@@ -292,7 +359,8 @@ void Media::VideoPlayer::frameUpdate(double dt, uint16_t width, uint16_t height)
292359 return ;
293360 }
294361
295- currentVideo->nextFrame ();
362+ if (!currentVideo->update (dt))
363+ currentVideo = nullptr ;
296364#endif
297365}
298366
0 commit comments