Skip to content

Commit 56fa5bc

Browse files
committed
Play video at expected frame rate
1 parent fd1d4c0 commit 56fa5bc

File tree

1 file changed

+141
-73
lines changed

1 file changed

+141
-73
lines changed

src/media/Video.cpp

Lines changed: 141 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <utils/logger.h>
99
#include <utils/Utils.h>
1010
#include <fstream>
11+
#include <queue>
1112
#include "Video.h"
1213
#include "ui/ImageView.h"
1314

@@ -47,9 +48,20 @@ struct deleteAVFrame
4748
}
4849
};
4950

51+
struct deleteAVFrameRef
52+
{
53+
void operator()(AVFrame *ptr)
54+
{
55+
if (ptr)
56+
av_frame_unref(ptr);
57+
deleteAVFrame{}(ptr);
58+
}
59+
};
60+
5061
using AVCodecContextPtr = std::unique_ptr<AVCodecContext, deleteAVCodecContext>;
5162
using AVFormatContextPtr = std::unique_ptr<AVFormatContext, deleteAVFormatContext>;
5263
using AVFramePtr = std::unique_ptr<AVFrame, deleteAVFrame>;
64+
using AVFrameRefPtr = std::unique_ptr<AVFrame, deleteAVFrameRef>;
5365

5466
}
5567

@@ -69,14 +81,13 @@ namespace Media {
6981
engine.getRootUIView().addChild(view);
7082
}
7183

72-
bool init(const uint16_t width, const uint16_t height)
84+
// Destructor introduced by this commit: it only hides the image view that the
// video was rendered into (the view is not removed from its parent here).
// NOTE(review): the lines tagged 74- .. 78- below are the removed start of the
// old init() body as rendered by the diff view; they are not destructor code.
~Video()
7385
{
74-
frame.reset(av_frame_alloc());
75-
if (!frame) {
76-
LogWarn() << "Could not allocate frame";
77-
return false;
78-
}
86+
view->setHidden(true);
87+
}
7988

89+
bool init(const uint16_t width, const uint16_t height)
90+
{
8091
av_init_packet(&packet);
8192
packet.data = nullptr;
8293
packet.size = 0;
@@ -102,24 +113,33 @@ namespace Media {
102113
return false;
103114
}
104115

105-
if (!openCodecContext(videoStreamIndex, videoCodecContext, AVMEDIA_TYPE_VIDEO)) {
116+
if (!openCodecContext(videoStreamIndex, videoCodecContext, videoTimeBase, AVMEDIA_TYPE_VIDEO)) {
106117
LogWarn() << "Could not open video context";
107118
return false;
108119
}
109120

110-
if (!openCodecContext(audioStreamIndex, audioCodecContext, AVMEDIA_TYPE_AUDIO)) {
121+
if (!openCodecContext(audioStreamIndex, audioCodecContext, audioTimeBase, AVMEDIA_TYPE_AUDIO)) {
111122
LogWarn() << "Could not open audio context";
112123
return false;
113124
}
114125

115126
av_dump_format(formatContext.get(), 0, fileFullPath.c_str(), 0); // TODO: dump to logger
116127

128+
if (videoCodecContext->pix_fmt != AV_PIX_FMT_YUV420P) {
129+
LogWarn() << "Unknown pixel format";
130+
return false;
131+
}
132+
133+
// load first frame
134+
if (nextVideoFrame() != Outcome::FrameVideo)
135+
return false;
136+
117137
initialized_ = true;
118138
return true;
119139
}
120140

121141
private:
122-
bool openCodecContext(int &streamIndex, AVCodecContextPtr &codecContext, AVMediaType mediaType)
142+
bool openCodecContext(int &streamIndex, AVCodecContextPtr &codecContext, double &timeBase, AVMediaType mediaType)
123143
{
124144
int index = av_find_best_stream(formatContext.get(), mediaType, -1, -1, nullptr, 0);
125145
if (index < 0) {
@@ -147,104 +167,150 @@ namespace Media {
147167
}
148168

149169
AVDictionary *opts = nullptr;
150-
av_dict_set(&opts, "refcounted_frames", "0", 0);
170+
av_dict_set(&opts, "refcounted_frames", "1", 0);
151171
if (avcodec_open2(context.get(), codec, &opts) < 0) {
152172
LogWarn() << "Could not open codec for stream" << av_get_media_type_string(mediaType);
153173
return false;
154174
}
155175

156176
streamIndex = index;
157177
std::swap(codecContext, context);
178+
timeBase = av_q2d(stream->time_base);
158179
return true;
159180
}
160181

161-
public:
162-
void nextFrame()
182+
/// Result of pulling one packet from the container.
enum class Outcome {
    End,        ///< reading the next packet failed (end of stream or read error)
    Error,      ///< the packet could not be handled (decoder error, unknown stream)
    FrameVideo, ///< a packet of the video stream was processed
    FrameAudio  ///< a packet of the audio stream was encountered
};
185+
186+
/// Keeps pulling packets via nextFrame() and skips audio packets.
/// @return Outcome::FrameVideo once a video packet was processed, or
///         Outcome::End / Outcome::Error as soon as nextFrame() reports one.
Outcome nextVideoFrame()
{
    Outcome outcome;
    do {
        outcome = nextFrame();
    } while (outcome == Outcome::FrameAudio); // the only outcome that is not final here
    return outcome;
}
197+
198+
/// Reads packets from the container and feeds video packets to the decoder.
///
/// Every decoded video frame is appended to `videoFrames`. Video packets that
/// do not yet yield a frame (the decoder wants more input) are consumed
/// silently and the next packet is read, so Outcome::FrameVideo always means
/// that at least one new frame was queued.
///
/// @return Outcome::FrameVideo  at least one video frame was queued,
///         Outcome::FrameAudio  an audio packet was read (it is not decoded here),
///         Outcome::End         av_read_frame() failed (end of file or read error),
///         Outcome::Error       decoder failure, allocation failure or a packet
///                              of an unexpected stream.
Outcome nextFrame()
{
    // av_read_frame() hands us a reference to the packet data; it has to be
    // released with av_packet_unref() on every path, otherwise each packet leaks.
    struct PacketGuard {
        AVPacket &pkt;
        ~PacketGuard() { av_packet_unref(&pkt); }
    };

    while (true) {
        if (av_read_frame(formatContext.get(), &packet) < 0) { // no more frames or error
            LogInfo() << "No more frames or cannot read them";
            return Outcome::End;
        }
        PacketGuard packetGuard{packet};

        if (packet.stream_index == videoStreamIndex) {
            int r = avcodec_send_packet(videoCodecContext.get(), &packet);
            if (r < 0) { // includes AVERROR(EAGAIN) and AVERROR_EOF, both are negative
                LogWarn() << "avcodec_send_packet: " << r;
                return Outcome::Error;
            }

            // One packet may produce zero, one or several frames: drain the decoder.
            bool queuedFrame = false;
            while (true) {
                AVFrameRefPtr frame{av_frame_alloc()};
                if (!frame) {
                    LogWarn() << "Could not allocate frame";
                    return Outcome::Error;
                }

                r = avcodec_receive_frame(videoCodecContext.get(), frame.get());
                if (r == AVERROR(EAGAIN) || r == AVERROR_EOF) {
                    // decoder is drained for now
                    std::cout << "avcodec_receive_frame: " << r << std::endl;
                    break;
                }
                if (r < 0) {
                    // genuine decoding error: never queue a frame that was not filled in
                    LogWarn() << "avcodec_receive_frame: " << r;
                    return Outcome::Error;
                }

                std::cout << "video frame: " << videoCodecContext->frame_number << ", pts: " << frame->pts << std::endl;

                videoFrames.push(std::move(frame));
                queuedFrame = true;
            }

            if (queuedFrame)
                return Outcome::FrameVideo;
            // Nothing decoded yet: fall through and read the next packet.
        } else if (packet.stream_index == audioStreamIndex) {
            std::cout << "An audio frame" << std::endl;
            return Outcome::FrameAudio;
        } else {
            LogWarn() << "Invalid stream index";
            return Outcome::Error;
        }
    }
}
209234

210-
uint8_t *dataU = frame->data[1];
211-
int linesizeU = frame->linesize[1];
235+
std::vector<uint8_t> yuv420pToRGBA(const AVFrameRefPtr &frame)
236+
{
237+
std::vector<uint8_t> data;
238+
int count = videoCodecContext->width * videoCodecContext->height;
239+
auto clamp = [](int x){ return std::min(255, std::max(0, x)); };
240+
241+
uint8_t *src = frame->data[0];
242+
int linesize = frame->linesize[0];
243+
int w = videoCodecContext->width;
244+
int h = videoCodecContext->height;
245+
246+
uint8_t *dataY = frame->data[0];
247+
int linesizeY = frame->linesize[0];
248+
249+
uint8_t *dataU = frame->data[1];
250+
int linesizeU = frame->linesize[1];
251+
252+
uint8_t *dataV = frame->data[2];
253+
int linesizeV = frame->linesize[2];
254+
255+
for (int y = 0; y < h; ++y)
256+
for (int x = 0; x < w; ++x) {
257+
int Y = dataY[y*linesizeY+x];
258+
int U = dataU[y/2*linesizeU+x/2];
259+
int V = dataV[y/2*linesizeV+x/2];
260+
261+
int C = Y - 16;
262+
int D = U - 128;
263+
int E = V - 128;
264+
265+
int R = clamp( (298*C + 409*E + 128 ) >> 8 );
266+
int G = clamp( (298*C - 100*D - 208*E + 128 ) >> 8 );
267+
int B = clamp( (298*C + 516*D + 128 ) >> 8 );
268+
269+
data.push_back(R);
270+
data.push_back(G);
271+
data.push_back(B);
272+
data.push_back(255);
273+
}
212274

213-
uint8_t *dataV = frame->data[2];
214-
int linesizeV = frame->linesize[2];
275+
return data;
276+
}
215277

216-
for (int y = 0; y < h; ++y)
217-
for (int x = 0; x < w; ++x) {
218-
int Y = dataY[y*linesizeY+x];
219-
int U = dataU[y/2*linesizeU+x/2];
220-
int V = dataV[y/2*linesizeV+x/2];
278+
void displayVideoFrame(const AVFrameRefPtr &frame)
279+
{
280+
// TODO: this can be probably accelerated by doing conversion within the fragment shader
281+
auto data = yuv420pToRGBA(frame);
221282

222-
int C = Y - 16;
223-
int D = U - 128;
224-
int E = V - 128;
283+
view->setHidden(false);
284+
view->setSize(Math::float2(1,1));
225285

226-
int R = clamp( (298*C + 409*E + 128 ) >> 8 );
227-
int G = clamp( (298*C - 100*D - 208*E + 128 ) >> 8 );
228-
int B = clamp( (298*C + 516*D + 128 ) >> 8 );
286+
Textures::TextureAllocator& alloc = engine.getEngineTextureAlloc();
229287

230-
data.push_back(R);
231-
data.push_back(G);
232-
data.push_back(B);
233-
data.push_back(255);
234-
}
288+
if (!texture.isValid())
289+
texture = alloc.loadTextureRGBA8(data, videoCodecContext->width, videoCodecContext->height);
290+
else
291+
std::swap(alloc.getTexture(texture).imageData, data);
292+
alloc.asyncFinalizeLoad(texture);
293+
view->setImage(texture, videoCodecContext->width, videoCodecContext->height);
294+
}
235295

236-
if (!texture.isValid())
237-
texture = alloc.loadTextureRGBA8(data, videoCodecContext->width, videoCodecContext->height);
238-
else
239-
std::swap(alloc.getTexture(texture).imageData, data);
240-
alloc.asyncFinalizeLoad(texture);
241-
view->setImage(texture, videoCodecContext->width, videoCodecContext->height);
242-
}
243-
} else if(packet.stream_index == audioStreamIndex) {
244-
std::cout << "An audio frame" << std::endl;
245-
} else {
246-
LogWarn() << "Invalid stream index";
296+
public:
297+
bool update(double dt)
298+
{
299+
currentTime += dt/videoTimeBase;
300+
assert(!videoFrames.empty());
301+
assert(videoFrames.front());
302+
if (currentTime >= videoFrames.front()->pts) {
303+
AVFrameRefPtr frame;
304+
videoFrames.front().swap(frame);
305+
videoFrames.pop();
306+
std::cout << "Displaying frame " << frame->display_picture_number << " " << frame->pts << std::endl;
307+
308+
displayVideoFrame(frame);
309+
310+
if (nextVideoFrame() != Outcome::FrameVideo)
311+
return false;
247312
}
313+
return true;
248314
}
249315

250316
bool initialized() const {
@@ -258,7 +324,8 @@ namespace Media {
258324
AVFormatContextPtr formatContext;
259325
int videoStreamIndex, audioStreamIndex;
260326
AVCodecContextPtr videoCodecContext, audioCodecContext;
261-
AVFramePtr frame;
327+
std::queue<AVFrameRefPtr> videoFrames;
328+
double videoTimeBase = 0, audioTimeBase = 0, currentTime = 0;
262329
AVPacket packet;
263330
Handle::TextureHandle texture;
264331
UI::ImageView *view;
@@ -292,7 +359,8 @@ void Media::VideoPlayer::frameUpdate(double dt, uint16_t width, uint16_t height)
292359
return;
293360
}
294361

295-
currentVideo->nextFrame();
362+
if (!currentVideo->update(dt))
363+
currentVideo = nullptr;
296364
#endif
297365
}
298366

0 commit comments

Comments
 (0)