22 #include "av1/common/enums.h"
23 #include "common/tools_common.h"
24 #include "common/video_writer.h"
25 #include "aom_ports/aom_timer.h"
/* Zero-fills the object Dest: every one of its sizeof(Dest) bytes is set to 0.
 * Dest must name the object (or array) itself, not a pointer to it, because
 * the macro takes both the address and the size of its argument directly.
 * The expansion deliberately carries no trailing semicolon, so `zero(x);`
 * is exactly one statement and stays valid in an unbraced if/else body. */
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
/* File-scope string pointer. NOTE(review): presumably the program name taken
 * from argv[0]; no assignment or use is visible in this excerpt -- confirm. */
29 static const char *exec_name;
/* Ends the process immediately with a failure exit status.
 * Takes no arguments and never returns to the caller. */
void usage_exit(void) {
  exit(EXIT_FAILURE);
}
/* Number of temporal layers for each layering mode, indexed by the mode
 * number (main reads ts_number_layers from this table).
 * Declared const because it is a read-only lookup table.
 * The table holds exactly 4 entries, so any index used with it must stay
 * within 0..3. */
static const int mode_to_num_layers[4] = { 1, 2, 3, 3 };
/* Accumulators used to report rate-control statistics for an encode.
 * Only part of the member list is visible in this excerpt. */
36 struct RateControlMetrics {
/* Running sum of per-window bitrates gathered in main; the summary printout
 * divides it by window_count to obtain the average. */
55 double avg_st_encoding_bitrate;
/* Running sum of squared per-window bitrates gathered in main; the summary
 * printout converts it to a variance (mean of squares minus squared mean). */
57 double variance_st_encoding_bitrate;
/* Reads the next input frame into img.
 * Y4M inputs are fetched with y4m_input_fetch_frame; the other visible path
 * calls read_yuv_frame. Returns 0 when the Y4M fetch reports fewer than one
 * frame. The remaining return paths are not visible in this excerpt. */
65 static int read_frame(
struct AvxInputContext *input_ctx,
aom_image_t *img) {
66 FILE *f = input_ctx->file;
67 y4m_input *y4m = &input_ctx->y4m;
70 if (input_ctx->file_type == FILE_TYPE_Y4M) {
71 if (y4m_input_fetch_frame(y4m, f, img) < 1)
return 0;
/* NOTE(review): presumably the non-Y4M branch; the else line and the
 * declaration of shortread are not visible here -- confirm. */
73 shortread = read_yuv_frame(input_ctx, img);
/* Compares the first four bytes of detect against the signature "YUV4".
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero on a match and 0 otherwise -- confirm. */
79 static int file_is_y4m(
const char detect[4]) {
80 if (memcmp(detect,
"YUV4", 4) == 0) {
/* Compares the first four bytes of detect against the signature "DKIF".
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero on a match and 0 otherwise -- confirm. */
86 static int fourcc_is_ivf(
const char detect[4]) {
87 if (memcmp(detect,
"DKIF", 4) == 0) {
/* Shuts down the input described by input.
 * For Y4M inputs this calls y4m_input_close on the embedded y4m state.
 * Any further cleanup is not visible in this excerpt. */
93 static void close_input_file(
struct AvxInputContext *input) {
95 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
/* Opens the input named by input->filename and probes its container type.
 * The first four bytes are read into input->detect.buf: a "YUV4" signature
 * selects the Y4M reader and copies its stream parameters into input, a
 * "DKIF" signature is rejected via fatal(), and FILE_TYPE_RAW is assigned on
 * the remaining visible path. Part of the parameter list (including csp) is
 * not visible in this excerpt. */
98 static void open_input_file(
struct AvxInputContext *input,
/* A filename of "-" selects stdin (switched by set_binary_mode); any other
 * name is opened with fopen in "rb" mode. */
101 input->file = strcmp(input->filename,
"-") ? fopen(input->filename,
"rb")
102 : set_binary_mode(stdin);
104 if (!input->file) fatal(
"Failed to open input file");
/* The length is only recorded when seeking to the end succeeds (fseeko
 * returns 0). */
106 if (!fseeko(input->file, 0, SEEK_END)) {
110 input->length = ftello(input->file);
/* Default pixel aspect ratio of 1:1; overwritten below for Y4M input. */
115 input->pixel_aspect_ratio.numerator = 1;
116 input->pixel_aspect_ratio.denominator = 1;
/* Read up to four signature bytes; buf_read records how many were read. */
121 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
122 input->detect.position = 0;
124 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
/* The four probe bytes are handed to the Y4M reader together with the
 * file; a non-negative result is treated as success. */
125 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
126 input->only_i420) >= 0) {
127 input->file_type = FILE_TYPE_Y4M;
128 input->width = input->y4m.pic_w;
129 input->height = input->y4m.pic_h;
130 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
131 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
132 input->framerate.numerator = input->y4m.fps_n;
133 input->framerate.denominator = input->y4m.fps_d;
134 input->fmt = input->y4m.aom_fmt;
135 input->bit_depth = input->y4m.bit_depth;
137 fatal(
"Unsupported Y4M stream.");
139 }
else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
140 fatal(
"IVF is not supported as input.");
/* NOTE(review): presumably the fallback else branch for unrecognised
 * signatures; the else line is not visible here -- confirm. */
142 input->file_type = FILE_TYPE_RAW;
/* Initialises the rate-control accumulators in rc for ts_number_layers
 * temporal layers: derives each layer's framerate from the overall framerate
 * and a per-layer decimation factor, then clears the per-layer counters and
 * the short-time window statistics. Part of the parameter list (including
 * framerate) is not visible in this excerpt. */
152 static void set_rate_control_metrics(
struct RateControlMetrics *rc,
154 unsigned int ts_number_layers) {
/* Decimation factors: 1 for one layer, {2, 1} for two layers and
 * {4, 2, 1} for three layers. */
156 ts_rate_decimator[0] = 1;
157 if (ts_number_layers == 2) {
158 ts_rate_decimator[0] = 2;
159 ts_rate_decimator[1] = 1;
161 if (ts_number_layers == 3) {
162 ts_rate_decimator[0] = 4;
163 ts_rate_decimator[1] = 2;
164 ts_rate_decimator[2] = 1;
/* Layer 0: framerate is the overall framerate divided by its decimator.
 * NOTE(review): the expression that follows (1000 * target bitrate /
 * layer framerate) is presumably assigned to rc->layer_pfb[0]; the
 * assignment line is not visible here -- confirm. */
168 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
170 1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0];
171 for (
unsigned int i = 0; i < ts_number_layers; ++i) {
/* NOTE(review): the expression below indexes [i - 1] while this loop starts
 * at i = 0; a guard such as `if (i > 0)` is not visible in this excerpt --
 * confirm one exists, otherwise the unsigned index wraps. */
173 rc->layer_framerate[i] = framerate / ts_rate_decimator[i];
176 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
177 (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
/* Clear the per-layer counters and accumulators. */
179 rc->layer_input_frames[i] = 0;
180 rc->layer_enc_frames[i] = 0;
181 rc->layer_tot_enc_frames[i] = 0;
182 rc->layer_encoding_bitrate[i] = 0.0;
183 rc->layer_avg_frame_size[i] = 0.0;
184 rc->layer_avg_rate_mismatch[i] = 0.0;
/* Short-time statistics start empty and use a window of 15 frames. */
186 rc->window_count = 0;
187 rc->window_size = 15;
188 rc->avg_st_encoding_bitrate = 0.0;
189 rc->variance_st_encoding_bitrate = 0.0;
/* Prints the rate-control summary: per-layer bitrate, frame-size, mismatch
 * and drop statistics, followed by the short-time window statistics.
 * The accumulators in rc are normalised in place while printing, so rc holds
 * averages rather than sums afterwards. Calls die() when the per-layer input
 * frame counts do not add up to frame_cnt - 1. Part of the parameter list
 * (including frame_cnt) is not visible in this excerpt. */
192 static void printout_rate_control_summary(
struct RateControlMetrics *rc,
194 unsigned int ts_number_layers) {
195 int tot_num_frames = 0;
196 double perc_fluctuation = 0.0;
197 printf(
"Total number of processed frames: %d\n\n", frame_cnt - 1);
198 printf(
"Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
199 for (
unsigned int i = 0; i < ts_number_layers; ++i) {
/* Layer 0 subtracts one extra frame. NOTE(review): presumably this accounts
 * for the key frame, which the printed label excludes from the encoded
 * count -- confirm. */
200 const int num_dropped =
201 i > 0 ? rc->layer_input_frames[i] - rc->layer_enc_frames[i]
202 : rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1;
203 tot_num_frames += rc->layer_input_frames[i];
/* Turn the accumulated sums into per-layer averages. NOTE(review): the
 * divisions by layer_enc_frames[i] have no visible guard against a layer
 * with zero encoded frames -- confirm. */
204 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] *
205 rc->layer_encoding_bitrate[i] /
207 rc->layer_avg_frame_size[i] =
208 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[i];
209 rc->layer_avg_rate_mismatch[i] =
210 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[i];
211 printf(
"For layer#: %d\n", i);
212 printf(
"Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
213 rc->layer_encoding_bitrate[i]);
214 printf(
"Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
215 rc->layer_avg_frame_size[i]);
216 printf(
"Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
218 "Number of input frames, encoded (non-key) frames, "
219 "and perc dropped frames: %d %d %f\n",
220 rc->layer_input_frames[i], rc->layer_enc_frames[i],
221 100.0 * num_dropped / rc->layer_input_frames[i]);
/* Short-time statistics: mean over the collected windows, then variance as
 * mean of squares minus squared mean. NOTE(review): window_count is used as
 * a divisor with no visible zero check; it stays 0 when no window completed
 * -- confirm. */
224 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
225 rc->variance_st_encoding_bitrate =
226 rc->variance_st_encoding_bitrate / rc->window_count -
227 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
/* Fluctuation is the standard deviation as a percentage of the mean. */
228 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
229 rc->avg_st_encoding_bitrate;
230 printf(
"Short-time stats, for window of %d frames:\n", rc->window_size);
231 printf(
"Average, rms-variance, and percent-fluct: %f %f %f\n",
232 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
/* Consistency check: the per-layer input frame counts must sum to the
 * number of processed frames. */
234 if (frame_cnt - 1 != tot_num_frames)
235 die(
"Error: Number of input frames not equal to output!\n");
/* Fills ref_frame_config for the current frame according to layering_mode
 * and frame_cnt. The visible branches choose which refresh[] flags are set
 * and how ref_idx[] is mapped, based on frame_cnt modulo 2 or 4. An
 * unrecognised mode hits assert(0) and then die(). The rest of the parameter
 * list, the case labels and the returned value are not visible in this
 * excerpt. */
239 static int set_layer_pattern(
int layering_mode,
int frame_cnt,
/* Defaults before the mode-specific overrides: ref_idx[i] = i for every
 * inter reference and every refresh flag cleared. */
248 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->
ref_idx[i] = i;
249 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->
refresh[i] = 0;
254 switch (layering_mode) {
258 ref_frame_config->
refresh[0] = 1;
/* Pattern keyed on the parity of frame_cnt. */
264 if (frame_cnt % 2 == 0) {
267 ref_frame_config->
refresh[0] = 1;
/* Pattern with a period of four frames, selected by frame_cnt modulo 4. */
279 if (frame_cnt % 4 == 0) {
283 ref_frame_config->
refresh[0] = 1;
284 }
else if ((frame_cnt - 1) % 4 == 0) {
288 }
else if ((frame_cnt - 2) % 4 == 0) {
291 ref_frame_config->
refresh[1] = 1;
293 }
else if ((frame_cnt - 3) % 4 == 0) {
/* Swap the first two reference indices relative to the defaults above. */
298 ref_frame_config->
ref_idx[0] = 1;
299 ref_frame_config->
ref_idx[1] = 0;
/* A second four-frame pattern; here the third phase refreshes slot 3. */
309 if (frame_cnt % 4 == 0) {
313 ref_frame_config->
refresh[0] = 1;
315 }
else if ((frame_cnt - 1) % 4 == 0) {
319 }
else if ((frame_cnt - 2) % 4 == 0) {
322 ref_frame_config->
refresh[3] = 1;
324 }
else if ((frame_cnt - 3) % 4 == 0) {
/* Unknown mode: assert(0) fires when assertions are enabled; die() is still
 * called when they are compiled out. */
330 default: assert(0); die(
"Error: Unsupported temporal layering mode!\n");
/* Program entry point: parses the positional command-line arguments, opens
 * the input, opens one IVF output per temporal layer, runs the encode loop
 * while collecting rate-control statistics, then prints the summary and
 * timing figures. Many statements of this function are not visible in this
 * excerpt, and it continues past the end of the excerpt. */
335 int main(
int argc,
char **argv) {
344 uint32_t error_resilient = 0;
351 int frame_duration = 1;
352 int layering_mode = 0;
356 const AvxInterface *encoder = NULL;
357 struct AvxInputContext input_ctx;
358 struct RateControlMetrics rc;
/* Minimum argc before the per-layer bitrate arguments are counted. */
360 const int min_args_base = 13;
361 const int min_args = min_args_base;
/* Accumulators for two short-time bitrate windows (see the encode loop). */
362 double sum_bitrate = 0.0;
363 double sum_bitrate2 = 0.0;
364 double framerate = 30.0;
365 zero(rc.layer_target_bitrate);
367 memset(&input_ctx, 0,
sizeof(input_ctx));
368 memset(&svc_params, 0,
sizeof(svc_params));
/* Input defaults: 30/1 fps, I420 only, bit depth 0. */
371 input_ctx.framerate.numerator = 30;
372 input_ctx.framerate.denominator = 1;
373 input_ctx.only_i420 = 1;
374 input_ctx.bit_depth = 0;
375 unsigned int ts_number_layers = 1;
376 unsigned int ss_number_layers = 1;
379 if (argc < min_args) {
380 die(
"Usage: %s <infile> <outfile> <codec_type(av1)> <width> <height> "
381 "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
382 "<error_resilient> <threads> <mode> "
383 "<Rate_0> ... <Rate_nlayers-1>\n",
387 encoder = get_aom_encoder_by_name(argv[3]);
/* Width and height must each be at least 16 and even. */
389 width = (
unsigned int)strtoul(argv[4], NULL, 0);
390 height = (
unsigned int)strtoul(argv[5], NULL, 0);
391 if (width < 16 || width % 2 || height < 16 || height % 2) {
392 die(
"Invalid resolution: %d x %d", width, height);
/* NOTE(review): this check accepts modes 0..13, the message says 0..12, and
 * mode_to_num_layers has only 4 entries, so a mode above 3 would index past
 * the end of the table below -- confirm the intended upper bound. */
395 layering_mode = (int)strtol(argv[12], NULL, 0);
396 if (layering_mode < 0 || layering_mode > 13) {
397 die(
"Invalid layering mode (0..12) %s", argv[12]);
/* Exactly one bitrate argument is required per temporal layer. */
400 if (argc != min_args + mode_to_num_layers[layering_mode]) {
401 die(
"Invalid number of arguments");
404 ts_number_layers = mode_to_num_layers[layering_mode];
406 input_ctx.filename = argv[1];
407 open_input_file(&input_ctx, 0);
/* NOTE(review): the die() call below passes width and height although its
 * format string has no conversion specifiers -- confirm the intended
 * message. */
410 if (input_ctx.file_type != FILE_TYPE_Y4M) {
412 die(
"Failed to allocate image", width, height);
/* NOTE(review): the accepted range is 0..8, but the message only says
 * "must be positive" -- confirm the wording. */
431 speed = (int)strtol(argv[8], NULL, 0);
432 if (speed < 0 || speed > 8) {
433 die(
"Invalid speed setting: must be positive");
/* Per-layer target bitrates follow the fixed arguments; the literal 13 in
 * the index equals min_args_base. */
436 for (i = min_args_base;
437 (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
438 rc.layer_target_bitrate[i - 13] = (int)strtol(argv[i], NULL, 0);
445 if (ts_number_layers == 2) {
448 }
else if (ts_number_layers == 3) {
468 cfg.
g_threads = (
unsigned int)strtoul(argv[11], NULL, 0);
/* error_resilient must be exactly 0 or 1. */
470 error_resilient = (uint32_t)strtoul(argv[10], NULL, 0);
471 if (error_resilient != 0 && error_resilient != 1) {
472 die(
"Invalid value for error resilient (0, 1): %d.", error_resilient);
483 set_rate_control_metrics(&rc, framerate, ts_number_layers);
/* For Y4M input the stream's own dimensions must match the configured
 * ones. */
485 if (input_ctx.file_type == FILE_TYPE_Y4M) {
486 if (input_ctx.width != cfg.
g_w || input_ctx.height != cfg.
g_h) {
487 die(
"Incorrect width or height: %d x %d", cfg.
g_w, cfg.
g_h);
491 die(
"Incorrect framerate: numerator %d denominator %d",
/* One IVF output file per temporal layer, named "<outfile>_<layer>.av1". */
497 for (i = 0; i < ts_number_layers; ++i) {
498 char file_name[PATH_MAX];
500 info.codec_fourcc = encoder->fourcc;
501 info.frame_width = cfg.
g_w;
502 info.frame_height = cfg.
g_h;
506 snprintf(file_name,
sizeof(file_name),
"%s_%d.av1", argv[2], i);
507 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
508 if (!outfile[i]) die(
"Failed to open %s for writing", file_name);
510 assert(outfile[i] != NULL);
515 die_codec(&codec,
"Failed to initialize encoder");
524 for (i = 0; i < ts_number_layers; ++i) {
528 for (i = 0; i < ss_number_layers; ++i) {
538 const int max_intra_size_pct = 300;
/* Encode loop: continues while input frames remain or the encoder still
 * produced data on the previous pass. */
544 while (frame_avail || got_data) {
545 struct aom_usec_timer timer;
551 flags = set_layer_pattern(layering_mode, frame_cnt, &layer_id,
556 frame_avail = read_frame(&input_ctx, &raw);
/* Only the span between timer start and mark is added to cx_time. */
558 aom_usec_timer_start(&timer);
560 die_codec(&codec,
"Failed to encode frame");
562 aom_usec_timer_mark(&timer);
563 cx_time += aom_usec_timer_elapsed(&timer);
570 aom_video_writer_write_frame(outfile[i], pkt->
data.
frame.buf,
/* Per-layer accumulation; packet sizes are converted from bytes to bits
 * (factor 8.0). */
572 ++rc.layer_tot_enc_frames[i];
573 rc.layer_encoding_bitrate[i] += 8.0 * pkt->
data.
frame.sz;
577 rc.layer_avg_frame_size[i] += 8.0 * pkt->
data.
frame.sz;
578 rc.layer_avg_rate_mismatch[i] +=
579 fabs(8.0 * pkt->
data.
frame.sz - rc.layer_pfb[i]) /
581 ++rc.layer_enc_frames[i];
/* First short-time window: starts once frame_cnt exceeds window_size and
 * closes whenever frame_cnt is a multiple of window_size. */
587 if (frame_cnt > rc.window_size) {
588 sum_bitrate += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
/* Clamp the window size to at least 1 before it is used as a modulus and a
 * divisor. */
589 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
590 if (frame_cnt % rc.window_size == 0) {
591 rc.window_count += 1;
592 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
593 rc.variance_st_encoding_bitrate +=
594 (sum_bitrate / rc.window_size) *
595 (sum_bitrate / rc.window_size);
/* Second window: starts accumulating half a window later than the first
 * and only closes once frame_cnt exceeds two window lengths. */
600 if (frame_cnt > rc.window_size + rc.window_size / 2) {
601 sum_bitrate2 += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
602 if (frame_cnt > 2 * rc.window_size &&
603 frame_cnt % rc.window_size == 0) {
604 rc.window_count += 1;
605 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
606 rc.variance_st_encoding_bitrate +=
607 (sum_bitrate2 / rc.window_size) *
608 (sum_bitrate2 / rc.window_size);
617 pts += frame_duration;
619 close_input_file(&input_ctx);
620 printout_rate_control_summary(&rc, frame_cnt, ts_number_layers);
/* NOTE(review): frame_cnt * 1000000 below is evaluated before the cast to
 * double; frame_cnt's declaration is not visible here, but if it is an int
 * the product overflows beyond roughly 2147 frames -- confirm. */
622 printf(
"Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
623 frame_cnt, 1000 * (
float)cx_time / (
double)(frame_cnt * 1000000),
624 1000000 * (
double)frame_cnt / (
double)cx_time);
629 for (i = 0; i < ts_number_layers; ++i) aom_video_writer_close(outfile[i]);
631 if (input_ctx.file_type != FILE_TYPE_Y4M) {