@@ -50,6 +50,45 @@ VideoWriter::VideoWriter(const char *path, const char *filename, bool remuxing,
50
50
}
51
51
}
52
52
53
+ void VideoWriter::initialize_audio (int sample_rate) {
54
+ assert (this ->ofmt_ctx ->oformat ->audio_codec != AV_CODEC_ID_NONE); // check output format supports audio streams
55
+ const AVCodec *audio_avcodec = avcodec_find_encoder (AV_CODEC_ID_AAC);
56
+ assert (audio_avcodec);
57
+ this ->audio_codec_ctx = avcodec_alloc_context3 (audio_avcodec);
58
+ assert (this ->audio_codec_ctx );
59
+ this ->audio_codec_ctx ->sample_fmt = AV_SAMPLE_FMT_FLTP;
60
+ this ->audio_codec_ctx ->sample_rate = sample_rate;
61
+ #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+
62
+ av_channel_layout_default (&this ->audio_codec_ctx ->ch_layout , 1 );
63
+ #else
64
+ this ->audio_codec_ctx ->channel_layout = AV_CH_LAYOUT_MONO;
65
+ #endif
66
+ this ->audio_codec_ctx ->bit_rate = 32000 ;
67
+ this ->audio_codec_ctx ->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
68
+ this ->audio_codec_ctx ->time_base = (AVRational){1 , audio_codec_ctx->sample_rate };
69
+ int err = avcodec_open2 (this ->audio_codec_ctx , audio_avcodec, NULL );
70
+ assert (err >= 0 );
71
+ av_log_set_level (AV_LOG_WARNING); // hide "QAvg" info msgs at the end of every segment
72
+
73
+ this ->audio_stream = avformat_new_stream (this ->ofmt_ctx , NULL );
74
+ assert (this ->audio_stream );
75
+ err = avcodec_parameters_from_context (this ->audio_stream ->codecpar , this ->audio_codec_ctx );
76
+ assert (err >= 0 );
77
+
78
+ this ->audio_frame = av_frame_alloc ();
79
+ assert (this ->audio_frame );
80
+ this ->audio_frame ->format = this ->audio_codec_ctx ->sample_fmt ;
81
+ #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+
82
+ av_channel_layout_copy (&this ->audio_frame ->ch_layout , &this ->audio_codec_ctx ->ch_layout );
83
+ #else
84
+ this ->audio_frame ->channel_layout = this ->audio_codec_ctx ->channel_layout ;
85
+ #endif
86
+ this ->audio_frame ->sample_rate = this ->audio_codec_ctx ->sample_rate ;
87
+ this ->audio_frame ->nb_samples = this ->audio_codec_ctx ->frame_size ;
88
+ err = av_frame_get_buffer (this ->audio_frame , 0 );
89
+ assert (err >= 0 );
90
+ }
91
+
53
92
void VideoWriter::write (uint8_t *data, int len, long long timestamp, bool codecconfig, bool keyframe) {
54
93
if (of && data) {
55
94
size_t written = util::safe_fwrite (data, 1 , len, of);
@@ -67,8 +106,10 @@ void VideoWriter::write(uint8_t *data, int len, long long timestamp, bool codecc
67
106
}
68
107
int err = avcodec_parameters_from_context (out_stream->codecpar , codec_ctx);
69
108
assert (err >= 0 );
109
+ // if there is an audio stream, it must be initialized before this point
70
110
err = avformat_write_header (ofmt_ctx, NULL );
71
111
assert (err >= 0 );
112
+ header_written = true ;
72
113
} else {
73
114
// input timestamps are in microseconds
74
115
AVRational in_timebase = {1 , 1000000 };
@@ -77,6 +118,7 @@ void VideoWriter::write(uint8_t *data, int len, long long timestamp, bool codecc
77
118
av_init_packet (&pkt);
78
119
pkt.data = data;
79
120
pkt.size = len;
121
+ pkt.stream_index = this ->out_stream ->index ;
80
122
81
123
enum AVRounding rnd = static_cast <enum AVRounding>(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
82
124
pkt.pts = pkt.dts = av_rescale_q_rnd (timestamp, in_timebase, ofmt_ctx->streams [0 ]->time_base , rnd);
@@ -95,11 +137,80 @@ void VideoWriter::write(uint8_t *data, int len, long long timestamp, bool codecc
95
137
}
96
138
}
97
139
140
+ void VideoWriter::write_audio (uint8_t *data, int len, long long timestamp, int sample_rate) {
141
+ if (!remuxing) return ;
142
+ if (!audio_initialized) {
143
+ initialize_audio (sample_rate);
144
+ audio_initialized = true ;
145
+ }
146
+ if (!audio_codec_ctx) return ;
147
+ // sync logMonoTime of first audio packet with the timestampEof of first video packet
148
+ if (audio_pts == 0 ) {
149
+ audio_pts = (timestamp * audio_codec_ctx->sample_rate ) / 1000000ULL ;
150
+ }
151
+
152
+ // convert s16le samples to fltp and add to buffer
153
+ const int16_t *raw_samples = reinterpret_cast <const int16_t *>(data);
154
+ int sample_count = len / sizeof (int16_t );
155
+ constexpr float normalizer = 1 .0f / 32768 .0f ;
156
+
157
+ const size_t max_buffer_size = sample_rate * 10 ; // 10 seconds
158
+ if (audio_buffer.size () + sample_count > max_buffer_size) {
159
+ size_t samples_to_drop = (audio_buffer.size () + sample_count) - max_buffer_size;
160
+ LOGE (" Audio buffer overflow, dropping %zu oldest samples" , samples_to_drop);
161
+ audio_buffer.erase (audio_buffer.begin (), audio_buffer.begin () + samples_to_drop);
162
+ audio_pts += samples_to_drop;
163
+ }
164
+
165
+ // Add new samples to the buffer
166
+ const size_t original_size = audio_buffer.size ();
167
+ audio_buffer.resize (original_size + sample_count);
168
+ std::transform (raw_samples, raw_samples + sample_count, audio_buffer.begin () + original_size,
169
+ [](int16_t sample) { return sample * normalizer; });
170
+
171
+ if (!header_written) return ; // header not written yet, process audio frame after header is written
172
+ while (audio_buffer.size () >= audio_codec_ctx->frame_size ) {
173
+ audio_frame->pts = audio_pts;
174
+ float *f_samples = reinterpret_cast <float *>(audio_frame->data [0 ]);
175
+ std::copy (audio_buffer.begin (), audio_buffer.begin () + audio_codec_ctx->frame_size , f_samples);
176
+ audio_buffer.erase (audio_buffer.begin (), audio_buffer.begin () + audio_codec_ctx->frame_size );
177
+ encode_and_write_audio_frame (audio_frame);
178
+ }
179
+ }
180
+
181
+ void VideoWriter::encode_and_write_audio_frame (AVFrame* frame) {
182
+ if (!remuxing || !audio_codec_ctx) return ;
183
+ int send_result = avcodec_send_frame (audio_codec_ctx, frame); // encode frame
184
+ if (send_result >= 0 ) {
185
+ AVPacket *pkt = av_packet_alloc ();
186
+ while (avcodec_receive_packet (audio_codec_ctx, pkt) == 0 ) {
187
+ av_packet_rescale_ts (pkt, audio_codec_ctx->time_base , audio_stream->time_base );
188
+ pkt->stream_index = audio_stream->index ;
189
+
190
+ int err = av_interleaved_write_frame (ofmt_ctx, pkt); // write encoded frame
191
+ if (err < 0 ) {
192
+ LOGW (" AUDIO: Write frame failed - error: %d" , err);
193
+ }
194
+ av_packet_unref (pkt);
195
+ }
196
+ av_packet_free (&pkt);
197
+ } else {
198
+ LOGW (" AUDIO: Failed to send audio frame to encoder: %d" , send_result);
199
+ }
200
+ audio_pts += audio_codec_ctx->frame_size ;
201
+ }
202
+
203
+
98
204
VideoWriter::~VideoWriter () {
99
205
if (this ->remuxing ) {
206
+ if (this ->audio_codec_ctx ) {
207
+ encode_and_write_audio_frame (NULL ); // flush encoder
208
+ avcodec_free_context (&this ->audio_codec_ctx );
209
+ }
100
210
int err = av_write_trailer (this ->ofmt_ctx );
101
211
if (err != 0 ) LOGE (" av_write_trailer failed %d" , err);
102
212
avcodec_free_context (&this ->codec_ctx );
213
+ if (this ->audio_frame ) av_frame_free (&this ->audio_frame );
103
214
err = avio_closep (&this ->ofmt_ctx ->pb );
104
215
if (err != 0 ) LOGE (" avio_closep failed %d" , err);
105
216
avformat_free_context (this ->ofmt_ctx );
0 commit comments