Skip to content

Commit 1569451

Browse files
committed
initialize audio separately and pass sample_rate in
1 parent 6b951cf commit 1569451

File tree

3 files changed

+74
-57
lines changed

3 files changed

+74
-57
lines changed

system/loggerd/loggerd.cc

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ struct RemoteEncoder {
6262
bool recording = false;
6363
bool marked_ready_to_rotate = false;
6464
bool seen_first_packet = false;
65+
bool audio_initialized = false;
6566
};
6667

6768
size_t write_encode_data(LoggerdState *s, cereal::Event::Reader event, RemoteEncoder &re, const EncoderInfo &encoder_info) {
@@ -80,11 +81,6 @@ size_t write_encode_data(LoggerdState *s, cereal::Event::Reader event, RemoteEnc
8081
}
8182
// if we aren't actually recording, don't create the writer
8283
if (encoder_info.record) {
83-
assert(encoder_info.filename != NULL);
84-
re.writer.reset(new VideoWriter(s->logger.segmentPath().c_str(),
85-
encoder_info.filename, idx.getType() != cereal::EncodeIndex::Type::FULL_H_E_V_C,
86-
edata.getWidth(), edata.getHeight(), encoder_info.fps, idx.getType(),
87-
encoder_info.include_audio));
8884
// write the header
8985
auto header = edata.getHeader();
9086
re.writer->write((uint8_t *)header.begin(), header.size(), idx.getTimestampEof() / 1000, true, false);
@@ -139,13 +135,19 @@ int handle_encoder_msg(LoggerdState *s, Message *msg, std::string &name, struct
139135

140136
// if this is a new segment, we close any possible old segments, move to the new, and process any queued packets
141137
if (re.current_segment != s->logger.segment()) {
142-
if (re.recording) {
143-
re.writer.reset();
138+
if (encoder_info.record) {
139+
assert(encoder_info.filename != NULL);
140+
re.writer.reset(new VideoWriter(s->logger.segmentPath().c_str(),
141+
encoder_info.filename, idx.getType() != cereal::EncodeIndex::Type::FULL_H_E_V_C,
142+
edata.getWidth(), edata.getHeight(), encoder_info.fps, idx.getType()));
144143
re.recording = false;
144+
re.audio_initialized = false;
145145
}
146146
re.current_segment = s->logger.segment();
147147
re.marked_ready_to_rotate = false;
148148
// we are in this segment now, process any queued messages before this one
149+
}
150+
if (re.audio_initialized || !encoder_info.include_audio) {
149151
if (!re.q.empty()) {
150152
for (auto qmsg : re.q) {
151153
capnp::FlatArrayMessageReader reader({(capnp::word *)qmsg->getData(), qmsg->getSize() / sizeof(capnp::word)});
@@ -154,9 +156,14 @@ int handle_encoder_msg(LoggerdState *s, Message *msg, std::string &name, struct
154156
}
155157
re.q.clear();
156158
}
159+
bytes_count += write_encode_data(s, event, re, encoder_info);
160+
delete msg;
161+
} else if (re.q.size() > MAIN_FPS*10) {
162+
LOGE_100("%s: dropping frame waiting for audio initialization, queue is too large", name.c_str());
163+
delete msg;
164+
} else {
165+
re.q.push_back(msg); // queue up all the new segment messages, they go in after audio is initialized
157166
}
158-
bytes_count += write_encode_data(s, event, re, encoder_info);
159-
delete msg;
160167
} else if (offset_segment_num > s->logger.segment()) {
161168
// encoderd packet has a newer segment, this means encoderd has rolled over
162169
if (!re.marked_ready_to_rotate) {
@@ -288,9 +295,11 @@ void loggerd_thread() {
288295
capnp::FlatArrayMessageReader cmsg(kj::ArrayPtr<capnp::word>((capnp::word *)msg->getData(), msg->getSize() / sizeof(capnp::word)));
289296
auto event = cmsg.getRoot<cereal::Event>();
290297
auto audio_data = event.getRawAudioData().getData();
298+
auto sample_rate = event.getRawAudioData().getSampleRate();
291299
for (auto* encoder : encoders_with_audio) {
292300
if (encoder && encoder->writer) {
293-
encoder->writer->write_audio((uint8_t*)audio_data.begin(), audio_data.size(), event.getLogMonoTime() / 1000);
301+
encoder->writer->write_audio((uint8_t*)audio_data.begin(), audio_data.size(), event.getLogMonoTime() / 1000, sample_rate);
302+
encoder->audio_initialized = true;
294303
}
295304
}
296305
}

system/loggerd/video_writer.cc

Lines changed: 51 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "common/swaglog.h"
66
#include "common/util.h"
77

8-
VideoWriter::VideoWriter(const char *path, const char *filename, bool remuxing, int width, int height, int fps, cereal::EncodeIndex::Type codec, bool include_audio)
8+
VideoWriter::VideoWriter(const char *path, const char *filename, bool remuxing, int width, int height, int fps, cereal::EncodeIndex::Type codec)
99
: remuxing(remuxing) {
1010
vid_path = util::string_format("%s/%s", path, filename);
1111
lock_path = util::string_format("%s/%s.lock", path, filename);
@@ -41,45 +41,6 @@ VideoWriter::VideoWriter(const char *path, const char *filename, bool remuxing,
4141
this->out_stream = avformat_new_stream(this->ofmt_ctx, raw ? avcodec : NULL);
4242
assert(this->out_stream);
4343

44-
if (include_audio) {
45-
assert(this->ofmt_ctx->oformat->audio_codec != AV_CODEC_ID_NONE); // check output format supports audio streams
46-
const AVCodec *audio_avcodec = avcodec_find_encoder(AV_CODEC_ID_AAC);
47-
assert(audio_avcodec);
48-
this->audio_codec_ctx = avcodec_alloc_context3(audio_avcodec);
49-
assert(this->audio_codec_ctx);
50-
this->audio_codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
51-
this->audio_codec_ctx->sample_rate = 16000; // from system/micd.py
52-
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+
53-
av_channel_layout_default(&this->audio_codec_ctx->ch_layout, 1);
54-
#else
55-
this->audio_codec_ctx->channel_layout = AV_CH_LAYOUT_MONO;
56-
#endif
57-
this->audio_codec_ctx->bit_rate = 32000;
58-
this->audio_codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
59-
this->audio_codec_ctx->time_base = (AVRational){1, audio_codec_ctx->sample_rate};
60-
int err = avcodec_open2(this->audio_codec_ctx, audio_avcodec, NULL);
61-
assert(err >= 0);
62-
av_log_set_level(AV_LOG_WARNING); // hide "QAvg" info msgs at the end of every segment
63-
64-
this->audio_stream = avformat_new_stream(this->ofmt_ctx, NULL);
65-
assert(this->audio_stream);
66-
err = avcodec_parameters_from_context(this->audio_stream->codecpar, this->audio_codec_ctx);
67-
assert(err >= 0);
68-
69-
this->audio_frame = av_frame_alloc();
70-
assert(this->audio_frame);
71-
this->audio_frame->format = this->audio_codec_ctx->sample_fmt;
72-
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+
73-
av_channel_layout_copy(&this->audio_frame->ch_layout, &this->audio_codec_ctx->ch_layout);
74-
#else
75-
this->audio_frame->channel_layout = this->audio_codec_ctx->channel_layout;
76-
#endif
77-
this->audio_frame->sample_rate = this->audio_codec_ctx->sample_rate;
78-
this->audio_frame->nb_samples = this->audio_codec_ctx->frame_size;
79-
err = av_frame_get_buffer(this->audio_frame, 0);
80-
assert(err >= 0);
81-
}
82-
8344
int err = avio_open(&this->ofmt_ctx->pb, this->vid_path.c_str(), AVIO_FLAG_WRITE);
8445
assert(err >= 0);
8546

@@ -89,6 +50,45 @@ VideoWriter::VideoWriter(const char *path, const char *filename, bool remuxing,
8950
}
9051
}
9152

53+
void VideoWriter::initialize_audio(int sample_rate) {  // Builds the AAC encoder context, the audio output stream and the reusable audio frame; sample_rate is applied to the encoder and to the frame.
54+
assert(this->ofmt_ctx->oformat->audio_codec != AV_CODEC_ID_NONE); // check output format supports audio streams
55+
const AVCodec *audio_avcodec = avcodec_find_encoder(AV_CODEC_ID_AAC);
56+
assert(audio_avcodec);
57+
this->audio_codec_ctx = avcodec_alloc_context3(audio_avcodec);
58+
assert(this->audio_codec_ctx);
59+
this->audio_codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;  // planar float samples
60+
this->audio_codec_ctx->sample_rate = sample_rate;  // taken from the caller rather than a fixed constant
61+
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+
62+
av_channel_layout_default(&this->audio_codec_ctx->ch_layout, 1);  // default layout for a single channel
63+
#else
64+
this->audio_codec_ctx->channel_layout = AV_CH_LAYOUT_MONO;  // same mono layout via the older channel_layout field
65+
#endif
66+
this->audio_codec_ctx->bit_rate = 32000;  // encoder target bit rate, in bits per second
67+
this->audio_codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
68+
this->audio_codec_ctx->time_base = (AVRational){1, audio_codec_ctx->sample_rate};  // one time-base tick per audio sample
69+
int err = avcodec_open2(this->audio_codec_ctx, audio_avcodec, NULL);
70+
assert(err >= 0);
71+
av_log_set_level(AV_LOG_WARNING); // hide "QAvg" info msgs at the end of every segment
72+
73+
this->audio_stream = avformat_new_stream(this->ofmt_ctx, NULL);  // NOTE(review): adds a stream to ofmt_ctx; presumably this has to happen before the container header is written — confirm against write()
74+
assert(this->audio_stream);
75+
err = avcodec_parameters_from_context(this->audio_stream->codecpar, this->audio_codec_ctx);  // copy the opened encoder's parameters onto the stream
76+
assert(err >= 0);
77+
78+
this->audio_frame = av_frame_alloc();
79+
assert(this->audio_frame);
80+
this->audio_frame->format = this->audio_codec_ctx->sample_fmt;  // frame format, layout and rate mirror the encoder context
81+
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+
82+
av_channel_layout_copy(&this->audio_frame->ch_layout, &this->audio_codec_ctx->ch_layout);
83+
#else
84+
this->audio_frame->channel_layout = this->audio_codec_ctx->channel_layout;
85+
#endif
86+
this->audio_frame->sample_rate = this->audio_codec_ctx->sample_rate;
87+
this->audio_frame->nb_samples = this->audio_codec_ctx->frame_size;  // frame sized to the encoder's frame_size, which is read after avcodec_open2
88+
err = av_frame_get_buffer(this->audio_frame, 0);  // allocate the frame's sample buffers for the fields set above
89+
assert(err >= 0);
90+
}
91+
9292
void VideoWriter::write(uint8_t *data, int len, long long timestamp, bool codecconfig, bool keyframe) {
9393
if (of && data) {
9494
size_t written = util::safe_fwrite(data, 1, len, of);
@@ -106,9 +106,9 @@ void VideoWriter::write(uint8_t *data, int len, long long timestamp, bool codecc
106106
}
107107
int err = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx);
108108
assert(err >= 0);
109-
err = avformat_write_header(ofmt_ctx, NULL);
110-
assert(err >= 0);
111-
} else {
109+
err = avformat_write_header(ofmt_ctx, NULL);
110+
assert(err >= 0);
111+
} else {
112112
// input timestamps are in microseconds
113113
AVRational in_timebase = {1, 1000000};
114114

@@ -135,8 +135,13 @@ void VideoWriter::write(uint8_t *data, int len, long long timestamp, bool codecc
135135
}
136136
}
137137

138-
void VideoWriter::write_audio(uint8_t *data, int len, long long timestamp) {
139-
if (!remuxing || !audio_codec_ctx) return;
138+
void VideoWriter::write_audio(uint8_t *data, int len, long long timestamp, int sample_rate) {
139+
if (!remuxing) return;
140+
if (!audio_initialized) {
141+
initialize_audio(sample_rate);
142+
audio_initialized = true;
143+
}
144+
if (!audio_codec_ctx) return;
140145

141146
// sync logMonoTime of first audio packet with the timestampEof of first video packet
142147
if (audio_pts == 0) {
@@ -174,6 +179,7 @@ void VideoWriter::encode_and_write_audio_frame(AVFrame* frame) {
174179
if (err < 0) {
175180
LOGW("AUDIO: Write frame failed - error: %d", err);
176181
}
182+
av_packet_unref(pkt);
177183
}
178184
av_packet_free(&pkt);
179185
} else {

system/loggerd/video_writer.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@ extern "C" {
1212

1313
class VideoWriter {
1414
public:
15-
VideoWriter(const char *path, const char *filename, bool remuxing, int width, int height, int fps, cereal::EncodeIndex::Type codec, bool include_audio);
15+
VideoWriter(const char *path, const char *filename, bool remuxing, int width, int height, int fps, cereal::EncodeIndex::Type codec);
1616
void write(uint8_t *data, int len, long long timestamp, bool codecconfig, bool keyframe);
17-
void write_audio(uint8_t *data, int len, long long timestamp);
17+
void write_audio(uint8_t *data, int len, long long timestamp, int sample_rate);
1818

1919
~VideoWriter();
2020

2121
private:
22+
void initialize_audio(int sample_rate);
2223
void encode_and_write_audio_frame(AVFrame* frame);
2324

2425
std::string vid_path, lock_path;
@@ -28,6 +29,7 @@ class VideoWriter {
2829
AVFormatContext *ofmt_ctx;
2930
AVStream *out_stream;
3031

32+
bool audio_initialized = false;
3133
AVStream *audio_stream = nullptr;
3234
AVCodecContext *audio_codec_ctx = nullptr;
3335
AVFrame *audio_frame = nullptr;

0 commit comments

Comments
 (0)