Skip to content

Commit 4e2ea94

Browse files
YLouWashU authored and facebook-github-bot committed
{Feature} Properly support random access for encoded audio signals.
Summary: This diff adds a proper solution for random access to Opus-encoded audio. The core idea is that every time `readRecordByIndex()` is called for an Opus audio stream, the requested index is compared with the previously read index: 1. If new_index == prev_index, no pre-roll is needed; the audio player simply returns the cached data. 2. If new_index == prev_index + 1, no pre-roll is needed; decoding proceeds normally. 3. Otherwise, pre-roll is needed: roughly 100 ms of audio before this index is decoded first. Reviewed By: kongchen1992 Differential Revision: D83426928 fbshipit-source-id: 93e312e381978fef58a9c38b0fc4251d0811e9bc
1 parent 4cbd3cd commit 4e2ea94

File tree

5 files changed

+147
-4
lines changed

5 files changed

+147
-4
lines changed

core/data_provider/RecordReaderInterface.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,9 @@ RecordReaderInterface::RecordReaderInterface(
231231
}
232232
fileTags_ = reader_->getTags();
233233
vrsMetadata_ = getMetadata();
234+
235+
// Initialize Opus detection cache
236+
initializeOpusDetection();
234237
}
235238

236239
std::set<vrs::StreamId> RecordReaderInterface::getStreamIds() const {
@@ -327,15 +330,37 @@ const vrs::IndexRecord::RecordInfo* RecordReaderInterface::readRecordByIndex(
327330
const int index) {
328331
std::lock_guard<std::mutex> lockGuard(*readerMutex_);
329332

333+
// Check bounds first to prevent invalid pre-roll attempts
330334
if (index < 0 || index >= reader_->getRecordCount(streamId, vrs::Record::Type::DATA)) {
331335
return nullptr;
332336
}
337+
338+
// Handle Opus audio pre-roll before reading the target record
339+
if (getSensorDataType(streamId) == SensorDataType::Audio && isOpusAudioStream(streamId) &&
340+
needsOpusPreroll(streamId, index)) {
341+
performOpusPreroll(streamId, index);
342+
}
333343
const vrs::IndexRecord::RecordInfo* recordInfo =
334344
reader_->getRecord(streamId, vrs::Record::Type::DATA, static_cast<uint32_t>(index));
335345
checkAndThrow(
336346
recordInfo, fmt::format("getRecord failed for {}, index {}", streamId.getName(), index));
337347
const int errorCode = reader_->readRecord(*recordInfo);
338348
streamIdToLastReadRecord_[streamId] = recordInfo;
349+
350+
// Update tracking for audio streams after successful read
351+
if (getSensorDataType(streamId) == SensorDataType::Audio) {
352+
audioStreamLastReadIndex_[streamId] = index;
353+
354+
// Update Opus detection cache after first successful read
355+
auto audioPlayerIt = audioPlayers_.find(streamId);
356+
if (audioPlayerIt != audioPlayers_.end()) {
357+
vrs::AudioFormat detectedFormat = audioPlayerIt->second->getDetectedAudioFormat();
358+
if (detectedFormat != vrs::AudioFormat::UNDEFINED) {
359+
updateOpusDetection(streamId, detectedFormat == vrs::AudioFormat::OPUS);
360+
}
361+
}
362+
}
363+
339364
if (errorCode != 0) {
340365
XR_LOGE(
341366
"Fail to read record {} from streamId {} with code {}",
@@ -684,4 +709,62 @@ OnDeviceHandPoseData RecordReaderInterface::getLastCachedHandPoseData(
684709
return handPoseData;
685710
}
686711

712+
// Decides whether the Opus decoder has to be pre-rolled before reading `targetIndex`.
//
// streamId:    audio stream that is about to be read.
// targetIndex: index of the data record that is about to be read.
//
// Returns false when `targetIndex` repeats, or directly follows, the index stored for this
// stream in audioStreamLastReadIndex_; returns true for any other jump. When no index is stored
// for the stream yet, returns true only if `targetIndex` is greater than 0.
bool RecordReaderInterface::needsOpusPreroll(const vrs::StreamId& streamId, int targetIndex) {
  const auto previous = audioStreamLastReadIndex_.find(streamId);
  if (previous == audioStreamLastReadIndex_.end()) {
    // Nothing tracked for this stream so far: index 0 has no earlier records to pre-roll from.
    return targetIndex > 0;
  }

  const int previousIndex = previous->second;
  const bool isSequentialRead = (targetIndex == previousIndex + 1);
  const bool isRepeatedRead = (targetIndex == previousIndex);
  return !(isSequentialRead || isRepeatedRead);
}
724+
725+
// Primes the Opus decoder ahead of a non-sequential read of `targetIndex`.
//
// The stream's decoder is reset first, then up to kAudioDecodingPrerollLength data records that
// immediately precede `targetIndex` are read in ascending order. The records are read only for
// their effect on the stream player; nothing is returned, and neither streamIdToLastReadRecord_
// nor audioStreamLastReadIndex_ is modified here.
//
// Pre-roll is best effort: a record that cannot be looked up or read is logged and the remaining
// records are still attempted, so the caller's read of the target record always proceeds.
//
// streamId:    audio stream being read.
// targetIndex: index of the record the caller is about to read (not read by this function).
void RecordReaderInterface::performOpusPreroll(const vrs::StreamId& streamId, int targetIndex) {
  // Start from a fresh decoder so that no state from the previous read position leaks in.
  resetOpusDecoder(streamId);

  // Clamp at 0 so targets near the beginning of the stream simply get a shorter pre-roll.
  const int prerollStart = std::max(0, targetIndex - kAudioDecodingPrerollLength);
  for (int prerollIndex = prerollStart; prerollIndex < targetIndex; ++prerollIndex) {
    const vrs::IndexRecord::RecordInfo* prerollRecord =
        reader_->getRecord(streamId, vrs::Record::Type::DATA, static_cast<uint32_t>(prerollIndex));
    if (prerollRecord == nullptr) {
      XR_LOGE(
          "getRecord failed during Opus pre-roll for {}, index {}",
          streamId.getName(),
          prerollIndex);
      continue;
    }

    // Surface read failures instead of dropping the return code silently.
    const int errorCode = reader_->readRecord(*prerollRecord);
    if (errorCode != 0) {
      XR_LOGE(
          "Fail to read pre-roll record {} from streamId {} with code {}",
          prerollIndex,
          streamId.getName(),
          errorCode);
    }
  }
}
740+
741+
// Reports whether `streamId` is currently recorded as Opus-encoded in audioStreamIsOpus_.
// A stream without a cache entry is reported as not Opus.
bool RecordReaderInterface::isOpusAudioStream(const vrs::StreamId& streamId) {
  const auto cached = audioStreamIsOpus_.find(streamId);
  if (cached == audioStreamIsOpus_.end()) {
    return false;
  }
  return cached->second;
}
746+
747+
void RecordReaderInterface::initializeOpusDetection() {
748+
// Initialize all audio streams as undetermined (will be detected lazily on first read)
749+
for (const auto& [streamId, audioPlayer] : audioPlayers_) {
750+
// Initialize as undetermined - will be detected during first actual read
751+
// when the AudioPlayer's onAudioRead() method can access the content block format
752+
audioStreamIsOpus_[streamId] = false;
753+
}
754+
}
755+
756+
// Records in audioStreamIsOpus_ whether `streamId` is Opus-encoded, creating the entry if it
// does not exist and overwriting it otherwise.
void RecordReaderInterface::updateOpusDetection(const vrs::StreamId& streamId, bool isOpus) {
  audioStreamIsOpus_.insert_or_assign(streamId, isOpus);
}
759+
760+
// Resets the Opus decoder of the audio player registered for `streamId`.
// When no player is registered for that stream, a warning is printed and nothing else happens.
void RecordReaderInterface::resetOpusDecoder(const vrs::StreamId& streamId) {
  const auto playerIt = audioPlayers_.find(streamId);
  if (playerIt == audioPlayers_.end()) {
    fmt::print(
        "Warning: streamId {} not found in audioPlayers_, Opus decoder cannot be reset\n",
        streamId.getNumericName());
    return;
  }

  playerIt->second->resetOpusDecoder();
}
769+
687770
} // namespace projectaria::tools::data_provider

core/data_provider/RecordReaderInterface.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,36 @@ class RecordReaderInterface {
105105

106106
[[nodiscard]] std::optional<MetadataTimeSyncMode> getTimeSyncMode() const;
107107

108+
// Explicit function to reset AudioPlayer's Opus decoder. This is needed to support random access
109+
// for Audio data
110+
void resetOpusDecoder(const vrs::StreamId& streamId);
111+
108112
private:
113+
// Parameter to control the "pre-roll" length for encoded audio signal.
114+
// 5 records correspond to approximately 100 ms of audio, which is enough for pre-roll
115+
static constexpr int kAudioDecodingPrerollLength = 5;
116+
117+
// Track last read index per audio stream for Opus pre-roll detection
118+
std::map<vrs::StreamId, int> audioStreamLastReadIndex_;
119+
120+
// Cache which audio streams use Opus encoding (seeded to false at construction, then updated once a stream's format has been detected on read)
121+
std::map<vrs::StreamId, bool> audioStreamIsOpus_;
122+
123+
// Helper method to determine if audio stream needs Opus pre-roll
124+
bool needsOpusPreroll(const vrs::StreamId& streamId, int targetIndex);
125+
126+
// Helper method to perform Opus pre-roll
127+
void performOpusPreroll(const vrs::StreamId& streamId, int targetIndex);
128+
129+
// Check if audio stream uses Opus encoding (cached lookup)
130+
bool isOpusAudioStream(const vrs::StreamId& streamId);
131+
132+
// Initialize Opus detection cache during construction
133+
void initializeOpusDetection();
134+
135+
// Update Opus detection cache for a specific stream
136+
void updateOpusDetection(const vrs::StreamId& streamId, bool isOpus);
137+
109138
std::shared_ptr<vrs::MultiRecordFileReader> reader_;
110139

111140
calibration::DeviceVersion deviceVersion_;

core/data_provider/VrsDataProvider.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,12 @@ SensorData VrsDataProvider::getSensorDataByTimeNs(
526526
const TimeDomain& timeDomain,
527527
const TimeQueryOptions& timeQueryOptions) {
528528
const int index = getIndexByTimeNs(streamId, timeNs, timeDomain, timeQueryOptions);
529+
530+
// Check for out-of-bounds index and return NotValid SensorData
531+
if (index < 0) {
532+
return SensorData(streamId, std::monostate{}, SensorDataType::NotValid, -1, {});
533+
}
534+
529535
return getSensorDataByIndex(streamId, index);
530536
}
531537

core/data_provider/players/AudioPlayer.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ bool AudioPlayer::onAudioRead(
6262
const vrs::ContentBlock& cb) {
6363
const auto& audioSpec = cb.audio();
6464

65+
// Detect and store audio format on first read
66+
if (detectedAudioFormat_ == vrs::AudioFormat::UNDEFINED) {
67+
detectedAudioFormat_ = audioSpec.getAudioFormat();
68+
}
69+
6570
// Read Opus-encoded audio data with decoding
6671
if (audioSpec.getAudioFormat() == vrs::AudioFormat::OPUS &&
6772
audioSpec.getSampleFormat() == vrs::AudioSampleFormat::S16_LE) {
@@ -129,12 +134,10 @@ bool AudioPlayer::readAndDecodeAudioData(const vrs::CurrentRecord& r, const vrs:
129134
}
130135

131136
if (needsReset) {
132-
opus_multistream_decoder_destroy(opusDecoder_);
133-
opusDecoder_ = nullptr;
134-
lastDecodedTimestamp_ = -1.0; // Reset timestamp tracking
137+
resetOpusDecoder();
135138
}
136139

137-
// Create decoder only if needed (reuse existing decoder when possible)
140+
// Initialize decoder if not yet initialized
138141
if (opusDecoder_ == nullptr) {
139142
// Separate mono and coupled channels
140143
uint32_t totalCoupledAudioChannel = kStereoMultiplier * audioSpec.getStereoPairCount();
@@ -224,6 +227,7 @@ bool AudioPlayer::readAndDecodeAudioData(const vrs::CurrentRecord& r, const vrs:
224227
lastDecodedTimestamp_ = currentTimestamp;
225228

226229
callback_(data_, dataRecord_, configRecord_, verbose_);
230+
227231
return true;
228232
} else {
229233
// Error: result contains error code
@@ -232,4 +236,13 @@ bool AudioPlayer::readAndDecodeAudioData(const vrs::CurrentRecord& r, const vrs:
232236
}
233237
}
234238

239+
void AudioPlayer::resetOpusDecoder() {
240+
// Destroy existing decoder
241+
if (opusDecoder_ != nullptr) {
242+
opus_multistream_decoder_destroy(opusDecoder_);
243+
opusDecoder_ = nullptr;
244+
lastDecodedTimestamp_ = -1.0; // Reset timestamp tracking
245+
}
246+
}
247+
235248
} // namespace projectaria::tools::data_provider

core/data_provider/players/AudioPlayer.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ class AudioPlayer : public vrs::RecordFormatStreamPlayer {
105105
verbose_ = verbose;
106106
}
107107

108+
// Get the audio format detected by onAudioRead(); stays vrs::AudioFormat::UNDEFINED until the first audio read
109+
vrs::AudioFormat getDetectedAudioFormat() const {
110+
return detectedAudioFormat_;
111+
}
112+
113+
// A function to explicitly reset the Opus decoder. Note that this DOES NOT initialize the
114+
// decoder!
115+
void resetOpusDecoder();
116+
108117
protected:
109118
bool onDataLayoutRead(const vrs::CurrentRecord& r, size_t blockIndex, vrs::DataLayout& dl)
110119
override;
@@ -133,6 +142,9 @@ class AudioPlayer : public vrs::RecordFormatStreamPlayer {
133142
OpusMSDecoder* opusDecoder_ = nullptr;
134143
vrs::AudioContentBlockSpec lastDecoderSpec_; // Track spec compatibility
135144
double lastDecodedTimestamp_ = -1.0; // Track last decoded timestamp for random access detection
145+
146+
// Track detected audio format (set on first audio read)
147+
vrs::AudioFormat detectedAudioFormat_ = vrs::AudioFormat::UNDEFINED;
136148
};
137149

138150
} // namespace projectaria::tools::data_provider

0 commit comments

Comments
 (0)