Commit 68e12b2

Author: Devdutt Shenoi
refactor: only compare names (#1141)
This PR avoids unnecessary allocation by comparing stream names directly, replacing `Vec<LogStream>` with `HashSet<String>`.
1 parent 11a6378 commit 68e12b2
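
The core of the change: stream listings now hold plain names instead of `LogStream { name }` wrappers, so membership checks no longer allocate. A minimal standalone sketch of the before/after lookup (types and names here are illustrative, not the crate's exact code):

```rust
use std::collections::HashSet;

// Before: every membership check had to build a whole `LogStream` just to compare.
#[derive(PartialEq)]
struct OldLogStream {
    name: String,
}

fn contains_old(streams: &[OldLogStream], stream_name: &str) -> bool {
    // `to_owned()` allocates a fresh String on every lookup.
    streams.contains(&OldLogStream {
        name: stream_name.to_owned(),
    })
}

// After: `LogStream` is just `String`, so a `HashSet<String>` can be probed
// with a `&str` directly (via `Borrow<str>`), with no allocation.
fn contains_new(streams: &HashSet<String>, stream_name: &str) -> bool {
    streams.contains(stream_name)
}

fn main() {
    let old = vec![OldLogStream { name: "app-logs".into() }];
    let new: HashSet<String> = ["app-logs".to_string()].into();
    assert!(contains_old(&old, "app-logs"));
    assert!(contains_new(&new, "app-logs"));
}
```

Besides dropping the allocation, the `HashSet` turns a linear scan of a `Vec` into an average-case O(1) lookup.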

File tree: 9 files changed (+44, -67 lines)

src/handlers/http/logstream.rs

Lines changed: 3 additions & 2 deletions
@@ -49,7 +49,7 @@ use bytes::Bytes;
 use chrono::Utc;
 use http::{HeaderName, HeaderValue};
 use itertools::Itertools;
-use serde_json::Value;
+use serde_json::{json, Value};
 use std::collections::HashMap;
 use std::fs;
 use std::num::NonZeroU32;
@@ -104,9 +104,10 @@ pub async fn list(req: HttpRequest) -> Result<impl Responder, StreamError> {
         .filter(|logstream| {
             warn!("logstream-\n{logstream:?}");

-            Users.authorize(key.clone(), Action::ListStream, Some(&logstream.name), None)
+            Users.authorize(key.clone(), Action::ListStream, Some(logstream), None)
                 == crate::rbac::Response::Authorized
         })
+        .map(|name| json!({"name": name}))
         .collect_vec();

     Ok(web::Json(res))
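
Because the listing now yields bare names, a `LogStream` no longer serializes to a `{"name": ...}` object on its own; the handler wraps each authorized name with `json!` to keep the previous response shape. A small sketch of that mapping, assuming only `serde_json` (the helper name is hypothetical, not the handler itself):

```rust
use serde_json::{json, Value};

// Hypothetical helper: wrap plain stream names into the `{"name": ...}`
// objects that the old `LogStream` struct used to serialize to.
fn to_list_response(names: Vec<String>) -> Vec<Value> {
    names.into_iter().map(|name| json!({ "name": name })).collect()
}

fn main() {
    let body = to_list_response(vec!["app-logs".into(), "audit".into()]);
    assert_eq!(body, vec![json!({"name": "app-logs"}), json!({"name": "audit"})]);
}
```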

src/handlers/http/modal/utils/logstream_utils.rs

Lines changed: 2 additions & 4 deletions
@@ -33,7 +33,7 @@ use crate::{
     metadata::{self, SchemaVersion, STREAM_INFO},
     option::{Mode, CONFIG},
     static_schema::{convert_static_schema_to_arrow_schema, StaticSchema},
-    storage::{LogStream, ObjectStoreFormat, StreamType},
+    storage::{ObjectStoreFormat, StreamType},
     validator,
 };
 use tracing::error;
@@ -454,9 +454,7 @@ pub async fn create_stream_and_schema_from_storage(stream_name: &str) -> Result<
     // Proceed to create log stream if it doesn't exist
     let storage = CONFIG.storage().get_object_store();
     let streams = storage.list_streams().await?;
-    if streams.contains(&LogStream {
-        name: stream_name.to_owned(),
-    }) {
+    if streams.contains(stream_name) {
         let mut stream_metadata = ObjectStoreFormat::default();
         let stream_metadata_bytes = storage.create_stream_from_ingestor(stream_name).await?;
         if !stream_metadata_bytes.is_empty() {

src/handlers/http/query.rs

Lines changed: 1 addition & 3 deletions
@@ -191,9 +191,7 @@ pub async fn create_streams_for_querier() {
     let querier_streams = STREAM_INFO.list_streams();
     let store = CONFIG.storage().get_object_store();
     let storage_streams = store.list_streams().await.unwrap();
-    for stream in storage_streams {
-        let stream_name = stream.name;
-
+    for stream_name in storage_streams {
         if !querier_streams.contains(&stream_name) {
             let _ = create_stream_and_schema_from_storage(&stream_name).await;
         }

src/migration/mod.rs

Lines changed: 3 additions & 10 deletions
@@ -34,7 +34,6 @@ use crate::{
 };
 use arrow_schema::Schema;
 use bytes::Bytes;
-use itertools::Itertools;
 use relative_path::RelativePathBuf;
 use serde::Serialize;
 use serde_json::Value;
@@ -133,9 +132,8 @@ pub async fn run_metadata_migration(
 /// run the migration for all streams
 pub async fn run_migration(config: &Config) -> anyhow::Result<()> {
     let storage = config.storage().get_object_store();
-    let streams = storage.list_streams().await?;
-    for stream in streams {
-        migration_stream(&stream.name, &*storage).await?;
+    for stream_name in storage.list_streams().await? {
+        migration_stream(&stream_name, &*storage).await?;
     }

     Ok(())
@@ -357,12 +355,7 @@ async fn run_meta_file_migration(
 }

 async fn run_stream_files_migration(object_store: &Arc<dyn ObjectStorage>) -> anyhow::Result<()> {
-    let streams = object_store
-        .list_old_streams()
-        .await?
-        .into_iter()
-        .map(|stream| stream.name)
-        .collect_vec();
+    let streams = object_store.list_old_streams().await?;

     for stream in streams {
         let paths = object_store.get_stream_file_paths(&stream).await?;

src/storage/azure_blob.rs

Lines changed: 9 additions & 11 deletions
@@ -42,7 +42,7 @@ use crate::metrics::storage::azureblob::REQUEST_RESPONSE_TIME;
 use crate::metrics::storage::StorageMetrics;
 use object_store::limit::LimitStore;
 use object_store::path::Path as StorePath;
-use std::collections::{BTreeMap, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::sync::Arc;
 use std::time::{Duration, Instant};

@@ -266,8 +266,8 @@ impl BlobStore {
         Ok(())
     }

-    async fn _list_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
-        let mut result_file_list: Vec<LogStream> = Vec::new();
+    async fn _list_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
+        let mut result_file_list = HashSet::new();
         let resp = self.client.list_with_delimiter(None).await?;

         let streams = resp
@@ -287,7 +287,7 @@
                 .iter()
                 .any(|name| name.location.filename().unwrap().ends_with("stream.json"))
             {
-                result_file_list.push(LogStream { name: stream });
+                result_file_list.insert(stream);
             }
         }

@@ -573,19 +573,17 @@ impl ObjectStorage for BlobStore {
         }
     }

-    async fn list_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
-        let streams = self._list_streams().await?;
-
-        Ok(streams)
+    async fn list_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
+        self._list_streams().await
     }

-    async fn list_old_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
+    async fn list_old_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
         let resp = self.client.list_with_delimiter(None).await?;

         let common_prefixes = resp.common_prefixes; // get all dirs

         // return prefixes at the root level
-        let dirs: Vec<_> = common_prefixes
+        let dirs: HashSet<_> = common_prefixes
             .iter()
             .filter_map(|path| path.parts().next())
             .map(|name| name.as_ref().to_string())
@@ -602,7 +600,7 @@

         stream_json_check.try_collect::<()>().await?;

-        Ok(dirs.into_iter().map(|name| LogStream { name }).collect())
+        Ok(dirs.into_iter().collect())
     }

     async fn list_dates(&self, stream_name: &str) -> Result<Vec<String>, ObjectStorageError> {

src/storage/localfs.rs

Lines changed: 5 additions & 13 deletions
@@ -17,7 +17,7 @@
 */

 use std::{
-    collections::{BTreeMap, HashMap},
+    collections::{BTreeMap, HashMap, HashSet},
     path::{Path, PathBuf},
     sync::Arc,
     time::Instant,
@@ -295,7 +295,7 @@ impl ObjectStorage for LocalFS {
         Ok(fs::remove_file(path).await?)
     }

-    async fn list_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
+    async fn list_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
         let ignore_dir = &[
             "lost+found",
             PARSEABLE_ROOT_DIRECTORY,
@@ -311,16 +311,12 @@
         let logstream_dirs: Vec<Option<String>> =
             FuturesUnordered::from_iter(entries).try_collect().await?;

-        let logstreams = logstream_dirs
-            .into_iter()
-            .flatten()
-            .map(|name| LogStream { name })
-            .collect();
+        let logstreams = logstream_dirs.into_iter().flatten().collect();

         Ok(logstreams)
     }

-    async fn list_old_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
+    async fn list_old_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
         let ignore_dir = &[
             "lost+found",
             PARSEABLE_ROOT_DIRECTORY,
@@ -335,11 +331,7 @@
         let logstream_dirs: Vec<Option<String>> =
             FuturesUnordered::from_iter(entries).try_collect().await?;

-        let logstreams = logstream_dirs
-            .into_iter()
-            .flatten()
-            .map(|name| LogStream { name })
-            .collect();
+        let logstreams = logstream_dirs.into_iter().flatten().collect();

         Ok(logstreams)
     }

src/storage/mod.rs

Lines changed: 4 additions & 5 deletions
@@ -48,6 +48,10 @@ pub use store_metadata::{
     put_remote_metadata, put_staging_metadata, resolve_parseable_metadata, StorageMetadata,
 };

+/// Name of a Stream
+/// NOTE: this used to be a struct, flattened out for simplicity
+pub type LogStream = String;
+
 // metadata file names in a Stream prefix
 pub const STREAM_METADATA_FILE_NAME: &str = ".stream.json";
 pub const PARSEABLE_METADATA_FILE_NAME: &str = ".parseable.json";
@@ -225,11 +229,6 @@
     }
 }

-#[derive(serde::Serialize, PartialEq, Debug)]
-pub struct LogStream {
-    pub name: String,
-}
-
 #[derive(Debug, thiserror::Error)]
 pub enum ObjectStorageError {
     // no such key inside the object storage
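
A side note on the alias added above: `String` already implements `Hash + Eq`, which is what lets the listings live in a `HashSet` at all, whereas the removed struct only derived `Serialize`, `PartialEq`, and `Debug`. A short, standalone sketch of what the flattened alias buys (everything except the alias itself is illustrative):

```rust
use std::collections::HashSet;

/// Mirrors the alias introduced in this commit.
pub type LogStream = String;

fn main() {
    let mut streams: HashSet<LogStream> = HashSet::new();
    streams.insert("app-logs".to_string());
    streams.insert("app-logs".to_string()); // duplicate names collapse automatically
    assert_eq!(streams.len(), 1);

    // Lookups borrow as `&str`, so callers never build a temporary wrapper value.
    assert!(streams.contains("app-logs"));
}
```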

src/storage/object_storage.rs

Lines changed: 5 additions & 6 deletions
@@ -17,12 +17,11 @@
 */

 use super::{
-    retention::Retention, staging::convert_disk_files_to_parquet, LogStream, ObjectStorageError,
+    retention::Retention, staging::convert_disk_files_to_parquet, ObjectStorageError,
     ObjectStoreFormat, Permisssion, StorageDir, StorageMetadata,
 };
 use super::{
-    Owner, StreamType, ALERTS_ROOT_DIRECTORY, MANIFEST_FILE, PARSEABLE_METADATA_FILE_NAME,
-    PARSEABLE_ROOT_DIRECTORY, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY,
+    LogStream, Owner, StreamType, ALERTS_ROOT_DIRECTORY, MANIFEST_FILE, PARSEABLE_METADATA_FILE_NAME, PARSEABLE_ROOT_DIRECTORY, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY
 };

 use crate::alerts::AlertConfig;
@@ -52,7 +51,7 @@ use relative_path::RelativePathBuf;
 use tracing::{error, warn};
 use ulid::Ulid;

-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fmt::Debug;
 use std::num::NonZeroU32;
 use std::{
@@ -92,8 +91,8 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static {
     async fn delete_prefix(&self, path: &RelativePath) -> Result<(), ObjectStorageError>;
     async fn check(&self) -> Result<(), ObjectStorageError>;
     async fn delete_stream(&self, stream_name: &str) -> Result<(), ObjectStorageError>;
-    async fn list_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError>;
-    async fn list_old_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError>;
+    async fn list_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError>;
+    async fn list_old_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError>;
     async fn list_dirs(&self) -> Result<Vec<String>, ObjectStorageError>;
     async fn get_all_saved_filters(
         &self,

src/storage/s3.rs

Lines changed: 12 additions & 13 deletions
@@ -32,7 +32,7 @@ use object_store::{BackoffConfig, ClientOptions, ObjectStore, PutPayload, RetryC
 use relative_path::{RelativePath, RelativePathBuf};
 use tracing::{error, info};

-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fmt::Display;
 use std::iter::Iterator;
 use std::path::Path as StdPath;
@@ -43,11 +43,12 @@ use std::time::{Duration, Instant};
 use super::metrics_layer::MetricLayer;
 use super::object_storage::parseable_json_path;
 use super::{
-    ObjectStorageProvider, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY,
+    LogStream, ObjectStorageProvider, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME,
+    STREAM_ROOT_DIRECTORY,
 };
 use crate::handlers::http::users::USERS_ROOT_DIR;
 use crate::metrics::storage::{s3::REQUEST_RESPONSE_TIME, StorageMetrics};
-use crate::storage::{LogStream, ObjectStorage, ObjectStorageError, PARSEABLE_ROOT_DIRECTORY};
+use crate::storage::{ObjectStorage, ObjectStorageError, PARSEABLE_ROOT_DIRECTORY};
 use std::collections::HashMap;

 // in bytes
@@ -402,8 +403,8 @@ impl S3 {
         Ok(())
     }

-    async fn _list_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
-        let mut result_file_list: Vec<LogStream> = Vec::new();
+    async fn _list_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
+        let mut result_file_list = HashSet::new();
         let resp = self.client.list_with_delimiter(None).await?;

         let streams = resp
@@ -423,7 +424,7 @@
                 .iter()
                 .any(|name| name.location.filename().unwrap().ends_with("stream.json"))
             {
-                result_file_list.push(LogStream { name: stream });
+                result_file_list.insert(stream);
             }
         }

@@ -709,19 +710,17 @@ impl ObjectStorage for S3 {
         }
     }

-    async fn list_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
-        let streams = self._list_streams().await?;
-
-        Ok(streams)
+    async fn list_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
+        self._list_streams().await
     }

-    async fn list_old_streams(&self) -> Result<Vec<LogStream>, ObjectStorageError> {
+    async fn list_old_streams(&self) -> Result<HashSet<LogStream>, ObjectStorageError> {
         let resp = self.client.list_with_delimiter(None).await?;

         let common_prefixes = resp.common_prefixes; // get all dirs

         // return prefixes at the root level
-        let dirs: Vec<_> = common_prefixes
+        let dirs: HashSet<_> = common_prefixes
             .iter()
             .filter_map(|path| path.parts().next())
             .map(|name| name.as_ref().to_string())
@@ -738,7 +737,7 @@

         stream_json_check.try_collect::<()>().await?;

-        Ok(dirs.into_iter().map(|name| LogStream { name }).collect())
+        Ok(dirs)
     }

     async fn list_dates(&self, stream_name: &str) -> Result<Vec<String>, ObjectStorageError> {
