diff --git a/backend/Cargo.lock b/backend/Cargo.lock index 19c4e7a2b5dd6..49d4aebc08480 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -2822,7 +2822,7 @@ dependencies = [ "parquet", "rand 0.8.5", "regex", - "sqlparser", + "sqlparser 0.55.0", "tempfile", "tokio", "url", @@ -2899,7 +2899,7 @@ dependencies = [ "parquet", "paste", "recursive", - "sqlparser", + "sqlparser 0.55.0", "tokio", "web-time", ] @@ -3075,7 +3075,7 @@ dependencies = [ "paste", "recursive", "serde_json", - "sqlparser", + "sqlparser 0.55.0", ] [[package]] @@ -3371,7 +3371,7 @@ dependencies = [ "log", "recursive", "regex", - "sqlparser", + "sqlparser 0.55.0", ] [[package]] @@ -8602,15 +8602,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nom" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" -dependencies = [ - "memchr", -] - [[package]] name = "notify" version = "6.1.1" @@ -12264,6 +12255,17 @@ dependencies = [ "sqlparser_derive", ] +[[package]] +name = "sqlparser" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + [[package]] name = "sqlparser_derive" version = "0.3.0" @@ -15184,7 +15186,6 @@ dependencies = [ "lazy_static", "libloading 0.8.9", "memchr", - "nom 8.0.0", "object_store", "once_cell", "pep440_rs", @@ -15705,11 +15706,11 @@ version = "1.590.0" dependencies = [ "anyhow", "lazy_static", - "nom 8.0.0", "regex", "regex-lite", "serde", "serde_json", + "sqlparser 0.59.0", "windmill-parser", ] diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 322c61c61791c..d2e1336f345ec 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -158,7 +158,6 @@ kube.workspace = true k8s-openapi.workspace = true libloading.workspace = true bitflags.workspace = true -nom.workspace = true globset.workspace = true @@ -378,7 +377,6 @@ pg_escape = "0.1.1" async-nats = "0.38.0" nkeys = "0.4.4" nu-parser = { version = "0.101.0", default-features = false } -nom = "8.0.0" globset = "0.4.16" process-wrap = { version = "8.2.1", features = ["tokio1"] } diff --git a/backend/parsers/windmill-parser-py/src/asset_parser.rs b/backend/parsers/windmill-parser-py/src/asset_parser.rs index 17a898655bda8..93f3a0cd5784c 100644 --- a/backend/parsers/windmill-parser-py/src/asset_parser.rs +++ b/backend/parsers/windmill-parser-py/src/asset_parser.rs @@ -23,9 +23,12 @@ pub fn parse_assets(input: &str) -> anyhow::Result .iter() .all(|a| !(a.kind == kind && a.path == name)) { - assets_finder - .assets - .push(ParseAssetsResult { kind, access_type: None, path: name }); + assets_finder.assets.push(ParseAssetsResult { + kind, + access_type: None, + path: name, + specific_table: None, + }); } } @@ -70,6 +73,7 @@ impl Visitor for AssetsFinder { kind, access_type: None, path: name, + specific_table: None, }); } } @@ -87,11 +91,12 @@ impl Visitor for AssetsFinder { fn visit_expr_constant(&mut self, node: ExprConstant) { match node.value { Constant::Str(s) => { - if let Some((kind, path)) = parse_asset_syntax(&s) { + if let Some((kind, path)) = parse_asset_syntax(&s, false) { self.assets.push(ParseAssetsResult { kind, path: path.to_string(), access_type: None, + specific_table: None, }); } } @@ -108,11 +113,12 @@ impl Visitor for AssetsFinder { if let Expr::Constant(ExprConstant { value: Constant::Str(s), .. }) = &keyword.value { - if let Some((kind, path)) = parse_asset_syntax(s) { + if let Some((kind, path)) = parse_asset_syntax(s, false) { self.assets.push(ParseAssetsResult { kind, path: path.to_string(), access_type: None, + specific_table: None, }); } } @@ -212,6 +218,7 @@ impl AssetsFinder { kind: *kind, path: path.to_string(), access_type, + specific_table: None, }); return Ok(()); } @@ -249,9 +256,15 @@ impl AssetsFinder { match arg_val { Some(Expr::Constant(ExprConstant { value: Constant::Str(value), .. })) => { - let path = parse_asset_syntax(&value).map(|(_, p)| p).unwrap_or(&value); - self.assets - .push(ParseAssetsResult { kind, path: path.to_string(), access_type }); + let path = parse_asset_syntax(&value, false) + .map(|(_, p)| p) + .unwrap_or(&value); + self.assets.push(ParseAssetsResult { + kind, + path: path.to_string(), + access_type, + specific_table: None, + }); } _ => return Err(()), }; diff --git a/backend/parsers/windmill-parser-sql/Cargo.toml b/backend/parsers/windmill-parser-sql/Cargo.toml index 8c853df1455f7..41533a6014fe4 100644 --- a/backend/parsers/windmill-parser-sql/Cargo.toml +++ b/backend/parsers/windmill-parser-sql/Cargo.toml @@ -20,4 +20,4 @@ anyhow.workspace = true lazy_static.workspace = true serde_json.workspace = true serde.workspace = true -nom.workspace = true +sqlparser = { version = "0.59.0", features = ["visitor"] } \ No newline at end of file diff --git a/backend/parsers/windmill-parser-sql/src/asset_parser.rs b/backend/parsers/windmill-parser-sql/src/asset_parser.rs index 0323bcb062be4..d357f1d494234 100644 --- a/backend/parsers/windmill-parser-sql/src/asset_parser.rs +++ b/backend/parsers/windmill-parser-sql/src/asset_parser.rs @@ -1,151 +1,480 @@ +use std::collections::HashMap; + +use sqlparser::{ + ast::{CopyTarget, Expr, ObjectName, TableFactor, Value, ValueWithSpan, Visit, Visitor}, + dialect::DuckDbDialect, + parser::Parser, +}; use windmill_parser::asset_parser::{ - merge_assets, AssetKind, AssetUsageAccessType, ParseAssetsResult, + merge_assets, parse_asset_syntax, AssetKind, AssetUsageAccessType, ParseAssetsResult, }; use AssetUsageAccessType::*; -use nom::{ - branch::alt, - bytes::complete::{tag, tag_no_case, take_while}, - character::complete::{char, multispace0}, - combinator::opt, - sequence::preceded, - IResult, Parser, -}; +pub fn parse_assets(input: &str) -> anyhow::Result>> { + let statements = Parser::parse_sql(&DuckDbDialect, input)?; -pub fn parse_assets<'a>(input: &str) -> anyhow::Result>> { - let mut assets = Vec::new(); - let mut remaining = input; + let mut collector = AssetCollector::new(); + for statement in statements { + let _ = statement.visit(&mut collector); + } - while !remaining.trim().is_empty() { - if let Ok((rest, _)) = parse_comment(remaining) { - remaining = rest; // skip comment - } - if let Ok((rest, res)) = parse_asset(remaining) { - assets.push(res); - remaining = rest; - } else { - remaining = &remaining[1..]; // skip 1 char and continue + for (_, (kind, path)) in collector.var_identifiers { + if collector + .assets + .iter() + .all(|a| a.path.as_ref() != path || a.kind != kind) + { + collector.assets.push(ParseAssetsResult { + kind: kind, + access_type: None, + path: path, + specific_table: None, + }); } } - Ok(merge_assets(assets)) + Ok(merge_assets(collector.assets)) } -fn parse_asset(input: &str) -> IResult<&str, ParseAssetsResult<&str>> { - alt(( - parse_s3_object_read.map(|path| ParseAssetsResult { - path, - kind: AssetKind::S3Object, - access_type: Some(R), - }), - parse_s3_object_write.map(|path| ParseAssetsResult { - path, - kind: AssetKind::S3Object, - access_type: Some(W), - }), - // Parse ambiguous access_types at the end if we could not find precisely read or copy - parse_s3_object_lit.map(|path| ParseAssetsResult { - path, - kind: AssetKind::S3Object, - access_type: None, - }), - parse_resource_lit.map(|path| ParseAssetsResult { - path, - kind: AssetKind::Resource, - access_type: None, - }), - parse_ducklake_lit.map(|path| ParseAssetsResult { - path, - kind: AssetKind::Ducklake, - access_type: None, - }), - parse_datatable_lit.map(|path| ParseAssetsResult { - path, - kind: AssetKind::DataTable, - access_type: None, - }), - )) - .parse(input) +/// Visitor that collects S3 asset literals from SQL statements +struct AssetCollector { + assets: Vec>, + // e.g set to Read when we are inside a SELECT ... FROM ... statement + current_access_type_stack: Vec, + // e.g ATTACH 'ducklake://a' AS dl; => { "dl": (Ducklake, "a") } + var_identifiers: HashMap, + // e.g USE dl; + currently_used_asset: Option<(AssetKind, String)>, + // } -/// Any expression that reads an s3 asset -fn parse_s3_object_read(input: &str) -> IResult<&str, &str> { - alt((parse_s3_object_read_fn, parse_s3_object_select_from)).parse(input) -} +impl AssetCollector { + fn new() -> Self { + Self { + assets: Vec::new(), + current_access_type_stack: Vec::with_capacity(8), + var_identifiers: HashMap::new(), + currently_used_asset: None, + } + } -/// Any expression that writes to an s3 asset -fn parse_s3_object_write(input: &str) -> IResult<&str, &str> { - // COPY (...) TO 's3://...' - let (input, _) = (tag_no_case("TO"), multispace0).parse(input)?; - let (input, path) = parse_s3_object_lit(input)?; - Ok((input, path)) -} + // Detect when we do 'a.b' and 'a' is associated with an asset in var_identifiers + // Or when we access 'b' and we did USE a; + fn get_associated_asset_from_obj_name( + &self, + name: &ObjectName, + ) -> Option> { + let access_type = self.current_access_type_stack.last().copied(); + if name.0.len() == 1 { + if name.0.first()?.as_ident()?.quote_style.is_some() { + return None; + } + // We don't want to infer that any simple identifier refers to an asset if + // we are not in a known R/W context + if access_type.is_none() { + return None; + } + if let Some((kind, path)) = &self.currently_used_asset { + return Some(ParseAssetsResult { + kind: *kind, + access_type, + path: path.clone(), + specific_table: None, + }); + } + } -/// read_parquet('s3://...') -fn parse_s3_object_read_fn(input: &str) -> IResult<&str, &str> { - let (input, _) = alt(( - tag_no_case("read_parquet"), - tag_no_case("read_csv"), - tag_no_case("read_json"), - )) - .parse(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char('(')(input)?; - let (input, _) = multispace0(input)?; - let (input, path) = parse_s3_object_lit(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(')')(input)?; - Ok((input, path)) -} + // Check if the first part of the name (the a in a.b) is associated with an asset + if name.0.len() < 2 { + return None; + } + let ident = name.0.first()?.as_ident()?; + let (kind, path) = self.var_identifiers.get(&ident.value)?; + Some(ParseAssetsResult { + kind: *kind, + access_type, + path: path.clone(), + specific_table: None, + }) + } + + fn handle_string_literal(&mut self, s: &str) { + // Check if the string matches our asset syntax patterns + if let Some((kind, path)) = parse_asset_syntax(s, false) { + if kind == AssetKind::S3Object { + self.assets.push(ParseAssetsResult { + kind, + path: path.to_string(), + access_type: self.current_access_type_stack.last().copied(), + specific_table: None, + }); + } + } + } + + fn handle_obj_name_pre(&mut self, name: &ObjectName) { + if let Some(fname) = get_trivial_obj_name(name) { + if is_read_fn(fname) { + self.current_access_type_stack.push(R); + } + } + if let Some(str_lit) = get_str_lit_from_obj_name(name) { + self.handle_string_literal(str_lit); + } + if let Some(asset) = self.get_associated_asset_from_obj_name(name) { + self.assets.push(asset); + } + } -/// SELECT ... FROM 's3://...' -fn parse_s3_object_select_from(input: &str) -> IResult<&str, &str> { - let (input, _) = tag_no_case("FROM").parse(input)?; - let (input, _) = multispace0(input)?; - let (input, path) = parse_s3_object_lit(input)?; - Ok((input, path)) + fn handle_obj_name_post(&mut self, name: &ObjectName) { + if self.current_access_type_stack.is_empty() { + return; + } + if let Some(fname) = get_trivial_obj_name(name) { + if is_read_fn(fname) { + self.current_access_type_stack.pop(); + } + } + } } -/// 's3://...' -fn parse_s3_object_lit(input: &str) -> IResult<&str, &str> { - let (input, _) = quote(input)?; - let (input, _) = tag("s3://").parse(input)?; - let (input, path) = take_while(|c| c != '\'' && c != '"')(input)?; - let (input, _) = quote(input)?; - Ok((input, path)) + +impl Visitor for AssetCollector { + type Break = (); + + fn pre_visit_table_factor( + &mut self, + table_factor: &TableFactor, + ) -> std::ops::ControlFlow { + match table_factor { + TableFactor::Table { name, .. } => self.handle_obj_name_pre(name), + _ => {} + } + std::ops::ControlFlow::Continue(()) + } + + fn post_visit_table_factor( + &mut self, + table_factor: &TableFactor, + ) -> std::ops::ControlFlow { + match table_factor { + TableFactor::Table { name, .. } => self.handle_obj_name_post(name), + _ => {} + } + std::ops::ControlFlow::Continue(()) + } + + fn pre_visit_expr(&mut self, expr: &Expr) -> std::ops::ControlFlow { + match expr { + Expr::Value(ValueWithSpan { value: Value::SingleQuotedString(s), .. }) => { + self.handle_string_literal(s) + } + Expr::Value(ValueWithSpan { value: Value::DoubleQuotedString(s), .. }) => { + self.handle_string_literal(s); + } + Expr::Function(func) => self.handle_obj_name_pre(&func.name), + _ => {} + } + std::ops::ControlFlow::Continue(()) + } + + fn post_visit_expr(&mut self, expr: &Expr) -> std::ops::ControlFlow { + match expr { + Expr::Function(func) => self.handle_obj_name_post(&func.name), + _ => {} + } + std::ops::ControlFlow::Continue(()) + } + + fn pre_visit_statement( + &mut self, + statement: &sqlparser::ast::Statement, + ) -> std::ops::ControlFlow { + if let Some(access_type) = get_stmt_access_type(statement) { + self.current_access_type_stack.push(access_type); + } + + match statement { + sqlparser::ast::Statement::Copy { target: CopyTarget::File { filename }, .. } => { + self.current_access_type_stack.push(W); + self.handle_string_literal(filename); + self.current_access_type_stack.pop(); + } + sqlparser::ast::Statement::AttachDuckDBDatabase { + database_path, + database_alias, + .. + } => { + if let Some((kind, path)) = parse_asset_syntax(&database_path.value, true) { + if kind == AssetKind::Ducklake + || kind == AssetKind::DataTable + || kind == AssetKind::Resource + { + if let Some(database_alias) = database_alias { + self.var_identifiers + .insert(database_alias.value.clone(), (kind, path.to_string())); + } + } + } + } + sqlparser::ast::Statement::DetachDuckDBDatabase { database_alias, .. } => { + let asset = self.var_identifiers.remove(&database_alias.value); + if self.currently_used_asset == asset { + self.currently_used_asset = None; + } + } + sqlparser::ast::Statement::Use(sqlparser::ast::Use::Object(obj_name)) => { + if let Some((kind, path)) = self.var_identifiers.get(&obj_name.to_string()) { + self.currently_used_asset = Some((*kind, path.clone())); + } else { + self.currently_used_asset = None; + } + } + _ => {} + } + + std::ops::ControlFlow::Continue(()) + } + + fn post_visit_statement( + &mut self, + statement: &sqlparser::ast::Statement, + ) -> std::ops::ControlFlow { + if let Some(_access_type) = get_stmt_access_type(statement) { + self.current_access_type_stack.pop(); + } + std::ops::ControlFlow::Continue(()) + } + + fn pre_visit_query( + &mut self, + _query: &sqlparser::ast::Query, + ) -> std::ops::ControlFlow { + self.current_access_type_stack.push(R); + std::ops::ControlFlow::Continue(()) + } + + fn post_visit_query( + &mut self, + _query: &sqlparser::ast::Query, + ) -> std::ops::ControlFlow { + self.current_access_type_stack.pop(); + std::ops::ControlFlow::Continue(()) + } + + fn pre_visit_relation(&mut self, relation: &ObjectName) -> std::ops::ControlFlow { + self.handle_obj_name_pre(relation); + std::ops::ControlFlow::Continue(()) + } + + fn post_visit_relation(&mut self, relation: &ObjectName) -> std::ops::ControlFlow { + self.handle_obj_name_post(relation); + std::ops::ControlFlow::Continue(()) + } } -fn quote(input: &str) -> IResult<&str, char> { - alt((char('\''), char('\"'))).parse(input) +fn is_read_fn(fname: &str) -> bool { + fname.eq_ignore_ascii_case("read_parquet") + || fname.eq_ignore_ascii_case("read_csv") + || fname.eq_ignore_ascii_case("read_json") } -fn parse_resource_lit(input: &str) -> IResult<&str, &str> { - let (input, _) = quote(input)?; - let (input, _) = alt((tag("$res:"), tag("res://"))).parse(input)?; - let (input, path) = take_while(|c| c != '\'' && c != '"')(input)?; - let (input, _) = quote(input)?; - Ok((input, path)) +fn get_stmt_access_type(statement: &sqlparser::ast::Statement) -> Option { + match statement { + sqlparser::ast::Statement::Query(_) => Some(R), + sqlparser::ast::Statement::Insert(insert) => { + Some(if insert.returning.is_some() { RW } else { W }) + } + sqlparser::ast::Statement::Update { returning, .. } => { + Some(if returning.is_some() { RW } else { W }) + } + sqlparser::ast::Statement::Delete(delete) => { + Some(if delete.returning.is_some() { RW } else { W }) + } + sqlparser::ast::Statement::CreateTable { .. } => Some(W), + sqlparser::ast::Statement::CreateView { .. } => Some(W), + _ => None, + } } -fn parse_ducklake_lit(input: &str) -> IResult<&str, &str> { - let (input, _) = quote(input)?; - let (input, _) = tag("ducklake").parse(input)?; - let (input, path) = - opt(preceded(tag("://"), take_while(|c| c != '\'' && c != '"'))).parse(input)?; - let (input, _) = quote(input)?; - Ok((input, path.unwrap_or("main"))) +fn get_trivial_obj_name(name: &sqlparser::ast::ObjectName) -> Option<&str> { + if name.0.len() != 1 { + return None; + } + Some(name.0.first()?.as_ident()?.value.as_str()) } -fn parse_datatable_lit(input: &str) -> IResult<&str, &str> { - let (input, _) = quote(input)?; - let (input, _) = tag("datatable").parse(input)?; - let (input, path) = - opt(preceded(tag("://"), take_while(|c| c != '\'' && c != '"'))).parse(input)?; - let (input, _) = quote(input)?; - Ok((input, path.unwrap_or("main"))) +fn get_str_lit_from_obj_name(name: &ObjectName) -> Option<&str> { + if name.0.len() != 1 { + return None; + } + let ident = name.0.first()?.as_ident()?; + if ident.quote_style != Some('\'') { + return None; + } + Some(ident.value.as_str()) } -fn parse_comment(input: &str) -> IResult<&str, &str> { - let (input, _) = tag("--").parse(input)?; - let (input, comment) = take_while(|c| c != '\n')(input)?; - Ok((input, comment)) +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_sql_asset_parser_s3_literals() { + let input = r#" + SELECT * FROM read_parquet('s3:///a.parquet'); + COPY (SELECT * FROM 's3://snd/b.parquet') TO 's3:///c.parquet'; + "#; + let s = parse_assets(input); + assert_eq!( + s.map_err(|e| e.to_string()), + Ok(vec![ + ParseAssetsResult { + kind: AssetKind::S3Object, + path: "/a.parquet".to_string(), + access_type: Some(R), + specific_table: None + }, + ParseAssetsResult { + kind: AssetKind::S3Object, + path: "/c.parquet".to_string(), + access_type: Some(W), + specific_table: None + }, + ParseAssetsResult { + kind: AssetKind::S3Object, + path: "snd/b.parquet".to_string(), + access_type: Some(R), + specific_table: None + }, + ]) + ); + } + + #[test] + fn test_sql_asset_parser_attach_no_usage_is_registered_as_unknown() { + let input = r#" + ATTACH 'ducklake://my_dl' AS dl; + SELECT 2; + USE dl; + "#; + let s = parse_assets(input); + assert_eq!( + s.map_err(|e| e.to_string()), + Ok(vec![ParseAssetsResult { + kind: AssetKind::Ducklake, + path: "my_dl".to_string(), + access_type: None, + specific_table: None + },]) + ); + } + + #[test] + fn test_sql_asset_parser_attach_dot_notation_read() { + let input = r#" + ATTACH 'ducklake://my_dl' AS dl; + SELECT * FROM dl.table1; + "#; + let s = parse_assets(input); + assert_eq!( + s.map_err(|e| e.to_string()), + Ok(vec![ParseAssetsResult { + kind: AssetKind::Ducklake, + path: "my_dl".to_string(), + access_type: Some(R), + specific_table: None + },]) + ); + } + + #[test] + fn test_sql_asset_parser_attach_dot_notation_write() { + let input = r#" + ATTACH 'ducklake://my_dl' AS dl; + SELECT dl.read_bait FROM unrelated_table; -- dl. doesn't access the asset + INSERT INTO dl.table1 VALUES ('test'); + "#; + let s = parse_assets(input); + assert_eq!( + s.map_err(|e| e.to_string()), + Ok(vec![ParseAssetsResult { + kind: AssetKind::Ducklake, + path: "my_dl".to_string(), + access_type: Some(W), + specific_table: None + },]) + ); + } + + #[test] + fn test_sql_asset_parser_detach() { + let input = r#" + ATTACH 'ducklake://my_dl' AS dl; + DETACH dl; + SELECT * FROM dl.table1; + "#; + let s = parse_assets(input); + assert_eq!(s.map_err(|e| e.to_string()), Ok(vec![])); + } + + #[test] + fn test_sql_asset_parser_implicit_use_asset() { + let input = r#" + ATTACH 'ducklake://my_dl' AS dl; + USE dl; + INSERT INTO table1 VALUES ('test'); + USE memory; + SELECT * FROM table1; + "#; + let s = parse_assets(input); + assert_eq!( + s.map_err(|e| e.to_string()), + Ok(vec![ParseAssetsResult { + kind: AssetKind::Ducklake, + path: "my_dl".to_string(), + access_type: Some(W), + specific_table: None + },]) + ); + } + + #[test] + fn test_sql_asset_parser_default_main() { + let input = r#" + ATTACH 'datatable' AS dl; + INSERT INTO dl.table1 VALUES ('test'); + "#; + let s = parse_assets(input); + assert_eq!( + s.map_err(|e| e.to_string()), + Ok(vec![ParseAssetsResult { + kind: AssetKind::DataTable, + path: "main".to_string(), + access_type: Some(W), + specific_table: None + },]) + ); + } + + #[test] + fn test_sql_asset_parser_create_table() { + let input = r#" + ATTACH 'ducklake' AS dl; USE dl; + CREATE TABLE friends ( + name text, + age int + ); + INSERT INTO friends VALUES ($name, $age); + SELECT * FROM friends; + "#; + let s = parse_assets(input); + assert_eq!( + s.map_err(|e| e.to_string()), + Ok(vec![ParseAssetsResult { + kind: AssetKind::Ducklake, + path: "main".to_string(), + access_type: Some(RW), + specific_table: None + },]) + ); + } } diff --git a/backend/parsers/windmill-parser-ts/src/asset_parser.rs b/backend/parsers/windmill-parser-ts/src/asset_parser.rs index 6ac4c4dd17c40..a6a5b13b6be3a 100644 --- a/backend/parsers/windmill-parser-ts/src/asset_parser.rs +++ b/backend/parsers/windmill-parser-ts/src/asset_parser.rs @@ -58,11 +58,12 @@ impl Visit for AssetsFinder { fn visit_lit(&mut self, node: &swc_ecma_ast::Lit) { match node { swc_ecma_ast::Lit::Str(str) => { - if let Some((kind, path)) = parse_asset_syntax(str.value.as_str()) { + if let Some((kind, path)) = parse_asset_syntax(str.value.as_str(), false) { self.assets.push(ParseAssetsResult { kind, path: path.to_string(), access_type: None, + specific_table: None, }); } } @@ -99,8 +100,12 @@ impl Visit for AssetsFinder { { continue; } - self.assets - .push(ParseAssetsResult { kind, access_type: None, path: path.clone() }); + self.assets.push(ParseAssetsResult { + kind, + access_type: None, + path: path.clone(), + specific_table: None, + }); } // Restore state - identifiers declared in this block go out of scope @@ -192,8 +197,12 @@ impl Visit for AssetsFinder { // Determine access type based on SQL keywords let access_type = detect_sql_access_type(&sql); - self.assets - .push(ParseAssetsResult { kind, path: asset_name, access_type }); + self.assets.push(ParseAssetsResult { + kind, + path: asset_name, + access_type, + specific_table: None, + }); } } @@ -221,9 +230,15 @@ impl AssetsFinder { match arg_value.map(|e| e.expr.as_ref()) { Some(Expr::Lit(Lit::Str(Str { value, .. }))) => { - let path = parse_asset_syntax(&value).map(|(_, p)| p).unwrap_or(&value); - self.assets - .push(ParseAssetsResult { kind, path: path.to_string(), access_type }); + let path = parse_asset_syntax(&value, false) + .map(|(_, p)| p) + .unwrap_or(&value); + self.assets.push(ParseAssetsResult { + kind, + path: path.to_string(), + access_type, + specific_table: None, + }); } _ => return Err(()), } diff --git a/backend/parsers/windmill-parser-wasm/src/lib.rs b/backend/parsers/windmill-parser-wasm/src/lib.rs index f6de0a7b0508a..61cc11d81d0e2 100644 --- a/backend/parsers/windmill-parser-wasm/src/lib.rs +++ b/backend/parsers/windmill-parser-wasm/src/lib.rs @@ -190,41 +190,36 @@ pub fn parse_ruby(code: &str) -> String { #[cfg(feature = "sql-parser")] #[wasm_bindgen] pub fn parse_assets_sql(code: &str) -> String { - if let Ok(r) = windmill_parser_sql::parse_assets(code) { - return serde_json::to_string(&r).unwrap(); - } else { - return "Invalid".to_string(); + match windmill_parser_sql::parse_assets(code) { + Ok(r) => serde_json::to_string(&r).unwrap(), + Err(err) => format!("err: {:?}", err), } } #[cfg(feature = "ts-parser")] #[wasm_bindgen] pub fn parse_assets_ts(code: &str) -> String { - if let Ok(r) = windmill_parser_ts::parse_assets(code) { - return serde_json::to_string(&r).unwrap(); - } else { - return "Invalid".to_string(); + match windmill_parser_ts::parse_assets(code) { + Ok(r) => serde_json::to_string(&r).unwrap(), + Err(err) => format!("err: {:?}", err), } } #[cfg(feature = "py-parser")] #[wasm_bindgen] pub fn parse_assets_py(code: &str) -> String { - if let Ok(r) = windmill_parser_py::parse_assets(code) { - return serde_json::to_string(&r).unwrap(); - } else { - return "Invalid".to_string(); + match windmill_parser_py::parse_assets(code) { + Ok(r) => serde_json::to_string(&r).unwrap(), + Err(err) => format!("err: {:?}", err), } } #[cfg(feature = "ansible-parser")] #[wasm_bindgen] pub fn parse_assets_ansible(code: &str) -> String { - let o = windmill_parser_yaml::parse_assets(code); - if let Ok(r) = o { - return serde_json::to_string(&r).unwrap(); - } else { - return format!("err: {:?}", o.err().unwrap()); + match windmill_parser_yaml::parse_assets(code) { + Ok(r) => serde_json::to_string(&r).unwrap(), + Err(err) => format!("err: {:?}", err), } } diff --git a/backend/parsers/windmill-parser-yaml/src/asset_parser.rs b/backend/parsers/windmill-parser-yaml/src/asset_parser.rs index 0b9ba0d38a89b..94414db2c0a96 100644 --- a/backend/parsers/windmill-parser-yaml/src/asset_parser.rs +++ b/backend/parsers/windmill-parser-yaml/src/asset_parser.rs @@ -1,6 +1,6 @@ use windmill_parser::asset_parser::{ - merge_assets, AssetKind, AssetUsageAccessType, ParseAssetsResult, - }; + merge_assets, AssetKind, AssetUsageAccessType, ParseAssetsResult, +}; use crate::{parse_ansible_reqs, ResourceOrVariablePath}; @@ -12,6 +12,7 @@ pub fn parse_assets(input: &str) -> anyhow::Result kind: AssetKind::Resource, path: delegate_to_git_repo_details.resource, access_type: Some(AssetUsageAccessType::R), + specific_table: None, }) } @@ -21,6 +22,7 @@ pub fn parse_assets(input: &str) -> anyhow::Result kind: AssetKind::Resource, path: pinned_res, access_type: Some(AssetUsageAccessType::R), + specific_table: None, }) } } @@ -31,6 +33,7 @@ pub fn parse_assets(input: &str) -> anyhow::Result kind: AssetKind::Resource, path: resource, access_type: Some(AssetUsageAccessType::R), + specific_table: None, }) } } @@ -38,4 +41,3 @@ pub fn parse_assets(input: &str) -> anyhow::Result Ok(merge_assets(assets)) } - diff --git a/backend/parsers/windmill-parser/src/asset_parser.rs b/backend/parsers/windmill-parser/src/asset_parser.rs index 26a9203c8ef94..d6bc171ec2911 100644 --- a/backend/parsers/windmill-parser/src/asset_parser.rs +++ b/backend/parsers/windmill-parser/src/asset_parser.rs @@ -1,6 +1,6 @@ use serde::Serialize; -#[derive(Serialize, PartialEq, Clone, Copy)] +#[derive(Serialize, PartialEq, Clone, Copy, Debug)] #[serde(rename_all(serialize = "lowercase"))] pub enum AssetUsageAccessType { R, @@ -10,7 +10,7 @@ pub enum AssetUsageAccessType { use AssetUsageAccessType::*; -#[derive(Serialize, PartialEq, Clone, Copy)] +#[derive(Serialize, PartialEq, Clone, Copy, Debug)] #[serde(rename_all(serialize = "lowercase"))] pub enum AssetKind { S3Object, @@ -19,12 +19,14 @@ pub enum AssetKind { DataTable, } -#[derive(Serialize)] +#[derive(Serialize, Debug, PartialEq)] pub struct ParseAssetsResult> { pub kind: AssetKind, pub path: S, #[serde(skip_serializing_if = "Option::is_none")] pub access_type: Option, // None in case of ambiguity + #[serde(skip_serializing_if = "Option::is_none")] + pub specific_table: Option, // e.g SELECT * FROM my_ducklake.friends => specific_table = "friends" } #[derive(Debug, Clone, Serialize)] @@ -58,8 +60,12 @@ pub fn merge_assets>(assets: Vec>) -> Vec Option<(AssetKind, &str)> { - if s.starts_with("s3://") { +pub fn parse_asset_syntax(s: &str, enable_default_syntax: bool) -> Option<(AssetKind, &str)> { + if enable_default_syntax && s == "datatable" { + Some((AssetKind::DataTable, "main")) + } else if enable_default_syntax && s == "ducklake" { + Some((AssetKind::Ducklake, "main")) + } else if s.starts_with("s3://") { Some((AssetKind::S3Object, &s[5..])) } else if s.starts_with("res://") { Some((AssetKind::Resource, &s[6..])) diff --git a/frontend/src/lib/components/EditorBar.svelte b/frontend/src/lib/components/EditorBar.svelte index 9718e3f6526f9..54526592e4a9f 100644 --- a/frontend/src/lib/components/EditorBar.svelte +++ b/frontend/src/lib/components/EditorBar.svelte @@ -741,7 +741,7 @@ JsonNode ${windmillPathToCamelCaseName(path)} = JsonNode.Parse(await client.GetS } }} tooltip="Attach a Ducklake to your scripts. Ducklake allows you to manipulate large data on S3 blob files through a traditional SQL interface." - documentationLink="https://www.windmill.dev/docs/core_concepts/ducklake" + documentationLink="https://www.windmill.dev/docs/core_concepts/persistent_storage/ducklake" itemName="ducklake" loadItems={async () => (await WorkspaceService.listDucklakes({ workspace: $workspaceStore ?? 'NO_W' })).map( @@ -783,7 +783,7 @@ JsonNode ${windmillPathToCamelCaseName(path)} = JsonNode.Parse(await client.GetS } }} tooltip="Attach a datatable to your script." - documentationLink="https://www.windmill.dev/docs/core_concepts/data_tables" + documentationLink="https://www.windmill.dev/docs/core_concepts/persistent_storage/data_tables" itemName="data table" loadItems={async () => (await WorkspaceService.listDataTables({ workspace: $workspaceStore ?? 'NO_W' })).map( @@ -836,7 +836,7 @@ JsonNode ${windmillPathToCamelCaseName(path)} = JsonNode.Parse(await client.GetS onSelectAndClose={(s3obj) => { let s = `'${formatS3Object(s3obj)}'` if (lang === 'duckdb') { - editor?.insertAtCursor(`SELECT * FROM ${s}`) + editor?.insertAtCursor(`SELECT * FROM ${s};`) } else if (lang === 'python3') { if (!editor?.getCode().includes('import wmill')) { editor?.insertAtBeginning('import wmill\n') diff --git a/frontend/src/lib/components/ScriptEditor.svelte b/frontend/src/lib/components/ScriptEditor.svelte index 1346502948af6..afeea09dd4a4c 100644 --- a/frontend/src/lib/components/ScriptEditor.svelte +++ b/frontend/src/lib/components/ScriptEditor.svelte @@ -58,6 +58,7 @@ import { copilotInfo } from '$lib/aiStore' import JsonInputs from '$lib/components/JsonInputs.svelte' import Toggle from './Toggle.svelte' + import { deepEqual } from 'fast-equals' interface Props { // Exported @@ -158,12 +159,14 @@ $effect(() => { ;[lang, code] untrack(() => { - inferAssets(lang, code).then((newAssets: AssetWithAltAccessType[]) => { + inferAssets(lang, code).then((inferAssetsResult) => { + if (inferAssetsResult.status === 'error') return + let newAssets = inferAssetsResult.assets as AssetWithAltAccessType[] for (const asset of newAssets) { const old = assets?.find((a) => assetEq(a, asset)) if (old?.alt_access_type) asset.alt_access_type = old.alt_access_type } - assets = newAssets + if (!deepEqual(assets, newAssets)) assets = newAssets }) if (lang === 'ansible') { diff --git a/frontend/src/lib/components/assets/AssetsDropdownButton.svelte b/frontend/src/lib/components/assets/AssetsDropdownButton.svelte index e480a86ed100a..4987ba9aa51b0 100644 --- a/frontend/src/lib/components/assets/AssetsDropdownButton.svelte +++ b/frontend/src/lib/components/assets/AssetsDropdownButton.svelte @@ -107,7 +107,7 @@ 'text-xs flex items-center gap-1.5 px-2 rounded-md relative', 'border', 'bg-surface hover:bg-surface-hover active:bg-surface', - 'transition-all hover:text-primary cursor-pointer' + 'transition-all hover:text-primary backdrop-blur-md cursor-pointer' )} >
@@ -162,7 +160,11 @@ Please select manually
- + {#snippet children({ item })} diff --git a/frontend/src/lib/components/assets/JobAssetsViewer.svelte b/frontend/src/lib/components/assets/JobAssetsViewer.svelte index 6375783595d0a..22ca628712cc2 100644 --- a/frontend/src/lib/components/assets/JobAssetsViewer.svelte +++ b/frontend/src/lib/components/assets/JobAssetsViewer.svelte @@ -35,10 +35,9 @@ ) } if (job.job_kind === 'script') { - return [ - ...(await inferAssets(job.language!, job.raw_code ?? '')), - ...parseInputArgsAssets(job.args ?? {}) - ] + let inferAssetsResult = await inferAssets(job.language!, job.raw_code ?? '') + let assets = inferAssetsResult.status === 'ok' ? inferAssetsResult.assets : [] + return [...assets, ...parseInputArgsAssets(job.args ?? {})] } return [] } diff --git a/frontend/src/lib/components/assets/lib.ts b/frontend/src/lib/components/assets/lib.ts index cb7b824a64d06..5296308c9dfff 100644 --- a/frontend/src/lib/components/assets/lib.ts +++ b/frontend/src/lib/components/assets/lib.ts @@ -93,8 +93,8 @@ export function formatAssetAccessType(accessType: AssetUsageAccessType | undefin } export function getAccessType(asset: AssetWithAltAccessType): AssetUsageAccessType | undefined { - if (asset.alt_access_type) return asset.alt_access_type if (asset.access_type) return asset.access_type + if (asset.alt_access_type) return asset.alt_access_type } export function getFlowModuleAssets( diff --git a/frontend/src/lib/components/flows/FlowAssetsHandler.svelte b/frontend/src/lib/components/flows/FlowAssetsHandler.svelte index cd4ebc98c49e3..f4e52182372b9 100644 --- a/frontend/src/lib/components/flows/FlowAssetsHandler.svelte +++ b/frontend/src/lib/components/flows/FlowAssetsHandler.svelte @@ -119,14 +119,14 @@ }) async function parseAndUpdateRawScriptModule(v: RawScript) { - try { - let parsedAssets: AssetWithAltAccessType[] = await inferAssets(v.language, v.content) - for (const asset of parsedAssets) { - const old = v.assets?.find((a) => assetEq(a, asset)) - if (old?.alt_access_type) asset.alt_access_type = old.alt_access_type - } - if (!deepEqual(v.assets, parsedAssets)) v.assets = parsedAssets - } catch (e) {} + let inferAssetsResult = await inferAssets(v.language, v.content) + if (inferAssetsResult.status === 'error') return + let newAssets = inferAssetsResult.assets as AssetWithAltAccessType[] + for (const asset of newAssets) { + const old = v.assets?.find((a) => assetEq(a, asset)) + if (old?.alt_access_type) asset.alt_access_type = old.alt_access_type + } + if (!deepEqual(v.assets, newAssets)) v.assets = newAssets } // Check for raw script modules whose assets were not parsed. Useful for flows created diff --git a/frontend/src/lib/components/workspaceSettings/DucklakeSettings.svelte b/frontend/src/lib/components/workspaceSettings/DucklakeSettings.svelte index 88bdaa877ba97..093c78c9f6d76 100644 --- a/frontend/src/lib/components/workspaceSettings/DucklakeSettings.svelte +++ b/frontend/src/lib/components/workspaceSettings/DucklakeSettings.svelte @@ -183,7 +183,7 @@
Ducklake
- + Windmill has first class support for Ducklake. You can use and explore ducklakes like a normal SQL database, even though the data is actually stored in parquet files in S3 ! diff --git a/frontend/src/lib/infer.ts b/frontend/src/lib/infer.ts index ffe37a860f6b7..228e68d491d80 100644 --- a/frontend/src/lib/infer.ts +++ b/frontend/src/lib/infer.ts @@ -89,33 +89,43 @@ async function initWasmRuby() { await initRubyParser(wasmUrlRuby) } +type InferAssetsResult = + | { status: 'ok'; assets: AssetWithAccessType[] } + | { status: 'error'; error: string } + export async function inferAssets( language: SupportedLanguage | undefined, code: string -): Promise { +): Promise { + function wrap(raw_result: string): InferAssetsResult { + if (raw_result.startsWith('err:')) { + return { status: 'error', error: raw_result.slice(4).trim() } + } + return { status: 'ok', assets: JSON.parse(raw_result) as AssetWithAccessType[] } + } + try { if (language === 'duckdb') { await initWasmRegex() - let r = JSON.parse(parse_assets_sql(code)) - return r + return wrap(parse_assets_sql(code)) } if (language === 'deno' || language === 'nativets' || language === 'bun') { await initWasmTs() - return JSON.parse(parse_assets_ts(code)) + return wrap(parse_assets_ts(code)) } if (language === 'python3') { await initWasmPython() - return JSON.parse(parse_assets_py(code)) + return wrap(parse_assets_py(code)) } if (language === 'ansible') { await initWasmYaml() - return JSON.parse(parse_assets_ansible(code)) + return wrap(parse_assets_ansible(code)) } } catch (e) { - console.error('error parsing assets', e) - return [] + return { status: 'error', error: (e as Error)?.message || JSON.stringify(e) } } - return [] + + return { status: 'ok', assets: [] } } export async function inferAnsibleExecutionMode(code: string): Promise { diff --git a/frontend/src/lib/script_helpers.ts b/frontend/src/lib/script_helpers.ts index 58bec208de794..f8fc95c8f382e 100644 --- a/frontend/src/lib/script_helpers.ts +++ b/frontend/src/lib/script_helpers.ts @@ -323,7 +323,7 @@ const DUCKDB_INIT_CODE = `-- result_collection=last_statement_all_rows -- SELECT * FROM db.public.friends; -- Click the +Ducklake button to use a ducklake --- https://www.windmill.dev/docs/core_concepts/ducklake +-- https://www.windmill.dev/docs/core_concepts/persistent_storage/ducklake -- -- ATTACH 'ducklake' AS dl; -- USE dl;