Skip to content

Commit e122898

Browse files
committed
Merge branch 'main' into chore/remove-pyconfig
2 parents 7e0c539 + 0a9ca68 commit e122898

13 files changed

Lines changed: 1072 additions & 326 deletions

File tree

Cargo.lock

Lines changed: 255 additions & 286 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ arrow = { version = "58" }
4040
arrow-array = { version = "58" }
4141
arrow-schema = { version = "58" }
4242
arrow-select = { version = "58" }
43-
datafusion = { version = "53" }
44-
datafusion-substrait = { version = "53" }
45-
datafusion-proto = { version = "53" }
46-
datafusion-ffi = { version = "53" }
47-
datafusion-catalog = { version = "53", default-features = false }
48-
datafusion-common = { version = "53", default-features = false }
49-
datafusion-functions-aggregate = { version = "53" }
50-
datafusion-functions-window = { version = "53" }
51-
datafusion-expr = { version = "53" }
43+
datafusion = { version = "54" }
44+
datafusion-substrait = { version = "54" }
45+
datafusion-proto = { version = "54" }
46+
datafusion-ffi = { version = "54" }
47+
datafusion-catalog = { version = "54", default-features = false }
48+
datafusion-common = { version = "54", default-features = false }
49+
datafusion-functions-aggregate = { version = "54" }
50+
datafusion-functions-window = { version = "54" }
51+
datafusion-expr = { version = "54" }
5252
prost = "0.14.3"
5353
serde_json = "1"
5454
uuid = { version = "1.23" }
@@ -71,12 +71,12 @@ codegen-units = 2
7171
# We cannot publish to crates.io with any patches in the section below. Developers
7272
# must remove any entries in this section before creating a release candidate.
7373
[patch.crates-io]
74-
datafusion = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
75-
datafusion-substrait = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
76-
datafusion-proto = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
77-
datafusion-ffi = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
78-
datafusion-catalog = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
79-
datafusion-common = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
80-
datafusion-functions-aggregate = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
81-
datafusion-functions-window = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
82-
datafusion-expr = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" }
74+
datafusion = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
75+
datafusion-substrait = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
76+
datafusion-proto = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
77+
datafusion-ffi = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
78+
datafusion-catalog = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
79+
datafusion-common = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
80+
datafusion-functions-aggregate = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
81+
datafusion-functions-window = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }
82+
datafusion-expr = { git = "https://github.com/apache/datafusion", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" }

crates/core/src/codec.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
//!
2020
//! Datafusion-python plans can carry references to Python-defined
2121
//! objects that the upstream protobuf codecs do not know how to
22-
//! serialize: pure-Python scalar UDFs, Python query-planning
23-
//! extensions, and so on. Their state lives inside `Py<PyAny>`
24-
//! callables and closures rather than being recoverable from a name
25-
//! in the receiver's function registry. To ship a plan across a
26-
//! process boundary (pickle, `multiprocessing`, Ray actor,
22+
//! serialize: pure-Python scalar / aggregate / window UDFs, Python
23+
//! query-planning extensions, and so on. Their state lives inside
24+
//! `Py<PyAny>` callables and closures rather than being recoverable
25+
//! from a name in the receiver's function registry. To ship a plan
26+
//! across a process boundary (pickle, `multiprocessing`, Ray actor,
2727
//! `datafusion-distributed`, etc.) those payloads have to be encoded
2828
//! into the proto wire format itself.
2929
//!
@@ -256,7 +256,12 @@ impl PythonLogicalCodec {
256256
/// `cloudpickle.loads` on the inline `DFPY*` payload. It does
257257
/// **not** make `pickle.loads(untrusted_bytes)` safe; treat every
258258
/// `pickle.loads` on untrusted input as unsafe regardless of this
259-
/// setting.
259+
/// setting. See `docs/source/user-guide/distributing-work.rst`
260+
/// (Security section) for the full threat model, and Python's
261+
/// [pickle module security warning][1] for why `pickle.loads` is
262+
/// unsafe in general.
263+
///
264+
/// [1]: https://docs.python.org/3/library/pickle.html#module-pickle
260265
pub fn with_python_udf_inlining(mut self, enabled: bool) -> Self {
261266
self.python_udf_inlining = enabled;
262267
self
@@ -433,7 +438,7 @@ fn refuse_inline_payload(kind: &str, name: &str) -> datafusion::error::DataFusio
433438
/// encoding on this layer too — otherwise a plan with a Python UDF
434439
/// would round-trip at the logical level but break at the physical
435440
/// level. Both layers reuse the shared payload framing
436-
/// ([`PY_SCALAR_UDF_FAMILY`]) so the wire format is identical.
441+
/// ([`PY_SCALAR_UDF_FAMILY`] et al.) so the wire format is identical.
437442
#[derive(Debug)]
438443
pub struct PythonPhysicalCodec {
439444
inner: Arc<dyn PhysicalExtensionCodec>,

crates/core/src/dataset_exec.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,11 @@ use datafusion::arrow::datatypes::SchemaRef;
2121
use datafusion::arrow::error::{ArrowError, Result as ArrowResult};
2222
use datafusion::arrow::pyarrow::PyArrowType;
2323
use datafusion::arrow::record_batch::RecordBatch;
24-
use datafusion::common::tree_node::TreeNodeRecursion;
2524
use datafusion::error::{DataFusionError as InnerDataFusionError, Result as DFResult};
2625
use datafusion::execution::context::TaskContext;
2726
use datafusion::logical_expr::Expr;
2827
use datafusion::logical_expr::utils::conjunction;
29-
use datafusion::physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr};
28+
use datafusion::physical_expr::{EquivalenceProperties, LexOrdering};
3029
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
3130
use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
3231
use datafusion::physical_plan::{
@@ -234,13 +233,6 @@ impl ExecutionPlan for DatasetExec {
234233
Ok(Arc::new(self.projected_statistics.clone()))
235234
}
236235

237-
fn apply_expressions(
238-
&self,
239-
_f: &mut dyn FnMut(&dyn PhysicalExpr) -> DFResult<TreeNodeRecursion>,
240-
) -> DFResult<TreeNodeRecursion> {
241-
Ok(TreeNodeRecursion::Continue)
242-
}
243-
244236
fn properties(&self) -> &Arc<PlanProperties> {
245237
&self.plan_properties
246238
}

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ Example
7676
user-guide/common-operations/index
7777
user-guide/io/index
7878
user-guide/configuration
79+
user-guide/distributing-work
7980
user-guide/sql
8081
user-guide/upgrade-guides
8182
user-guide/ai-coding-assistants

0 commit comments

Comments
 (0)