Description
I’ve had two cases where I helped with user questions about Gradio here and here, which concern false negatives. The first case was missed due to CodeQL not propagating from gr.File() (this Gradio component allows for uploading files in a web app) to gr.File().orig_name and gr.File() to .gr.File().name. Both name and orig_name attributes hold path to the file, and are usually used to read a file.
It would make sense to add them as a taint step for Gradio, but quick-evaluating the hacky taint step I proposed gives way more results than expected. It seems to report any attribute read. Would love to get input on how to improve it.
/**
* Extra taint propagation from gr.File to gr.File.name and gr.File.orig_name,
* which hold the path to the file.
*/
private class GrFileTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::AttrRead attr |
nodeFrom = API::moduleImport("gradio").getMember("File").getReturn().asSource() and
attr.accesses(nodeFrom, ["orig_name", "name"])
and nodeTo = attr
)
}
}
Note that finding this vulnerability required also another step from config_file
to open(config_file)
. Please see the thread for more info.
This makes me wonder if we should have a taint step to an open()
call, since there may be similar cases like this one. Please see code below for details.
code with the missed vulnerability from the second user report and the taint steps
import pickle
import gradio as gr
def update_ui_from_config(config_file):
if config_file is not None:
loaded_config = load_config_from_file(config_file.name)
def load_config_from_file(config_file):
"""Load settings from a UUID.pkl file."""
try:
with open(config_file, 'rb') as f:
settings = pickle.load(f)
return settings
except Exception as e:
return f"Error loading configuration: {str(e)}"
with gr.TabItem(":file_folder: Configuration", id=5):
with gr.Group():
config_file_input = gr.File(
label="Load Config File",
file_types=[".pkl"],
interactive=True
)
load_config_button = gr.Button("Load Existing Config From File", variant="primary")
save_config_button = gr.Button("Save Current Config", variant="primary")
config_status = gr.Textbox(
label="Status",
lines=2,
interactive=False
)
load_config_button.click(
fn=update_ui_from_config,
inputs=[config_file_input],
outputs=[config_status]
)
The taint steps I wrote that find this vulnerability:
/**
* @name Gradio File Input Flow
* @description This query tracks data flow from Gradio's Button component to file decoding.
* @kind path-problem
* @problem.severity warning
* @id gradio-file
*/
import python
import semmle.python.ApiGraphs
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.TaintTracking
import MyFlow::PathGraph
class GradioButton extends RemoteFlowSource::Range {
GradioButton() {
exists(API::CallNode n |
n = API::moduleImport("gradio").getMember("Button").getReturn()
.getMember("click").getACall() |
this = n.getParameter(0, "fn").getParameter(_).asSource())
}
override string getSourceType() { result = "Gradio untrusted input" }
}
private module MyConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof GradioButton }
predicate isSink(DataFlow::Node sink) { exists(Decoding d | sink = d.getAnInput()) }
predicate isAdditionalFlowStep(DataFlow::Node nodefrom, DataFlow::Node nodeto) {
exists(DataFlow::AttrRead attr |
attr.accesses(nodefrom, _)
and nodeto = attr)
or
exists(FunctionValue func|
func = Value::named("open") and
nodefrom.asCfgNode() = func.getACall().getArg(0) and
nodeto.asCfgNode() = func.getACall()
)
}
}
module MyFlow = TaintTracking::Global<MyConfig>;
from MyFlow::PathNode source, MyFlow::PathNode sink
where MyFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "Data Flow from a Gradio source to decoding"