Skip to content

Commit 2dbf46d

Browse files
committed
Text to SQL RFT example
1 parent 2f4e892 commit 2dbf46d

30 files changed

+83433
-0
lines changed

text-to-sql-ep-rft/Makefile

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Interpreter used to run the scripts under scripts/. Override on the
# command line when needed, e.g. `make sim-prod PYTHON=python3.11`.
PYTHON ?= python
# Default to the pip module of $(PYTHON) so `make install` installs into the
# same interpreter that later runs the scripts; a bare `pip` found on PATH
# can belong to a different Python installation. `make install PIP=...`
# still overrides this.
PIP ?= $(PYTHON) -m pip
3+
4+
# Install the Python packages listed in requirements.txt using $(PIP).
# `install` is a command name, not a file, hence the phony declaration.
.PHONY: install
install:
	$(PIP) install --requirement requirements.txt
7+
8+
# Run the whole data-generation pipeline, stage by stage, in script order
# (01 through 07).
#
# The stages are invoked one at a time through $(MAKE) instead of being
# listed as prerequisites: prerequisites of a single target may be started
# concurrently under `make -j`, which would let a later stage start before
# an earlier one has finished. Using $(MAKE) (not a literal `make`) keeps
# command-line variable overrides such as PYTHON=... and `-n` working in
# the sub-invocations. A failing stage stops the pipeline.
.PHONY: all-data
all-data:
	$(MAKE) sim-prod
	$(MAKE) extract-schema
	$(MAKE) synth
	$(MAKE) gen-queries
	$(MAKE) augment
	$(MAKE) ground-truth
	$(MAKE) gen-nl
10+
11+
# Individual data-generation stages. Each target runs exactly one numbered
# script from scripts/ with $(PYTHON). None of these names is a real file,
# so all of them are declared phony in a single list.
.PHONY: sim-prod extract-schema synth gen-queries augment ground-truth gen-nl

# Stage 01
sim-prod:
	$(PYTHON) scripts/01_simulate_prod_db.py

# Stage 02
extract-schema:
	$(PYTHON) scripts/02_extract_schema.py

# Stage 03
synth:
	$(PYTHON) scripts/03_generate_synthetic_data.py

# Stage 04
gen-queries:
	$(PYTHON) scripts/04_generate_queries.py

# Stage 05
augment:
	$(PYTHON) scripts/05_augment_sandbox.py

# Stage 06
ground-truth:
	$(PYTHON) scripts/06_ground_truth.py

# Stage 07
gen-nl:
	$(PYTHON) scripts/07_generate_nl_questions.py
38+
39+
# Run the test suite quietly.
# pytest is started as a module of $(PYTHON) so the tests run under the same
# interpreter as every other target in this file and follow a PYTHON=...
# override; a bare `pytest` on PATH may belong to another environment.
.PHONY: test
test:
	$(PYTHON) -m pytest -q
42+
43+
# Build the MCP server image, tagged text-to-sql-mcp:latest, using the
# mcp_server/ directory as the Docker build context.
.PHONY: mcp-build
mcp-build:
	docker build -t text-to-sql-mcp:latest mcp_server
46+
47+
# Deploy the MCP server from the mcp_server/ sources to Cloud Run as the
# service `mcp-sql-rft-server`, listening on port 8080.
#
# Usage: make mcp-deploy PROJECT_ID=your-id REGION=us-central1
#
# PROJECT_ID and REGION are both required. The two guard lines stop make
# with a clear message when either is missing; without them gcloud would
# receive `--project --region ...` and fail with a confusing argument error.
# The guards expand to nothing when the variables are set.
#
# NOTE(review): --allow-unauthenticated is passed, i.e. the service accepts
# unauthenticated requests -- confirm this is intended for your deployment.
.PHONY: mcp-deploy
mcp-deploy:
	$(if $(strip $(PROJECT_ID)),,$(error PROJECT_ID is required (usage: make mcp-deploy PROJECT_ID=your-id REGION=us-central1)))
	$(if $(strip $(REGION)),,$(error REGION is required (usage: make mcp-deploy PROJECT_ID=your-id REGION=us-central1)))
	gcloud run deploy mcp-sql-rft-server \
		--source mcp_server \
		--project "$(PROJECT_ID)" \
		--region "$(REGION)" \
		--allow-unauthenticated \
		--port 8080

text-to-sql-ep-rft/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
### Text-to-SQL RFT with Eval Protocol (End-to-End)
2+
3+
This repository demonstrates an end-to-end Natural Language → SQL workflow using Eval Protocol and Fireworks RFT, without the old reward-kit. It includes:
4+
- Data generation scripts (schema-only → synthetic DB → SQL → NL → train/test)
5+
- A Dockerized MCP server that exposes a read-only DuckDB database over HTTP
6+
- An Eval Protocol evaluator that executes model-generated SQL via MCP and scores results
7+
- Local smoke tests and Makefile helpers
8+
9+
#### Prerequisites
10+
- Python 3.11+ (a `uv` or `venv` environment is recommended)
11+
- `FIREWORKS_API_KEY` (Fireworks account)
12+
- Google Cloud SDK (for deploying the MCP server to Cloud Run), needed only if you want a remote server
13+
- Optional: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY` for benchmarking additional models
14+
15+
#### Quickstart
16+
1) Create a Python environment in this folder and install:
17+
```
18+
pip install -r requirements.txt
19+
```
20+
21+
2) Generate data (OpenFlights → prod → synthetic → queries → ground-truth → NL):
22+
```
23+
make all-data
24+
```
25+
26+
3) Build and deploy MCP server to Cloud Run:
27+
```
28+
make mcp-deploy PROJECT_ID=your-gcp-project REGION=us-central1
29+
```
30+
Copy the deployed service URL (without the trailing `/mcp/`) and set it as `MCP_SERVER_URL` for the evaluator.
31+
32+
4) Test evaluator locally:
33+
```
34+
pytest -q
35+
```
36+
37+
5) Launch RFT (from `evaluator/` with `.env` containing FIREWORKS_API_KEY and MCP_SERVER_URL):
38+
```
39+
cd evaluator
40+
eval-protocol create rft --base-model accounts/fireworks/models/qwen2p5-7b
41+
```
42+
43+
6) Benchmark base vs tuned:
44+
```
45+
python scripts/benchmark_models.py
46+
```
47+
48+
See `scripts/` for individual steps and `mcp_server/` for Docker deployment details.

text-to-sql-ep-rft/data/airlines.dat

Lines changed: 6162 additions & 0 deletions
Large diffs are not rendered by default.

text-to-sql-ep-rft/data/airports.dat

Lines changed: 7698 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
"Bonaire, Saint Eustatius and Saba","BQ",""
2+
"Aruba","AW","AA"
3+
"Antigua and Barbuda","AG","AC"
4+
"United Arab Emirates","AE","AE"
5+
"Afghanistan","AF","AF"
6+
"Algeria","DZ","AG"
7+
"Azerbaijan","AZ","AJ"
8+
"Albania","AL","AL"
9+
"Armenia","AM","AM"
10+
"Angola","AO","AO"
11+
"American Samoa","AS","AQ"
12+
"Argentina","AR","AR"
13+
"Australia","AU","AS"
14+
"Ashmore and Cartier Islands",\N,"AT"
15+
"Austria","AT","AU"
16+
"Anguilla","AI","AV"
17+
"Antarctica","AQ","AY"
18+
"Bahrain","BH","BA"
19+
"Barbados","BB","BB"
20+
"Botswana","BW","BC"
21+
"Bermuda","BM","BD"
22+
"Belgium","BE","BE"
23+
"Bahamas","BS","BF"
24+
"Bangladesh","BD","BG"
25+
"Belize","BZ","BH"
26+
"Bosnia and Herzegovina","BA","BK"
27+
"Bolivia","BO","BL"
28+
"Myanmar","MM","BM"
29+
"Benin","BJ","BN"
30+
"Belarus","BY","BO"
31+
"Solomon Islands","SB","BP"
32+
"Navassa Island",\N,"BQ"
33+
"Brazil","BR","BR"
34+
"India","IN","BS"
35+
"Bhutan","BT","BT"
36+
"Bulgaria","BG","BU"
37+
"Bouvet Island","BV","BV"
38+
"Brunei Darussalam","BN","BX"
39+
"Burundi","BI","BY"
40+
"Canada","CA","CA"
41+
"Cambodia","KH","CB"
42+
"Chad","TD","CD"
43+
"Sri Lanka","LK","CE"
44+
"DR Congo","CD","CF"
45+
"Congo Republic","CG","CG"
46+
"China","CN","CH"
47+
"Chile","CL","CI"
48+
"Cayman Islands","KY","CJ"
49+
"Cocos (Keeling) Islands","CC","CK"
50+
"Cameroon","CM","CM"
51+
"Comoros","KM","CN"
52+
"Colombia","CO","CO"
53+
"Northern Mariana Islands","MP","CQ"
54+
"Coral Sea Islands",\N,"CR"
55+
"Costa Rica","CR","CS"
56+
"Central African Republic","CF","CT"
57+
"Cuba","CU","CU"
58+
"Cabo Verde","CV","CV"
59+
"Cook Islands","CK","CW"
60+
"Cyprus","CY","CY"
61+
"Denmark","DK","DA"
62+
"Djibouti","DJ","DJ"
63+
"Dominica","DM","DO"
64+
"Jarvis Island",\N,"DQ"
65+
"Dominican Republic","DO","DR"
66+
"Ecuador","EC","EC"
67+
"Egypt","EG","EG"
68+
"Ireland","IE","EI"
69+
"Equatorial Guinea","GQ","EK"
70+
"Estonia","EE","EN"
71+
"Eritrea","ER","ER"
72+
"El Salvador","SV","ES"
73+
"Ethiopia","ET","ET"
74+
"Europa Island",\N,"EU"
75+
"Czech Republic","CZ","EZ"
76+
"French Guiana","GF","FG"
77+
"Finland","FI","FI"
78+
"Fiji","FJ","FJ"
79+
"Falkland Islands","FK","FK"
80+
"Micronesia, Fed. Sts.","FM","FM"
81+
"Faeroe Islands","FO","FO"
82+
"French Polynesia","PF","FP"
83+
"Baker Island",\N,"FQ"
84+
"France","FR","FR"
85+
"French Southern Territories","TF","FS"
86+
"Gambia","GM","GA"
87+
"Gabon","GA","GB"
88+
"Georgia","GE","GG"
89+
"Ghana","GH","GH"
90+
"Gibraltar","GI","GI"
91+
"Grenada","GD","GJ"
92+
"Guernsey","GG","GK"
93+
"Greenland","GL","GL"
94+
"Germany","DE","GM"
95+
"Glorioso Islands",\N,"GO"
96+
"Guadeloupe","GP","GP"
97+
"Guam","GU","GQ"
98+
"Greece","GR","GR"
99+
"Guatemala","GT","GT"
100+
"Guinea","GN","GV"
101+
"Guyana","GY","GY"
102+
"Palestine","PS","GZ"
103+
"Haiti","HT","HA"
104+
"Hong Kong","HK","HK"
105+
"Heard and McDonald Islands","HM","HM"
106+
"Honduras","HN","HO"
107+
"Howland Island",\N,"HQ"
108+
"Croatia","HR","HR"
109+
"Hungary","HU","HU"
110+
"Iceland","IS","IC"
111+
"Indonesia","ID","ID"
112+
"Isle of Man","IM","IM"
113+
"India","IN","IN"
114+
"British Indian Ocean Territory","IO","IO"
115+
"Clipperton Island",\N,"IP"
116+
"Iran","IR","IR"
117+
"Israel","IL","IS"
118+
"Italy","IT","IT"
119+
"Cote d'Ivoire","CI","IV"
120+
"Iraq","IQ","IZ"
121+
"Japan","JP","JA"
122+
"Jersey","JE","JE"
123+
"Jamaica","JM","JM"
124+
"Jan Mayen",\N,"JN"
125+
"Jordan","JO","JO"
126+
"Johnston Atoll",\N,"JQ"
127+
"Juan de Nova Island",\N,"JU"
128+
"Kenya","KE","KE"
129+
"Kyrgyz Republic","KG","KG"
130+
"North Korea","KP","KN"
131+
"Kingman Reef",\N,"KQ"
132+
"Kiribati","KI","KR"
133+
"South Korea","KR","KS"
134+
"Christmas Island","CX","KT"
135+
"Kuwait","KW","KU"
136+
"Kazakhstan","KZ","KZ"
137+
"Laos","LA","LA"
138+
"Lebanon","LB","LE"
139+
"Latvia","LV","LG"
140+
"Lithuania","LT","LH"
141+
"Liberia","LR","LI"
142+
"Slovakia","SK","LO"
143+
"Palmyra Atoll",\N,"LQ"
144+
"Lesotho","LS","LT"
145+
"Luxembourg","LU","LU"
146+
"Libya","LY","LY"
147+
"Madagascar","MG","MA"
148+
"Martinique","MQ","MB"
149+
"Macao","MO","MC"
150+
"Moldova","MD","MD"
151+
"Mayotte","YT","MF"
152+
"Mongolia","MN","MG"
153+
"Montserrat","MS","MH"
154+
"Malawi","MW","MI"
155+
"Montenegro","ME","MJ"
156+
"Macedonia","MK","MK"
157+
"Mali","ML","ML"
158+
"Monaco","MC","MN"
159+
"Morocco","MA","MO"
160+
"Mauritius","MU","MP"
161+
"Midway Islands",\N,"MQ"
162+
"Mauritania","MR","MR"
163+
"Malta","MT","MT"
164+
"Oman","OM","MU"
165+
"Maldives","MV","MV"
166+
"Mexico","MX","MX"
167+
"Malaysia","MY","MY"
168+
"Mozambique","MZ","MZ"
169+
"New Caledonia","NC","NC"
170+
"Niue","NU","NE"
171+
"Norfolk Island","NF","NF"
172+
"Niger","NE","NG"
173+
"Vanuatu","VU","NH"
174+
"Nigeria","NG","NI"
175+
"Netherlands","NL","NL"
176+
"Norway","NO","NO"
177+
"Nepal","NP","NP"
178+
"Nauru","NR","NR"
179+
"Suriname","SR","NS"
180+
"Netherlands Antilles","AN","NT"
181+
"Nicaragua","NI","NU"
182+
"New Zealand","NZ","NZ"
183+
"Paraguay","PY","PA"
184+
"Pitcairn","PN","PC"
185+
"Peru","PE","PE"
186+
"Paracel Islands",\N,"PF"
187+
"Spratly Islands",\N,"PG"
188+
"Pakistan","PK","PK"
189+
"Poland","PL","PL"
190+
"Panama","PA","PM"
191+
"Portugal","PT","PO"
192+
"Papua New Guinea","PG","PP"
193+
"Palau","PW","PS"
194+
"Guinea-Bissau","GW","PU"
195+
"Qatar","QA","QA"
196+
"Serbia","RS","RB"
197+
"Reunion","RE","RE"
198+
"Marshall Islands","MH","RM"
199+
"Romania","RO","RO"
200+
"Philippines","PH","RP"
201+
"Puerto Rico","PR","RQ"
202+
"Russia","RU","RS"
203+
"Rwanda","RW","RW"
204+
"Saudi Arabia","SA","SA"
205+
"St. Pierre and Miquelon","PM","SB"
206+
"St. Kitts and Nevis","KN","SC"
207+
"Seychelles","SC","SE"
208+
"South Africa","ZA","SF"
209+
"Senegal","SN","SG"
210+
"St. Helena","SH","SH"
211+
"Slovenia","SI","SI"
212+
"Sierra Leone","SL","SL"
213+
"Singapore","SG","SN"
214+
"Somalia","SO","SO"
215+
"Spain","ES","SP"
216+
"South Sudan","SS","SS"
217+
"St. Lucia","LC","ST"
218+
"Sudan","SD","SU"
219+
"Svalbard and Jan Mayen Islands","SJ","SV"
220+
"Sweden","SE","SW"
221+
"South Georgia and South Sandwich Is.","GS","SX"
222+
"Syria","SY","SY"
223+
"Switzerland","CH","SZ"
224+
"Trinidad and Tobago","TT","TD"
225+
"Tromelin Island",\N,"TE"
226+
"Thailand","TH","TH"
227+
"Tajikistan","TJ","TI"
228+
"Turks and Caicos Islands","TC","TK"
229+
"Tokelau","TK","TL"
230+
"Tonga","TO","TN"
231+
"Togo","TG","TO"
232+
"Sao Tome and Principe","ST","TP"
233+
"Tunisia","TN","TS"
234+
"Timor-Leste","TL","TT"
235+
"Turkey","TR","TU"
236+
"Tuvalu","TV","TV"
237+
"Taiwan","TW","TW"
238+
"Turkmenistan","TM","TX"
239+
"Tanzania","TZ","TZ"
240+
"Uganda","UG","UG"
241+
"United Kingdom","GB","UK"
242+
"Ukraine","UA","UP"
243+
"United States","US","US"
244+
"Burkina Faso","BF","UV"
245+
"Uruguay","UY","UY"
246+
"Uzbekistan","UZ","UZ"
247+
"St. Vincent and the Grenadines","VC","VC"
248+
"Venezuela","VE","VE"
249+
"British Virgin Islands","VG","VI"
250+
"Vietnam","VN","VM"
251+
"United States Virgin Islands","VI","VQ"
252+
"Namibia","NA","WA"
253+
"Palestine","PS","WE"
254+
"Wallis and Futuna Islands","WF","WF"
255+
"Western Sahara","EH","WI"
256+
"Wake Island",\N,"WQ"
257+
"Samoa","WS","WS"
258+
"Eswatini","SZ","WZ"
259+
"Yemen","YE","YM"
260+
"Zambia","ZM","ZA"
261+
"Zimbabwe","ZW","ZI"

0 commit comments

Comments
 (0)