Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion legendql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@
from legendql.ql import query, LegendQL
from legendql.store import store
from legendql.functions import aggregate, over, unbounded, rows, range, left, avg, count, sum, rank, lead, lag, row_number
from legendql.sql_parser import sql_to_pure_relation, SQLToLegendQLConverter

__all__ = ['db', 'table', 'using', 'using_db', 'using_db_def', 'query', 'LegendQL', 'store', 'aggregate', 'over', 'unbounded', 'rows', 'range', 'left', 'avg', 'count', 'sum', 'rank', 'lead', 'lag', 'row_number']
__all__ = ['db', 'table', 'using', 'using_db', 'using_db_def', 'query', 'LegendQL', 'store', 'aggregate', 'over', 'unbounded', 'rows', 'range', 'left', 'avg', 'count', 'sum', 'rank', 'lead', 'lag', 'row_number', 'sql_to_pure_relation', 'SQLToLegendQLConverter']
32 changes: 32 additions & 0 deletions legendql/sql_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Examples demonstrating SQL to Pure Relation conversion using SQLGlot."""

import pyarrow as pa
from legendql.sql_parser import sql_to_pure_relation
from legendql.model.schema import Database, Table
from legendql.runtime.pure.db.h2 import H2DatabaseDefinition

# Schema of the sample "employees" table that both example queries read from.
employees_table = Table(
    table="employees",
    columns=[
        pa.field(column_name, column_type)
        for column_name, column_type in (
            ("id", pa.int32()),
            ("name", pa.utf8()),
            ("department", pa.utf8()),
            ("salary", pa.float32()),
        )
    ],
)

# The logical database holding the table, plus an H2 definition created
# without any setup SQL statements.
database = Database(name="company", children=[employees_table])
db_def = H2DatabaseDefinition(sqls=[])

# Example 1: project two of the table's columns.
sql1 = "SELECT id, name FROM employees"
pure_code1 = sql_to_pure_relation(sql1, db_def, database)
print("Example 1:", f"SQL: {sql1}", f"Pure Relation: {pure_code1}", sep="\n")
print()

# Example 2: project a different set of three columns.
sql2 = "SELECT name, department, salary FROM employees"
pure_code2 = sql_to_pure_relation(sql2, db_def, database)
print("Example 2:", f"SQL: {sql2}", f"Pure Relation: {pure_code2}", sep="\n")
104 changes: 104 additions & 0 deletions legendql/sql_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright 2025 Goldman Sachs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations
from typing import List, Optional
import sqlglot
from sqlglot import exp, parse_one

from legendql.model.metamodel import (
SelectionClause, DatabaseFromClause, ColumnReferenceExpression,
Expression, Clause
)
from legendql.model.schema import Database, Table, Schema
from legendql.query import Query
from legendql.runtime.pure.db.type import DatabaseDefinition


class SQLToLegendQLConverter:
    """Converts SQL statements to LegendQL metamodel using SQLGlot parsing.

    Only plain column projections over a single table are translated, i.e.
    ``SELECT col_a, col_b FROM some_table``. Anything that cannot be
    translated faithfully (other clauses, ``*``, aliases, computed
    expressions) is rejected instead of being silently dropped or turned
    into a meaningless column reference.
    """

    # ``exp.Select.args`` keys for clauses that have no translation here,
    # paired with the label used in the error message.
    _UNSUPPORTED_CLAUSES = (
        ("joins", "JOIN"),
        ("where", "WHERE"),
        ("group", "GROUP BY"),
        ("having", "HAVING"),
        ("order", "ORDER BY"),
        ("limit", "LIMIT"),
        ("offset", "OFFSET"),
        ("distinct", "DISTINCT"),
    )

    def __init__(self, database_definition: DatabaseDefinition, database: Database):
        """Store the database definition and schema passed to ``Query.from_table``."""
        self.database_definition = database_definition
        self.database = database

    def parse_sql_to_query(self, sql: str) -> Query:
        """Parse SQL string and convert to LegendQL Query object.

        Args:
            sql: The SQL text to parse; it must be a SELECT statement.

        Returns:
            The ``Query`` built from the statement.

        Raises:
            ValueError: For every failure (parse error, non-SELECT statement,
                unsupported construct). The original exception is chained as
                ``__cause__`` so the real origin stays visible in tracebacks.
        """
        try:
            parsed = parse_one(sql)
            if not isinstance(parsed, exp.Select):
                raise ValueError(f"Only SELECT statements are currently supported, got: {type(parsed)}")

            return self._convert_select_to_query(parsed)
        except Exception as e:
            # Callers rely on a single ValueError type; chain the cause rather
            # than discarding it.
            raise ValueError(f"Failed to parse SQL: {sql}. Error: {str(e)}") from e

    def _convert_select_to_query(self, select_ast: exp.Select) -> Query:
        """Convert SQLGlot Select AST to LegendQL Query.

        Raises:
            NotImplementedError: If the statement uses a clause or a select
                expression that is not supported.
            ValueError: If the statement references no table.
        """
        # Fail fast: ignoring e.g. a WHERE clause would produce a query with
        # different semantics than the SQL that was given.
        self._reject_unsupported_clauses(select_ast)

        table_info = self._extract_table_info(select_ast)
        query = Query.from_table(
            self.database_definition,
            self.database,
            table_info['table_name']
        )

        if select_ast.expressions:
            selection_expressions = [
                self._convert_expression(expr) for expr in select_ast.expressions
            ]
            # Keep only the first clause (the FROM clause) before adding the
            # explicit projection.
            # NOTE(review): presumably Query.from_table seeds further default
            # clauses that must be discarded here - confirm against Query.
            query._clauses = [query._clauses[0]]
            query._add_clause(SelectionClause(selection_expressions))

        return query

    def _reject_unsupported_clauses(self, select_ast: exp.Select) -> None:
        """Raise NotImplementedError if the SELECT carries a clause we cannot translate."""
        found = []
        for key, label in self._UNSUPPORTED_CLAUSES:
            value = select_ast.args.get(key)
            # An absent clause is a missing/None entry (or an empty list for
            # list-valued entries such as joins).
            if value is None or value is False or (isinstance(value, list) and not value):
                continue
            found.append(label)
        if found:
            raise NotImplementedError(
                f"Unsupported SQL clause(s): {', '.join(found)}"
            )

    def _extract_table_info(self, select_ast: exp.Select) -> dict:
        """Extract table name and schema information from SELECT AST.

        The first ``exp.Table`` node returned by ``select_ast.find`` is used.

        Returns:
            A dict with keys ``table_name``, ``schema_name`` and
            ``database_name``; the latter two are ``None`` when the table
            reference is not qualified.

        Raises:
            ValueError: If the statement contains no table reference.
        """
        table_node = select_ast.find(exp.Table)
        if table_node is None:
            raise ValueError("No table found in SELECT statement")

        # sqlglot reports a missing qualifier as an empty string; normalise
        # it to None so "not specified" is unambiguous.
        return {
            'table_name': table_node.name,
            'schema_name': table_node.db or None,
            'database_name': table_node.catalog or None
        }

    def _convert_expression(self, expr: exp.Expression) -> Expression:
        """Convert SQLGlot expression to LegendQL Expression.

        Only plain column references are supported.

        Raises:
            NotImplementedError: For ``*`` / ``table.*`` and for any other
                expression (aliases, literals, function calls, arithmetic).
        """
        is_qualified_star = isinstance(expr, exp.Column) and isinstance(expr.this, exp.Star)
        if isinstance(expr, exp.Star) or is_qualified_star:
            raise NotImplementedError("SELECT * is not yet implemented")
        if isinstance(expr, exp.Column):
            return ColumnReferenceExpression(expr.name)
        # Wrapping the rendered SQL (e.g. "id AS x") in a column reference
        # would name a column that does not exist, so refuse instead.
        raise NotImplementedError(f"Unsupported select expression: {expr}")


def sql_to_pure_relation(sql: str, database_definition: DatabaseDefinition, database: Database) -> str:
    """
    Translate a SQL statement into its Pure Relation representation.

    A throwaway SQLToLegendQLConverter is created for the call, the
    statement is converted to a query, and the query is rendered with
    ``to_string()``.

    Args:
        sql: The SQL statement to translate
        database_definition: Database definition handed to the converter
        database: Database schema handed to the converter

    Returns:
        The string produced by the converted query's ``to_string()``

    Raises:
        ValueError: If the converter fails to parse or convert the statement
    """
    return (
        SQLToLegendQLConverter(database_definition, database)
        .parse_sql_to_query(sql)
        .to_string()
    )
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ numpy = [
{ version = ">=1.20.0", python = "<3.12" },
{ version = ">=1.26.0", python = ">=3.12" }
]
sqlglot = ">=26.0.0"


[tool.poetry.group.dev.dependencies]
Expand Down
40 changes: 40 additions & 0 deletions test_sql_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
"""Test script for SQLGlot integration with LegendQL."""

import pyarrow as pa
from legendql.sql_parser import sql_to_pure_relation
from legendql.model.schema import Database, Table
from legendql.runtime.pure.db.h2 import H2DatabaseDefinition

def test_basic_sql_conversion():
    """Test basic SQL SELECT conversion to Pure Relation code.

    Failures propagate as exceptions or assertion errors, so a test runner
    actually reports them. The function returns nothing: a test that catches
    every error and returns a bool is always reported as passing by pytest.
    """
    # Two-column table used as the conversion target.
    table = Table(
        table="tableC",
        columns=[
            pa.field("colA", pa.utf8()),
            pa.field("colB", pa.int32())
        ]
    )
    database = Database(name="testdb", children=[table])

    db_def = H2DatabaseDefinition(sqls=[])

    sql = "SELECT colA, colB FROM tableC"

    pure_relation_code = sql_to_pure_relation(sql, db_def, database)
    print("SQL:", sql)
    print("Pure Relation Code:", pure_relation_code)

    # Minimal sanity checks on the result.
    assert isinstance(pure_relation_code, str)
    assert pure_relation_code, "conversion produced an empty string"
    print("✅ Basic SQL conversion successful!")


if __name__ == "__main__":
    # Script mode: report the outcome and signal failure via the exit status.
    try:
        test_basic_sql_conversion()
    except Exception as e:
        print(f"❌ Error converting SQL: {e}")
        print("\n💥 SQLGlot integration test failed!")
        raise SystemExit(1) from e
    print("\n🎉 SQLGlot integration test passed!")