def omgidl_to_ifex_run():
    """Command-line entry point for the OMG IDL to IFEX conversion.

    Reads the single positional argument (path to a .idl file), parses it
    with get_ast_from_idl_file(), converts the result with omgidl_to_ifex()
    and prints the output of ast_as_yaml() on stdout.

    Diagnostics are written to stderr so they cannot end up inside redirected
    YAML output.  A missing file ends the process with exit status 1; any
    other conversion error is reported and then re-raised.
    """
    import sys  # local import: only needed for diagnostics and exit status

    parser = argparse.ArgumentParser(description='Runs OMG IDL to IFEX translator.')
    parser.add_argument('input', metavar='file.idl', type=str, help='path to input .idl file')
    # Parse before entering the try block so that `args` is always bound when
    # the handlers below format their messages.
    args = parser.parse_args()

    try:
        idl_ast = get_ast_from_idl_file(args.input)
        ifex_ast = omgidl_to_ifex(idl_ast)
        print(ast_as_yaml(ifex_ast))

    except FileNotFoundError as e:
        # Name the path that was actually missing; fall back to the input
        # argument when the exception carries no filename.
        missing = e.filename if e.filename is not None else args.input
        print(f"ERROR: File not found: {missing}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"ERROR: Conversion error for {args.input}: {e}", file=sys.stderr)
        raise
+ +Public API: + omgidl_to_ifex(idl_file: IDLFile) -> ifex.AST + +Follows the same approach as aidl_to_ifex.py / protobuf_to_ifex.py: + - Manual tree-walking (no rule_translator) + - Capitalised function names to make AST type names stand out + +Mapping summary +--------------- +OMG IDL IFEX +------- ---- +Module -> Namespace (nested modules = nested Namespaces) +Interface -> ifex.Interface (inside a Namespace) +Operation(oneway=False) -> ifex.Method +Operation(oneway=True) -> ifex.Event +Parameter(dir="in"/"inout") -> Argument in Method.input +Parameter(dir="out"/"inout") -> Argument in Method.output +return_type != "void" -> Argument(name="_return") in Method.returns +raises clause -> Method.errors (list of ifex.Error) +Attribute -> ifex.Property +Attribute(readonly=True) -> ifex.Property (description notes read-only) +Struct -> ifex.Struct +Member -> ifex.Member +Enum -> ifex.Enumeration (datatype="int32", values 0,1,2,...) +Enumerator -> ifex.Option +Exception_ -> ifex.Struct (name prefixed "Ex_") +Typedef -> ifex.Typedef + +Top-level structs, exceptions, enums, typedefs and the first interface declared +outside any module are collected into a single synthetic Namespace named +"_global_". 
_type_map = {
    # Exact OMG IDL primitive type names → IFEX fundamental types
    "boolean": "boolean",
    "octet": "uint8",
    "char": "uint8",
    "wchar": "uint8",
    "short": "int16",
    "unsigned short": "uint16",
    "long": "int32",
    "unsigned long": "uint32",
    "long long": "int64",
    "unsigned long long": "uint64",
    "float": "float",
    "double": "double",
    "long double": "double",   # IFEX has no extended precision
    "string": "string",
    "wstring": "string",
    "any": "opaque",
    "Object": "opaque",
    "void": "void",            # internal sentinel
}


def _split_sequence_type(t: str) -> tuple:
    """Split "sequence<ELEM[,BOUND]>REST" into (ELEM, REST).

    Angle brackets are matched by depth, so a nested element type such as
    "sequence<long,3>" is returned whole and only the bound of the outermost
    sequence is dropped.  If the closing '>' is missing, everything after
    "sequence<" is treated as the element (and bound) and REST is empty.
    """
    depth = 0
    bound_at = None   # index of the first ',' that belongs to the outer sequence
    close = len(t)    # index of the '>' that closes the outer sequence
    for i in range(len("sequence"), len(t)):
        ch = t[i]
        if ch == "<":
            depth += 1
        elif ch == ">":
            depth -= 1
            if depth == 0:
                close = i
                break
        elif ch == "," and depth == 1 and bound_at is None:
            bound_at = i
    elem_end = close if bound_at is None else bound_at
    return t[len("sequence<"):elem_end].strip(), t[close + 1:]


def translate_type(t: str) -> str:
    """Map an OMG IDL type string to an IFEX type string.

    Handles:
      - Primitive types via _type_map (surrounding whitespace is ignored)
      - Array types:    "long[3]"            → "int32[]"  (size information is lost)
                        "long[3][4]"         → "int32[][]" (one "[]" per dimension)
      - Sequence types: "sequence<float>"    → "float[]"
                        "sequence<float,10>" → "float[]"  (bound lost)
        Nested sequences and an array suffix after a sequence are supported.
      - Unknown / user-defined types pass through unchanged.

    :param t: IDL type string, e.g. as produced by process_type()
    :return:  IFEX type string
    """
    t = t.strip()

    # sequence<T> or sequence<T,N>, optionally followed by array suffixes
    if t.startswith("sequence<"):
        elem_type, rest = _split_sequence_type(t)
        return translate_type(elem_type) + "[]" + "[]" * rest.count("[")

    # Fixed-size array: "long[3]" — every [N] suffix becomes one "[]"
    if "[" in t:
        base = t[:t.index("[")].strip()
        return translate_type(base) + "[]" * t.count("[")

    return _type_map.get(t, t)
def Attribute_to_Property(attr: Attribute) -> ifex.Property:
    """Convert an IDL attribute to an ifex.Property.

    A read-only attribute is marked by setting the property description to
    the literal text "readonly"; otherwise the description is left unset.
    """
    desc = "readonly" if attr.readonly else None
    return ifex.Property(name=attr.name, datatype=translate_type(attr.datatype),
                         description=desc)


def Members_to_Members(members) -> list:
    """Convert IDL members to a list of ifex.Member (empty list when *members* is None/empty)."""
    return [ifex.Member(name=m.name, datatype=translate_type(m.datatype))
            for m in (members or [])]


def Struct_to_Struct(s: Struct) -> ifex.Struct:
    """Convert an IDL struct to an ifex.Struct; an empty member list becomes None."""
    return ifex.Struct(
        name    = s.name,
        members = Members_to_Members(s.members) or None,
    )


def Exception_to_Struct(e: Exception_) -> ifex.Struct:
    """Convert an IDL exception to an IFEX Struct, prefixing name with Ex_.

    NOTE(review): Raises_to_Errors() passes the exception name through
    without the "Ex_" prefix — confirm whether Error.datatype is meant to
    refer to the struct produced here.
    """
    return ifex.Struct(
        name    = "Ex_" + e.name,
        members = Members_to_Members(e.members) or None,
    )


def Enum_to_Enumeration(e: Enum) -> ifex.Enumeration:
    """Convert an IDL enum to an IFEX Enumeration.

    OMG IDL enumerators have no explicit integer values; they are
    assigned 0, 1, 2, ... in declaration order.  The enumeration datatype
    is always 'int32'.
    """
    options = [ifex.Option(name=en.name, value=idx)
               for idx, en in enumerate(e.enumerators or [])]
    return ifex.Enumeration(
        name     = e.name,
        datatype = 'int32',
        options  = options,
    )


def Typedef_to_Typedef(t: Typedef) -> ifex.Typedef:
    """Convert an IDL typedef to an ifex.Typedef with a translated datatype."""
    return ifex.Typedef(name=t.name, datatype=translate_type(t.datatype))


def Interface_to_Interface(iface: Interface) -> ifex.Interface:
    """Convert an IDL interface to an ifex.Interface.

    Non-oneway operations become methods, oneway operations become events
    and attributes become properties.  Empty collections are passed as None.
    iface.inherits and iface.consts are not read here, so they are not
    carried over to the result.
    """
    methods    = Operations_to_Methods(iface.operations)
    events     = Operations_to_Events(iface.operations)
    properties = [Attribute_to_Property(a) for a in (iface.attributes or [])]
    return ifex.Interface(
        name       = iface.name,
        methods    = methods    if methods    else None,
        events     = events     if events     else None,
        properties = properties if properties else None,
    )


def _first_interface(owner: str, interfaces) -> ifex.Interface:
    """Convert the first entry of *interfaces*, warning on stderr when others are dropped.

    :param owner:      text naming the enclosing scope, used in the warning
    :param interfaces: non-empty list of IDL Interface nodes
    """
    if len(interfaces) > 1:
        import sys  # local import: this module has no top-level `import sys`
        print(f"WARNING: {owner} has {len(interfaces)} interfaces; "
              f"only '{interfaces[0].name}' is mapped to ifex.Interface. "
              f"Remaining interfaces are ignored.", file=sys.stderr)
    return Interface_to_Interface(interfaces[0])


def Module_to_Namespace(module: Module) -> ifex.Namespace:
    """Recursively convert a Module to a Namespace.

    Structs and exceptions share the namespace's struct list (exceptions
    with the "Ex_" prefix).  module.consts is not read here.
    """
    structs       = [Struct_to_Struct(s) for s in (module.structs or [])]
    structs      += [Exception_to_Struct(e) for e in (module.exceptions or [])]
    enumerations  = [Enum_to_Enumeration(e) for e in (module.enums or [])]
    typedefs      = [Typedef_to_Typedef(t) for t in (module.typedefs or [])]
    sub_ns        = [Module_to_Namespace(m) for m in (module.modules or [])]

    ns = ifex.Namespace(
        name         = module.name,
        structs      = structs      if structs      else None,
        enumerations = enumerations if enumerations else None,
        typedefs     = typedefs     if typedefs     else None,
        namespaces   = sub_ns       if sub_ns       else None,
    )

    # At most one interface per namespace in IFEX: take the first and warn
    # about the rest.
    if module.interfaces:
        ns.interface = _first_interface(f"module '{module.name}'", module.interfaces)

    return ns


# ============================================================
# Main conversion entry point
# ============================================================

def omgidl_to_ifex(idl_file: IDLFile) -> ifex.AST:
    """Convert an IDLFile AST to an IFEX AST.

    Each top-level module becomes a Namespace.  Top-level structs,
    exceptions, enums, typedefs and the first top-level interface are placed
    in a single '_global_' Namespace, which is only created when at least one
    of them exists.  Additional top-level interfaces are ignored with a
    warning on stderr, in the same way as inside modules.  idl_file.consts is
    not read here.

    :param idl_file: parsed IDLFile AST
    :return: ifex.AST (namespaces is None when nothing was converted)
    """
    # Modules → Namespaces
    namespaces = [Module_to_Namespace(module) for module in (idl_file.modules or [])]

    # Top-level declarations outside any module → '_global_' Namespace
    global_structs   = [Struct_to_Struct(s) for s in (idl_file.structs or [])]
    global_structs  += [Exception_to_Struct(e) for e in (idl_file.exceptions or [])]
    global_enums     = [Enum_to_Enumeration(e) for e in (idl_file.enums or [])]
    global_typedefs  = [Typedef_to_Typedef(t) for t in (idl_file.typedefs or [])]
    global_ifaces    = idl_file.interfaces or []

    if global_structs or global_enums or global_typedefs or global_ifaces:
        global_ns = ifex.Namespace(
            name         = '_global_',
            structs      = global_structs  if global_structs  else None,
            enumerations = global_enums    if global_enums    else None,
            typedefs     = global_typedefs if global_typedefs else None,
        )
        if global_ifaces:
            global_ns.interface = _first_interface("top level (outside any module)",
                                                   global_ifaces)
        namespaces.append(global_ns)

    return ifex.AST(namespaces=namespaces if namespaces else None)
============================================================ +# OMG IDL (Object Management Group Interface Definition Language) +# LARK GRAMMAR +# ============================================================ +# +# SPDX-FileCopyrightText: Copyright (c) 2024 MBition GmbH. +# SPDX-License-Identifier: MPL-2.0 +# +# This file is part of the IFEX project +# +# Grammar for Lark parser, covering the subset of OMG IDL 4.2 +# represented in omgidl_ast.py. +# +# Reference: OMG IDL 4.2 (formal/2021-06-01) +# https://www.omg.org/spec/IDL/4.2 +# +# Grammar conventions (identical to aidl.grammar / protobuf.grammar): +# -------------------------------------------------------------------- +# - Composite rules use lowercase names +# - Terminal/token names use UPPERCASE +# - Names prefixed X_ are additions to preserve semantic keywords that +# Lark would otherwise silently discard (e.g. X_ONEWAY, X_READONLY) +# - ? prefix on rules = Lark "inline/transparent" rule (not stored) +# - Parser mode: LALR +# - Preprocessor lines (#include, #pragma, #define, etc.) and comments +# (// and /* */) are stripped by filter_comments() before Lark sees +# the text — they are NOT in this grammar. 
+# +# Subset scope: +# - module (nestable) +# - interface (with inheritance) +# - operation (with in/out/inout params, raises, oneway) +# - attribute / readonly attribute +# - const +# - struct +# - exception +# - enum (identifier-only enumerators, no explicit integer assignment) +# - typedef (scalar and sequence aliases) +# - sequence / sequence +# - Fixed-size array types T[N] +# - Scoped names (A::B::C) +# +# Intentionally NOT covered (see implementation_evidence.md): +# - union / switch +# - valuetype / abstract interface / local interface +# - context clause on operations +# - #pragma version +# - Multiple-word primitive types as a single terminal (handled +# by multi-token normalisation in omgidl_lark.py instead) +# +# ============================================================ + + +# ============================================================ +# Start rule +# ============================================================ + +?start: idl_file + + +# ============================================================ +# Top-level file +# ============================================================ + +# An IDL file is zero or more top-level definitions. +# (Unlike AIDL, multiple declarations per file are allowed.) +idl_file: definition* + + +# ============================================================ +# Definitions (top-level and inside modules) +# ============================================================ + +# from spec §7.2.2: +# definition ::= type_dcl ";" | const_dcl ";" | except_dcl ";" +# | interface ";" | module ";" | ... 
+?definition: module_decl + | interface_decl + | struct_decl + | enum_decl + | exception_decl + | typedef_decl + | const_dcl + + +# ============================================================ +# Module +# ============================================================ + +# from spec §7.2.3: +# module_dcl ::= "module" identifier "{" definition+ "}" +module_decl: "module" IDENT "{" definition* "}" ";" + + +# ============================================================ +# Interface +# ============================================================ + +# from spec §7.4.11: +# interface_dcl ::= interface_header "{" interface_body "}" +# interface_header ::= "interface" identifier [inheritance_spec] +# inheritance_spec ::= ":" scoped_name {"," scoped_name} +interface_decl: "interface" IDENT inheritance_spec? "{" interface_member* "}" ";" + +inheritance_spec: ":" scoped_name ("," scoped_name)* + +?interface_member: operation_decl + | attribute_decl + | const_dcl + + +# ============================================================ +# Operation (method) +# ============================================================ + +# from spec §7.4.12: +# op_dcl ::= [op_attribute] op_type_spec identifier +# parameter_dcls [raises_expr] +# op_attribute ::= "oneway" +# op_type_spec ::= param_type_spec | "void" +operation_decl: X_ONEWAY? op_return_type IDENT "(" param_list? ")" raises_clause? 
";" + +op_return_type: "void" -> void_return + | type_spec -> typed_return + +param_list: param ("," param)* + +# from spec §7.4.12 (param_dcl): +# param_dcl ::= param_attribute simple_type_spec declarator +# param_attribute ::= "in" | "out" | "inout" +param: X_DIRECTION type_spec IDENT + +# from spec §7.4.12 (raises_expr): +# raises_expr ::= "raises" "(" scoped_name {"," scoped_name} ")" +raises_clause: "raises" "(" scoped_name ("," scoped_name)* ")" + + +# ============================================================ +# Attribute +# ============================================================ + +# from spec §7.4.13: +# attr_dcl ::= ["readonly"] "attribute" param_type_spec +# simple_declarator {"," simple_declarator} +attribute_decl: X_READONLY? "attribute" type_spec IDENT ";" + + +# ============================================================ +# Const +# ============================================================ + +# from spec §7.4.4: +# const_dcl ::= "const" const_type identifier "=" const_exp +const_dcl: "const" type_spec IDENT "=" CONST_VALUE ";" + + +# ============================================================ +# Struct +# ============================================================ + +# from spec §7.4.9: +# struct_type ::= "struct" identifier "{" member+ "}" +struct_decl: "struct" IDENT "{" member_decl+ "}" ";" + +member_decl: type_spec IDENT ";" + + +# ============================================================ +# Exception +# ============================================================ + +# from spec §7.4.15: +# except_dcl ::= "exception" identifier "{" member* "}" +exception_decl: "exception" IDENT "{" member_decl* "}" ";" + + +# ============================================================ +# Enum +# ============================================================ + +# from spec §7.4.14: +# enum_type ::= "enum" identifier "{" enumerator {"," enumerator} "}" +# enumerator ::= identifier +# Note: OMG IDL enumerators do NOT have explicit numeric values — +# 
values are implicitly 0, 1, 2, ... in declaration order. +enum_decl: "enum" IDENT "{" IDENT ("," IDENT)* "}" ";" + + +# ============================================================ +# Typedef +# ============================================================ + +# from spec §7.4.8: +# type_dcl ::= "typedef" type_declarator +# type_declarator ::= type_spec declarators +typedef_decl: "typedef" type_spec IDENT ";" + + +# ============================================================ +# Type specifications +# ============================================================ + +# from spec §7.4.1 (type_spec): +# We cover: scoped names (named types), sequence, and array suffix. +# Multi-word primitives (unsigned long, long long, etc.) are handled +# as a single IDENT-like token via BUILTIN_TYPE to avoid LALR ambiguity. +type_spec: sequence_type + | BUILTIN_TYPE ARRAY_SUFFIX? -> builtin_array_type + | IDENT ARRAY_SUFFIX? -> named_array_type + +# from spec §7.4.2 (sequence_type): +# sequence_type ::= "sequence" "<" simple_type_spec ["," positive_int_const] ">" +sequence_type: "sequence" "<" (BUILTIN_TYPE | IDENT) ("," INT)? ">" + +# from spec §7.4.11 (scoped_name): +# scoped_name ::= identifier | "::" identifier | scoped_name "::" identifier +scoped_name: "::"? IDENT ("::" IDENT)* + + +# ============================================================ +# Terminals (tokens) +# ============================================================ + +# Direction keywords for operation parameters +X_DIRECTION: "in" | "out" | "inout" + +# Operation modifier +X_ONEWAY: "oneway" + +# Attribute modifier +X_READONLY: "readonly" + +# Fixed-size array suffix: [N] (e.g. float coords[3]) +ARRAY_SUFFIX: "[" INT "]" + +# OMG IDL built-in multi-word and single-word primitive types. +# Defined as a prioritised terminal so it wins over IDENT for these words. +# Multi-word forms (unsigned long, long long, etc.) are flattened here +# because LALR grammars handle keyword sequences awkwardly. 
# Written as one regular expression with a trailing word boundary so that a
# built-in name only matches as a whole word.  Without the boundary this
# priority-2 terminal would win over IDENT on a mere prefix, e.g. a user type
# "longitude_t" would be lexed as BUILTIN_TYPE "long" followed by IDENT
# "itude_t".  Longer multi-word forms are listed before their prefixes
# ("unsigned long long" before "unsigned long", "long long" before "long").
BUILTIN_TYPE.2: /(unsigned long long|unsigned long|unsigned short|long long|long double|long|short|float|double|boolean|char|wchar|octet|string|wstring|any|Object|void)\b/
+ - `enum` values are *ordered names* — no explicit integer assignment. + - `sequence` is a variable-length collection type. +""" + +from __future__ import annotations +from dataclasses import dataclass, field +from typing import List, Optional + + +# ============================================================ +# Leaf / shared types +# ============================================================ + +@dataclass +class Member: + """A field inside a struct or exception body. + + Spec §7.4.9 (struct), §7.4.15 (exception):: + + member ::= type_spec declarators ";" + """ + name: str + datatype: str + + +@dataclass +class Parameter: + """A parameter in an operation signature. + + Spec §7.4.12 (op_param_attribute):: + + param_dcl ::= param_attribute simple_type_spec declarator + param_attribute ::= "in" | "out" | "inout" + """ + name: str + datatype: str + direction: str = "in" # "in" | "out" | "inout" + + +@dataclass +class Enumerator: + """A single enumerator inside an enum. + + Spec §7.4.14:: + + enum_type ::= "enum" identifier "{" enumerator {"," enumerator} "}" + enumerator ::= identifier + """ + name: str + + +@dataclass +class Const: + """A named constant. + + Spec §7.4.4:: + + const_dcl ::= "const" const_type identifier "=" const_exp + """ + name: str + datatype: str + value: str + + +# ============================================================ +# Constructed types +# ============================================================ + +@dataclass +class Struct: + """An IDL struct (value type aggregate). + + Spec §7.4.9:: + + struct_type ::= "struct" identifier "{" member+ "}" + """ + name: str + members: Optional[List[Member]] = None + + +@dataclass +class Enum: + """An IDL enumeration. + + Spec §7.4.14:: + + enum_type ::= "enum" identifier "{" enumerator {"," enumerator} "}" + """ + name: str + enumerators: Optional[List[Enumerator]] = None + + +@dataclass +class Exception_: + """An IDL exception type. + + Named Exception_ to avoid collision with the Python built-in. 
+ + Spec §7.4.15:: + + except_dcl ::= "exception" identifier "{" member* "}" + """ + name: str + members: Optional[List[Member]] = None + + +@dataclass +class Typedef: + """An IDL type alias. + + Spec §7.4.8:: + + type_dcl ::= "typedef" type_declarator + type_declarator ::= type_spec declarators + """ + name: str + datatype: str # e.g. "long", "sequence", "ClimateZone[3]" + + +# ============================================================ +# Interface members +# ============================================================ + +@dataclass +class Operation: + """An interface operation (method). + + Spec §7.4.12:: + + op_dcl ::= [op_attribute] op_type_spec identifier + parameter_dcls [raises_expr] + op_attribute ::= "oneway" + """ + name: str + return_type: str # "void" or a type name + parameters: Optional[List[Parameter]] = None + raises: Optional[List[str]] = None # list of exception type names + oneway: bool = False + + +@dataclass +class Attribute: + """An interface attribute (property). + + Spec §7.4.13:: + + attr_dcl ::= ["readonly"] "attribute" param_type_spec + simple_declarator {"," simple_declarator} + """ + name: str + datatype: str + readonly: bool = False + + +@dataclass +class Interface: + """An IDL interface definition. + + Spec §7.4.11:: + + interface_dcl ::= interface_header "{" interface_body "}" + interface_header ::= "interface" identifier [inheritance_spec] + inheritance_spec ::= ":" scoped_name {"," scoped_name} + """ + name: str + inherits: Optional[List[str]] = None # parent interface names (scoped) + operations: Optional[List[Operation]] = None + attributes: Optional[List[Attribute]] = None + consts: Optional[List[Const]] = None + + +# ============================================================ +# Module (namespace) +# ============================================================ + +@dataclass +class Module: + """An IDL module (namespace scope). + + Modules may be nested and may be reopened (multiple blocks with the + same name are merged). 
+ + Spec §7.2.3:: + + module_dcl ::= "module" identifier "{" definition+ "}" + """ + name: str + interfaces: Optional[List[Interface]] = None + structs: Optional[List[Struct]] = None + enums: Optional[List[Enum]] = None + exceptions: Optional[List[Exception_]] = None + typedefs: Optional[List[Typedef]] = None + consts: Optional[List[Const]] = None + modules: Optional[List[Module]] = None # nested sub-modules + + +# ============================================================ +# Top-level file +# ============================================================ + +@dataclass +class IDLFile: + """Represents a single parsed .idl source file. + + Unlike AIDL, an IDL file may contain multiple top-level declarations + (modules, interfaces, structs, etc.) without being wrapped in a module. + """ + modules: Optional[List[Module]] = None + # Top-level (outside any module) declarations: + interfaces: Optional[List[Interface]] = None + structs: Optional[List[Struct]] = None + enums: Optional[List[Enum]] = None + exceptions: Optional[List[Exception_]] = None + typedefs: Optional[List[Typedef]] = None + consts: Optional[List[Const]] = None diff --git a/ifex/models/omgidl/omgidl_lark.py b/ifex/models/omgidl/omgidl_lark.py new file mode 100644 index 0000000..f00eb75 --- /dev/null +++ b/ifex/models/omgidl/omgidl_lark.py @@ -0,0 +1,606 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 MBition GmbH. +# SPDX-License-Identifier: MPL-2.0 + +# This file is part of the IFEX project +# vim: tw=120 ts=4 et + +""" +Parse an OMG IDL source file and build an IDLFile AST. + +Public API: + get_ast_from_idl_file(idl_file: str) -> IDLFile + Read a .idl file and return its IDLFile AST. + + get_ast_from_idl_text(text: str) -> IDLFile + Parse IDL source text directly and return IDLFile. + +Design notes +------------ +Identical approach to aidl_lark.py / protobuf_lark.py: + + 1. Preprocessor lines (#include, #pragma, ...) and comments + (// and /* */) are stripped from the source before Lark sees it. + 2. 
def filter_out(s, re_pattern):
    """Return *s* without the lines that *re_pattern* matches at their start.

    The text is split on '\\n'; every line for which re_pattern.match()
    succeeds is dropped, and the remaining lines are joined with '\\n' again.
    """
    kept_lines = []
    for candidate in s.split('\n'):
        if re_pattern.match(candidate):
            continue
        kept_lines.append(candidate)
    return '\n'.join(kept_lines)
def matcher(node, pattern):
    """Return True if *node* structurally matches *pattern*.

    - list:  pattern must also be a list of the same length whose items
             match pairwise.
    - Tree:  pattern must be a Tree with equal .data whose children match
             (see match_children, which accepts ['*'] as a wildcard).
    - Token: pattern must be a Token whose type and value match
             (see match_str, where "*" in the pattern is a wildcard).

    :raises TypeError: if *node* is none of list, Tree or Token
    """
    if type(node) is list:
        # Compare the *type* of the pattern with list.  The former test
        # `pattern is list` compared the pattern with the list class itself
        # and could therefore never be true for an actual list pattern.
        return (type(pattern) is list and
                len(node) == len(pattern) and
                all(matcher(x, y) for (x, y) in zip(node, pattern)))
    elif is_tree(node):
        return (is_tree(pattern) and
                node.data == pattern.data and
                match_children(node.children, pattern.children))
    elif is_token(node):
        return (is_token(pattern) and
                match_str(node.type, pattern.type) and
                match_str(node.value, pattern.value))
    else:
        raise TypeError(f"Unknown type passed to matcher(): {type(node)}")
# One left-to-right scan that recognises quoted literals as well as comments,
# so that comment markers inside a literal (e.g. "http://...") are left alone
# and a "//" inside a /* ... */ comment does not cut the block comment short.
_COMMENT_OR_LITERAL_RE = re.compile(
    r'(?P<literal>"(?:\\.|[^"\\\n])*"'      # double-quoted string literal
    r"|'(?:\\.|[^'\\\n])*')"                # single-quoted character literal
    r'|/\*.*?\*/'                           # block comment (may span lines)
    r'|//[^\n]*',                           # line comment up to end of line
    re.DOTALL,
)


def filter_comments(text):
    """Strip C/C++ style comments.

    Removes // line comments (up to, not including, the newline) and
    /* ... */ block comments.  Quoted string and character literals are
    copied through unchanged.  A line that consisted only of a // comment is
    left as an empty line rather than being dropped; an unterminated block
    comment is left in place.

    :param text: IDL source text
    :return:     the text with comments removed
    """
    # Literals are put back verbatim; comments (group 'literal' is None) vanish.
    return _COMMENT_OR_LITERAL_RE.sub(lambda m: m.group('literal') or '', text)
def process_scoped_name(node):
    """Extract a scoped_name node to a '::'-joined string.

    All IDENT tokens among the children are joined with '::'.  A leading
    '::' (global scope) is kept when the first child is the scope
    operator token.

    :raises Exception: if node is not a scoped_name rule node
    """
    assert_rule_match(node, 'scoped_name')
    children = node.children
    parts = [tok.value for tok in children if is_token(tok) and tok.type == 'IDENT']

    # '__ANON_0' is a terminal name auto-generated by Lark and depends on
    # the order of anonymous terminals in the grammar, so the token text
    # is accepted too.  NOTE(review): confirm against omgidl.grammar that
    # the '::' token is actually kept in the tree.
    leading = ''
    if children and is_token(children[0]):
        first = children[0]
        if first.type == '__ANON_0' or first.value == '::':
            leading = '::'
    return leading + '::'.join(parts)


# ============================================================
# Processing functions — one per grammar rule
# ============================================================

def process_member(m):
    """Process a member_decl rule → Member.

    Grammar rule: member_decl: type_spec IDENT ";"

    The type node and the name token are popped off ``m.children``.
    """
    assert_rule_match(m, 'member_decl')

    children = m.children
    datatype = process_type(children.pop(0))

    name_token = children.pop(0)
    assert_token(name_token, 'IDENT')

    return Member(name=name_token.value, datatype=datatype)
def process_param(p):
    """Process a param rule → Parameter.

    Grammar rule: param: X_DIRECTION type_spec IDENT

    The three children are consumed (popped) from ``p.children`` in
    grammar order.
    """
    assert_rule_match(p, 'param')
    kids = p.children

    direction_tok = kids.pop(0)
    assert_token(direction_tok, 'X_DIRECTION')

    datatype = process_type(kids.pop(0))

    ident = kids.pop(0)
    assert_token(ident, 'IDENT')

    return Parameter(name=ident.value, datatype=datatype, direction=direction_tok.value)


def process_raises(r):
    """Process a raises_clause rule → List[str].

    Grammar rule: raises_clause: "raises" "(" scoped_name ("," scoped_name)* ")"

    Returns one '::'-joined name per scoped_name child.
    """
    assert_rule_match(r, 'raises_clause')
    return [process_scoped_name(sn) for sn in get_items_of_type(r, 'scoped_name')]


def process_operation(o):
    """Process an operation_decl rule → Operation.

    Grammar rule:
      operation_decl: X_ONEWAY? op_return_type IDENT "(" param_list? ")" raises_clause? ";"

    The leading children (oneway marker, return type, name) are popped
    off ``o.children``; parameters and the raises clause are then looked
    up by rule name among the remaining children.
    """
    assert_rule_match(o, 'operation_decl')
    kids = o.children

    # Optional leading X_ONEWAY token
    oneway = bool(kids and is_token(kids[0]) and kids[0].type == 'X_ONEWAY')
    if oneway:
        kids.pop(0)

    # Return type: either a void_return or a typed_return wrapping a type node
    ret_node = kids.pop(0)
    assert_rule_match_any(ret_node, ['void_return', 'typed_return'])
    return_type = ('void' if rule_match(ret_node, 'void_return')
                   else process_type(ret_node.children[0]))

    # Operation name
    ident = kids.pop(0)
    assert_token(ident, 'IDENT')

    # Parameters of every param_list child, in declaration order
    params = [process_param(param)
              for param_list in get_items_of_type(o, 'param_list')
              for param in get_items_of_type(param_list, 'param')]

    # Only the first raises_clause (if any) is used
    raises_nodes = get_items_of_type(o, 'raises_clause')
    raises = process_raises(raises_nodes[0]) if raises_nodes else None

    return Operation(
        name=ident.value,
        return_type=return_type,
        parameters=params or None,
        raises=raises,
        oneway=oneway,
    )
def process_attribute(a):
    """Process an attribute_decl rule → Attribute.

    Grammar rule: attribute_decl: X_READONLY? "attribute" type_spec IDENT ";"

    The children that are read are popped off ``a.children``.
    """
    assert_rule_match(a, 'attribute_decl')
    kids = a.children

    # Optional leading X_READONLY token
    readonly = bool(kids and is_token(kids[0]) and kids[0].type == 'X_READONLY')
    if readonly:
        kids.pop(0)

    datatype = process_type(kids.pop(0))

    ident = kids.pop(0)
    assert_token(ident, 'IDENT')

    return Attribute(name=ident.value, datatype=datatype, readonly=readonly)


def process_const(c):
    """Process a const_dcl rule → Const.

    Grammar rule: const_dcl: "const" type_spec IDENT "=" CONST_VALUE ";"

    The value is stored as the CONST_VALUE token text with surrounding
    whitespace stripped.  The three children are popped off ``c.children``.
    """
    assert_rule_match(c, 'const_dcl')
    kids = c.children

    datatype = process_type(kids.pop(0))

    ident = kids.pop(0)
    assert_token(ident, 'IDENT')

    value_tok = kids.pop(0)
    assert_token(value_tok, 'CONST_VALUE')

    return Const(name=ident.value, datatype=datatype, value=value_tok.value.strip())


def process_struct(s):
    """Process a struct_decl rule → Struct.

    Grammar rule: struct_decl: "struct" IDENT "{" member_decl+ "}" ";"

    ``members`` is None when no member_decl child is found.
    """
    assert_rule_match(s, 'struct_decl')

    ident = s.children.pop(0)
    assert_token(ident, 'IDENT')

    members = [process_member(decl) for decl in get_items_of_type(s, 'member_decl')]

    return Struct(name=ident.value, members=members or None)
def process_exception(e):
    """Process an exception_decl rule → Exception_.

    Grammar rule: exception_decl: "exception" IDENT "{" member_decl* "}" ";"

    ``members`` is None when the exception declares no members.
    """
    assert_rule_match(e, 'exception_decl')

    ident = e.children.pop(0)
    assert_token(ident, 'IDENT')

    members = [process_member(decl) for decl in get_items_of_type(e, 'member_decl')]

    return Exception_(name=ident.value, members=members or None)


def process_enum(e):
    """Process an enum_decl rule → Enum.

    Grammar rule: enum_decl: "enum" IDENT "{" IDENT ("," IDENT)* "}" ";"

    Note: OMG IDL enumerators are identifiers only — no explicit integer
    values.  Values are implicitly 0, 1, 2, ... in declaration order.
    """
    assert_rule_match(e, 'enum_decl')

    # The first IDENT token names the enum, every later one is an enumerator.
    idents = [child for child in e.children
              if is_token(child) and child.type == 'IDENT']

    enum_name = idents[0].value
    enumerators = [Enumerator(name=ident.value) for ident in idents[1:]]

    return Enum(name=enum_name, enumerators=enumerators or None)


def process_typedef(t):
    """Process a typedef_decl rule → Typedef.

    Grammar rule: typedef_decl: "typedef" type_spec IDENT ";"

    The type node and the name token are popped off ``t.children``.
    """
    assert_rule_match(t, 'typedef_decl')
    kids = t.children

    datatype = process_type(kids.pop(0))

    ident = kids.pop(0)
    assert_token(ident, 'IDENT')

    return Typedef(name=ident.value, datatype=datatype)


def process_interface(i):
    """Process an interface_decl rule → Interface.

    Grammar rule:
      interface_decl: "interface" IDENT inheritance_spec? "{" interface_member* "}" ";"

    Operations, attributes and consts are collected from the direct
    children of the interface node; each list becomes None when empty.
    """
    assert_rule_match(i, 'interface_decl')

    # Name
    ident = i.children.pop(0)
    assert_token(ident, 'IDENT')

    # Optional inheritance: only the first inheritance_spec is used
    inh_nodes = get_items_of_type(i, 'inheritance_spec')
    inherits = ([process_scoped_name(sn)
                 for sn in get_items_of_type(inh_nodes[0], 'scoped_name')]
                if inh_nodes else None)

    # Members, grouped by kind
    operations = [process_operation(op) for op in get_items_of_type(i, 'operation_decl')]
    attributes = [process_attribute(attr) for attr in get_items_of_type(i, 'attribute_decl')]
    consts = [process_const(const) for const in get_items_of_type(i, 'const_dcl')]

    return Interface(
        name=ident.value,
        inherits=inherits,
        operations=operations or None,
        attributes=attributes or None,
        consts=consts or None,
    )


def process_module(m):
    """Process a module_decl rule → Module (recursive).

    Grammar rule: module_decl: "module" IDENT "{" definition* "}" ";"

    Each kind of declaration is collected from the direct children of
    the module node; nested modules are processed recursively.  A kind
    with no declarations is stored as None.
    """
    assert_rule_match(m, 'module_decl')

    ident = m.children.pop(0)
    assert_token(ident, 'IDENT')

    def collect(rule_name, handler):
        # Convert every direct child of the given rule; None when there is none.
        converted = [handler(child) for child in get_items_of_type(m, rule_name)]
        return converted or None

    return Module(
        name=ident.value,
        interfaces=collect('interface_decl', process_interface),
        structs=collect('struct_decl', process_struct),
        enums=collect('enum_decl', process_enum),
        exceptions=collect('exception_decl', process_exception),
        typedefs=collect('typedef_decl', process_typedef),
        consts=collect('const_dcl', process_const),
        modules=collect('module_decl', process_module),
    )
# ============================================================
# Top-level tree processor
# ============================================================

def process_lark_tree(root):
    """Walk the root lark.Tree and build an IDLFile AST.

    Grammar rule: idl_file: definition*

    Only direct children of the root are examined here; declarations
    inside modules are handled by process_module.  A kind with no
    top-level declarations is stored as None.
    """
    assert_rule_match(root, 'idl_file')

    def collect(rule_name, handler):
        # Convert every direct child of the given rule; None when there is none.
        converted = [handler(child) for child in get_items_of_type(root, rule_name)]
        return converted or None

    return IDLFile(
        modules=collect('module_decl', process_module),
        interfaces=collect('interface_decl', process_interface),
        structs=collect('struct_decl', process_struct),
        enums=collect('enum_decl', process_enum),
        exceptions=collect('exception_decl', process_exception),
        typedefs=collect('typedef_decl', process_typedef),
        consts=collect('const_dcl', process_const),
    )


# ============================================================
# Grammar loading and parsing
# ============================================================

def _load_grammar() -> str:
    """Return the text of 'omgidl.grammar', located next to this module."""
    module_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(module_dir, 'omgidl.grammar'), 'r') as grammar:
        return grammar.read()


def parse_text(text: str) -> IDLFile:
    """Parse OMG IDL source text and return an IDLFile AST.

    A LALR parser is built from the grammar file, the text is run
    through preprocess() and the resulting parse tree is converted with
    process_lark_tree().
    """
    parser = Lark(_load_grammar(), parser='lalr')
    return process_lark_tree(parser.parse(preprocess(text)))
# ============================================================
# Public entry points
# ============================================================

def get_ast_from_idl_file(idl_file: str) -> IDLFile:
    """Read a .idl file and return its IDLFile AST.

    :param idl_file: path to a .idl source file
    :return: IDLFile abstract syntax tree
    """
    with open(idl_file, 'r') as f:
        text = f.read()
    return parse_text(text)


def get_ast_from_idl_text(text: str) -> IDLFile:
    """Parse OMG IDL source text and return an IDLFile AST."""
    return parse_text(text)


# ============================================================
# Script entry point
# ============================================================

if __name__ == '__main__':
    # Without a path argument sys.argv[1] would raise a bare IndexError;
    # print a usage line and exit with an error status instead.
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file.idl>", file=sys.stderr)
        sys.exit(2)
    idl_ast = get_ast_from_idl_file(sys.argv[1])
    print(ast_as_yaml(idl_ast))