fix multi joins in datasets (#1233)

iakov-gan · web-flow · commit f91dd34a3424 · 2025-06-13T09:22:46.000+02:00
* fix multi joins in datasets

* release 4.2.5

* merge
diff --git a/cfn-templates/cid-cfn.yml b/cfn-templates/cid-cfn.yml
@@ -1,6 +1,6 @@
 # https://github.com/aws-samples/aws-cudos-framework-deployment/blob/main/cfn-templates/cid-cfn.yml
 AWSTemplateFormatVersion: '2010-09-09'
-Description: Deployment of Cloud Intelligence Dashboards v4.2.4 - AWS Solution SO9011
+Description: Deployment of Cloud Intelligence Dashboards v4.2.5 - AWS Solution SO9011
 Metadata:
   AWS::CloudFormation::Interface:
     ParameterGroups:
@@ -2070,7 +2070,7 @@ Resources:
       SourceBucket: !Ref ReferenceAssetsBucket
       DestinationBucket: !Ref LocalAssetsBucket
       Keys:
-        - 'cid-resource-lambda-layer/cid-4.2.4.zip' #replace version here if needed
+        - 'cid-resource-lambda-layer/cid-4.2.5.zip' #replace version here if needed
 
   CidResourceLambdaLayer:
     Type: AWS::Lambda::LayerVersion
@@ -2085,7 +2085,7 @@ Resources:
             - LambdaLayerBucketPrefixIsManaged
             - !FindInMap [RegionMap, !Ref 'AWS::Region', BucketName]
             - !Sub '${LambdaLayerBucketPrefix}-${AWS::Region}' # Region added for backward compatibility
-        S3Key: 'cid-resource-lambda-layer/cid-4.2.4.zip' #replace version here if needed
+        S3Key: 'cid-resource-lambda-layer/cid-4.2.5.zip' #replace version here if needed
       CompatibleRuntimes:
         - python3.10
         - python3.11
diff --git a/cid/_version.py b/cid/_version.py
@@ -1,2 +1,2 @@
-__version__ = '4.2.4'
+__version__ = '4.2.5'
 
diff --git a/cid/helpers/quicksight/dataset.py b/cid/helpers/quicksight/dataset.py
@@ -107,15 +107,23 @@ def _athena_to_qs_type(col, athena_type):
             # take the first one and let's hope it is fine
             root_lt = next(iter(dataset['LogicalTableMap'].values()))
 
-        projected_cols = next( # get the first DataTrasform with ProjectOperation
+        renames = {}
+        for lt in dataset['LogicalTableMap'].values():
+            for dt in lt.get('DataTransforms', []):
+                if "RenameColumnOperation" in dt:
+                    key = lt['Source'].get('PhysicalTableId', '') + '.' +  dt["RenameColumnOperation"]['ColumnName'] 
+                    renames[key] = dt["RenameColumnOperation"]['NewColumnName']
+        logger.trace(f'renames = {renames}')
+
+        projected_cols = next( # get the first DataTransform with ProjectOperation
             ds['ProjectOperation']["ProjectedColumns"]
             for ds in root_lt['DataTransforms']
             if 'ProjectOperation' in ds
         )
 
         # Update each PhysicalTableMap with all columns from athena views
         all_columns = []
-        for pt in dataset['PhysicalTableMap'].values():
+        for pt_id, pt in dataset['PhysicalTableMap'].items():
             table_name = pt['RelationalTable']['Name']
             database = pt['RelationalTable']['Schema']
             columns = _get_athena_columns(table_name, database)
@@ -137,15 +145,20 @@ def _athena_to_qs_type(col, athena_type):
                 if col['Name'].lower() not in dataset_columns_names
             ] # BTW what if col is there but another type?
 
-            for col in new_columns: # alter names for columns that already exist
-                if col['Name'].lower() in projected_cols:
-                    col['Name'] = f"{col['Name']}[{table_name}]" # What if it is alrady there?
+            # FIXME: need to add RenameColumnOperation!  col['Name'] => f"{col['Name']}[{table_name}]"
+            new_columns = [col for col in new_columns if col['Name'].lower() not in projected_cols] # avoid things that are already there (probably need to take renames into account)
+            new_columns = [col for col in new_columns if col['Name'] not in all_columns] # avoid adding 2nd time
 
             logger.trace(f'dataset_columns_to_keep = {dataset_columns_to_keep}')
             if new_columns:
-                logger.trace(f'new_columns = {new_columns}')
+                logger.trace(f'new_columns = {new_columns} from {pt_id}')
             pt['RelationalTable']['InputColumns'] = dataset_columns_to_keep + new_columns
-            all_columns += [col['Name'] for col in pt['RelationalTable']['InputColumns']]
+
+            for col in new_columns:
+                col_name = col['Name']
+                if f'{pt_id}.{col_name}' in renames:
+                    col_name = renames[f'{pt_id}.{col_name}']
+                all_columns.append(col_name)
 
         # Add all needed calc fields
         existing_create_columns = [dt.get("CreateColumnsOperation", {}).get('Columns', [None])[0] for dt in root_lt.get('DataTransforms', []) if dt.get("CreateColumnsOperation")]
@@ -172,12 +185,12 @@ def _athena_to_qs_type(col, athena_type):
         # Add all new cols to projected columns
         for col in set(all_columns):
             if col.lower() not in [c.lower() for c in projected_cols]:
+                logger.trace(f'adding {col}')
                 projected_cols.append(col)
 
         # filter out all columns that cannot be used for dataset creation
         update_ = {key: value for key, value in dataset.items() if key in 'DataSetId, Name, PhysicalTableMap, LogicalTableMap, ImportMode, ColumnGroups, FieldFolders, RowLevelPermissionDataSet, RowLevelPermissionTagConfiguration, ColumnLevelPermissionRules, DataSetUsageConfiguration, DatasetParameters'.split(', ')}
         logger.trace(f'update_ = {update_}')
-
         return update_
 
 

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -1,2 +1,2 @@` |
| 1 | | `-__version__ = '4.2.4'` |
| | 1 | `+__version__ = '4.2.5'` |
| 2 | 2 | |