88
99logger = logging .getLogger (__name__ )
1010
# Top-level keys of a dataset definition that this module works with:
# definitions are filtered down to these keys, and the same list drives the
# key-by-key comparison and merge of two datasets.
DATASET_PROPERTIES = [
    'AwsAccountId',
    'DataSetId',
    'Name',
    'PhysicalTableMap',
    'LogicalTableMap',
    'ImportMode',
    'ColumnGroups',
    'RowLevelPermissionDataSet',
    'RowLevelPermissionTagConfiguration',
    'FieldFolders',
    'ColumnLevelPermissionRules',
    'DataSetUsageConfiguration',
    'DatasetParameters',
    'PerformanceConfiguration',
]
16+
1117
1218class Dataset (CidQsResource ):
1319
@@ -189,7 +195,7 @@ def _athena_to_qs_type(col, athena_type):
189195 projected_cols .append (col )
190196
191197 # filter out all columns that cannot be used for dataset creation
192- update_ = {key : value for key , value in dataset .items () if key in 'DataSetId, Name, PhysicalTableMap, LogicalTableMap, ImportMode, ColumnGroups, FieldFolders, RowLevelPermissionDataSet, RowLevelPermissionTagConfiguration, ColumnLevelPermissionRules, DataSetUsageConfiguration, DatasetParameters' . split ( ', ' ) }
198+ update_ = {key : value for key , value in dataset .items () if key in DATASET_PROPERTIES }
193199 logger .trace (f'update_ = { update_ } ' )
194200 return update_
195201
@@ -230,4 +236,41 @@ def to_diffable_structure(self):
230236 for alias , join in join_clauses .items ():
231237 if isinstance (data ['Data' ].get (alias ), dict ) :
232238 data ['Data' ][alias ]['clause' ] = join
233- return (yaml .safe_dump (data ))
239+ return (yaml .safe_dump (data ))
240+
@staticmethod
def datasets_are_identical(dataset1, dataset2):
    ''' Tell whether 2 datasets carry the same definition.

    Two empty (falsy) inputs are considered identical; a single empty input is not.
    Otherwise every key of DATASET_PROPERTIES except 'AwsAccountId' and 'DataSetId'
    is compared between the raw definitions. Inputs that are not Dataset
    instances are wrapped with Dataset(...) first.
    '''
    if not dataset1 and not dataset2:
        return True
    if not dataset1 or not dataset2:
        # exactly one side is empty at this point
        return False

    dataset1 = dataset1 if isinstance(dataset1, Dataset) else Dataset(dataset1)
    dataset2 = dataset2 if isinstance(dataset2, Dataset) else Dataset(dataset2)

    ignored_keys = ('AwsAccountId', 'DataSetId')
    mismatch_count = 0
    for key in DATASET_PROPERTIES:
        if key in ignored_keys:
            continue
        value1 = dataset1.raw.get(key)
        value2 = dataset2.raw.get(key)
        if value1 != value2:
            # no early exit: every differing key gets traced
            logger.trace(f'not identical {key} {value1} != {value2} ')
            mismatch_count += 1

    identical = mismatch_count == 0
    logger.trace(f'identical to existing = {identical} ')
    return identical
261+
@staticmethod
def merge_datasets(dataset1, dataset2):
    ''' Shallow, key-by-key merge of 2 datasets (nested structures are not merged).

    If one input is empty (falsy), the other input is returned as is.
    Otherwise a new dict is built: for each key of DATASET_PROPERTIES it takes
    the value from dataset1 when that value is truthy, else the value from
    dataset2. Keys with no truthy value on either side are left out.
    '''
    if not dataset2:
        return dataset1
    if not dataset1:
        return dataset2

    dataset1 = dataset1 if isinstance(dataset1, Dataset) else Dataset(dataset1)
    dataset2 = dataset2 if isinstance(dataset2, Dataset) else Dataset(dataset2)

    merged = {}
    for key in DATASET_PROPERTIES:
        # dataset1 wins whenever it holds a truthy value for the key
        value = dataset1.raw.get(key) or dataset2.raw.get(key)
        if value:
            merged[key] = value
    return merged
0 commit comments