Skip to content

Commit 8f2eeae

Browse files
Version 1.4.0 - Add CompareOutputDataAnalysis class
- Add class for analyzing output data - Add new Counterpart data for VuMC2021_N-ary
1 parent 758af1b commit 8f2eeae

File tree

5 files changed

+305
-1
lines changed

5 files changed

+305
-1
lines changed
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
#!/usr/bin/env python
2+
## \file CompareOutputDataAnalysispy.py
3+
# \brief Compares output data for difference and plotting analysis.
4+
# \author Revolution Rivera-Felix, [email protected], [email protected]
5+
# \author Aaron Olson, [email protected], [email protected]
6+
import pandas as pd
7+
import numpy as np
8+
import warnings
9+
import matplotlib.pyplot as plt
10+
11+
12+
class CompareOutputDataAnalysis(object):
    ## \brief Create an analysis object with no datasets loaded
    #
    # Sets up the empty reference, comparison, and metrics dictionaries
    # (each maps a dataset name to a pandas DataFrame), the list of plot
    # markers that is cycled through when plotting, and the mapping used to
    # expand short-hand row labels (e.g. "Sign" -> "Signed") in legends/titles.
    def __init__(self):
        self.reference = {}
        self.comparison = {}
        self.metrics = {}
        self.markers = ['o', '^', 's', 'D', 'v', 'p', '+', '1', 'x', '*', 'h']
        self.shorthand_mapping = {
            "Sign"  : "Signed",
            "Unsign": "Unsigned",
            "Rel"   : "Relative",
            "Abs"   : "Absolute",
            "Diff"  : "Difference"}

    ## \brief Expand short-hand words (e.g. "Abs", "Diff") in a label
    #
    # \param[in] text str, whitespace-separated label
    # \returns str, label with every word found in shorthand_mapping replaced by its long form
    def _expandShorthand(self, text):
        return " ".join([self.shorthand_mapping.get(word, word) for word in text.split()])

    ## \brief Keep only the rows whose index label contains every identifier
    #
    # \param[in] frame pandas DataFrame with string row labels
    # \param[in] identifiers iterable of strings or None; None/empty means no filtering
    # \returns pandas DataFrame, the filtered rows (the input itself when nothing is filtered)
    @staticmethod
    def _filterRowsByIdentifiers(frame, identifiers):
        if not identifiers: return frame
        keep = np.ones(len(frame.index), dtype=bool)
        for stringVar in identifiers:
            keep &= np.asarray(frame.index.str.contains(stringVar), dtype=bool)
        return frame[keep]

    ## \brief Apply the axis formatting shared by all plots, then save and/or show the figure
    #
    # \param[in] fileName str, file name (without extension) of the saved .png
    # \param[in] flShowPlot bool, whether to display the plot
    # \param[in] flSavePlot bool, whether to save the plot as fileName+'.png'
    # \param[in] yLim tuple of floats or None, y-axis limits
    @staticmethod
    def _finalizePlot(fileName, flShowPlot, flSavePlot, yLim):
        plt.xlabel('Case', fontsize=20)
        if yLim: plt.ylim(yLim)
        plt.legend(fontsize=10)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        if flSavePlot: plt.savefig(fileName+'.png')
        if flShowPlot: plt.show()

    ## \brief Read in CSV data, name it, and add to class-defined dictionaries
    #
    # Only one reference dataset can be defined at a time.
    # Multiple comparison datasets can be defined at a time.
    # When using comparison capabilities in this class,
    # each of the comparison datasets will be compared to the reference dataset.
    # It is recommended that a new object be created for each reference dataset
    # a user wants to work with.
    # Lines starting with "#" in the file are ignored and the first column is used as the row index.
    #
    # \param[in] filePath string, local path to file read in by pandas
    # \param[in] type string, choose between reference or comparison dataset
    # \param[in] name string, optional parameter to name file data (otherwise name will be full file path)
    # \returns Sets as attributes the dictionaries of reference and comparison datasets
    # \throws Exception if type is neither 'reference' nor 'comparison'
    def readCSV(self, filePath, type, name=None):
        if type not in ('reference', 'comparison'): raise Exception("'type' must be either 'reference' or 'comparison'.")
        if name is None: name = filePath
        assert isinstance(name, str)
        # Read before touching the dictionaries so a failed read cannot discard an existing reference
        data = pd.read_csv(filePath, comment="#", index_col=[0])
        if type == "reference":
            if len(self.reference) > 0:
                warnings.warn("Only one reference dataset allowed at a time; new reference dataset has overwritten old reference dataset. Comparison datasets may contain values computed with different reference datasets. To ensure the clarity and purity of the computed values, create a new object for every reference dataset you want to use.")
                self.reference.clear()
            self.reference[name] = data
        else:
            self.comparison[name] = data

    ## \brief Compute difference between the reference dataset and each comparison dataset
    #
    # Note that rows with uncertainty values must have a row index that follows the format Quantity_unc (e.g. "Refl_unc")
    # or else the propagation of uncertainty values will be skipped for the current operation.
    # If no reference dataset has been loaded, a warning is issued and nothing is computed.
    #
    # \param[in] SignedOrUnsignedDiffs str, 'signed' or 'unsigned', computes signed or unsigned differences
    # \param[in] AbsoluteOrRelativeDiffs string, 'absolute' or 'relative', computes absolute or relative differences
    # \param[in] quantity string, row of the datasets the user wants to work with (e.g. 'Refl' or 'Trans')
    # \returns Adds difference values and difference uncertainty values as new rows to the comparison dataset(s)
    # \throws Exception if either of the two mode arguments is not one of its allowed values
    def computeDifferences(self, SignedOrUnsignedDiffs, AbsoluteOrRelativeDiffs, quantity):
        if len(self.reference) == 0: warnings.warn("There is no reference dataset. A reference dataset is necessary to compute differences with the comparison datasets. Results of this function call may not return expected values.")
        if len(self.comparison) == 0: warnings.warn("There is no comparison dataset. At least one comparison dataset is required to compute differences. Results of this function call may not return expected values.")
        if SignedOrUnsignedDiffs not in {'signed', 'unsigned'}: raise Exception("'SignedOrUnsignedDiffs' must be either 'signed' or 'unsigned'.")
        if AbsoluteOrRelativeDiffs not in {'relative', 'absolute'}: raise Exception("'AbsoluteOrRelativeDiffs' must be either 'relative' or 'absolute'.")
        assert isinstance(quantity, str)
        # Without a reference there is nothing to subtract; the warning above has already been issued
        if len(self.reference) == 0: return

        # Row-label suffix describing the kind of difference, e.g. ' Sign Abs Diff'
        identifier  = ' Sign' if SignedOrUnsignedDiffs   == 'signed'   else ' Unsign'
        identifier += ' Abs'  if AbsoluteOrRelativeDiffs == 'absolute' else ' Rel'
        identifier += ' Diff'
        uncLabel = quantity+"_unc"

        # Get values from reference dataset according to quantity
        refDF  = next(iter(self.reference.values()))
        refRow = refDF.loc[quantity]
        # Exact label membership: the row is fetched with .loc below, which needs an exact match
        flReferenceUncPresent = uncLabel in refDF.index
        if flReferenceUncPresent: refUncRow = refDF.loc[uncLabel]
        else                    : warnings.warn(f"The Reference Dataset does not have any uncertainty values associated with the {quantity} values. The propogation of error uncertainty has been skipped for this operation.")

        for key, compDF in self.comparison.items():
            # Compute differences
            compRow = compDF.loc[quantity]
            diff = compRow - refRow
            if AbsoluteOrRelativeDiffs == 'relative': diff = diff / refRow
            if SignedOrUnsignedDiffs   == 'unsigned': diff = np.abs(diff)
            compDF.loc[quantity+identifier] = diff
            # Compute uncertainties of differences
            if not flReferenceUncPresent: continue
            if uncLabel not in compDF.index:
                warnings.warn(f"{key} does not have any uncertainty values associated with the {quantity} values. The propogation of error uncertainty has been skipped for this operation.")
                continue
            compUncRow = compDF.loc[uncLabel]
            # First-order propagation for (comp - ref)/ref and for (comp - ref), respectively
            if AbsoluteOrRelativeDiffs == 'relative': unc = np.sqrt((compUncRow/refRow)**2 + ((compRow * refUncRow)/refRow**2)**2)
            else                                    : unc = np.sqrt(refUncRow**2 + compUncRow**2)
            compDF.loc[quantity+identifier+" Unc"] = unc

    ## \brief Compute metrics (i.e. mean absolute, root mean squared, and max absolute) of the difference values in the comparison DataFrames
    #
    # Comparison datasets with no difference values will be skipped with warning given.
    # Rows whose label contains 'Unc' are never treated as difference values.
    #
    # \param[in] rowIdentifier str, default 'Diff', used to search datasets for difference values
    # \returns Stores one DataFrame per comparison dataset in self.metrics, with one row per difference row
    #          and the columns 'meanDiff', 'RMSDiff', and 'maxAbsDiff'
    # \throws Exception if no comparison dataset has been loaded
    def computeDifferenceMetrics(self, rowIdentifier='Diff'):
        if len(self.comparison) == 0: raise Exception("There is no comparison dataset. At least one comparison dataset is required to compute differences. Results of this function call may not return expected values.")
        assert isinstance(rowIdentifier, str)
        for key, compDF in self.comparison.items():
            # Create temp DataFrame with rows containing the rowIdentifier string in the row index
            labels = compDF.index
            tempDF = compDF[labels.str.contains(rowIdentifier) & ~labels.str.contains('Unc')]
            if tempDF.empty:
                warnings.warn(f" The '{key}' dataset does not contain any difference values. The metrics computations have been skipped for this dataset.")
                continue
            # Apply lambda functions to the rows to compute metrics
            self.metrics[key] = pd.DataFrame({
                'meanDiff'  : tempDF.apply(lambda row: np.mean(np.abs(row)),     axis=1),
                'RMSDiff'   : tempDF.apply(lambda row: np.sqrt(np.mean(row**2)), axis=1),
                'maxAbsDiff': tempDF.apply(lambda row: np.max(np.abs(row)),      axis=1)
            })

    ## \brief Plots the output values specified by the user
    #
    # One line is drawn per matching row of every comparison dataset; the column
    # labels of the first comparison dataset are used as the x-axis. Error bars are
    # added when a matching Quantity_unc row exists. The figure is always closed
    # before returning, including when an exception is raised.
    #
    # \param[in] quantity str, default None (e.g. Refl, Trans), used to identify and collect data from the DataFrames
    # \param[in] fileName str, default 'outputs', used to specify file name of plots generated
    # \param[in] flShowPlot bool, default True, used to display plot to user after calculations
    # \param[in] flSavePlot bool, default True, used to automatically save generated plots to a .png file at the location of the driver script
    # \param[in] yLim tuple of floats, default None (e.g. (-1, 1.5)), sets the y-axis limits of the plot
    # \param[in] plotTitle str, default None, allows user to specify a title for the plot (otherwise one will be generated automatically)
    # \returns Displays and saves an output plot dependent on user specifications
    # \throws Exception if quantity is not given, if there is no comparison dataset, or if nothing could be plotted
    def plotOutputsAcrossCases(self, quantity=None, fileName='outputs', flShowPlot=True, flSavePlot=True, yLim=None, plotTitle=None):
        if quantity is None: raise Exception("Please specify which quantity you want to plot (e.g. 'Refl' or 'Trans').")
        if len(self.comparison) == 0: raise Exception("There is no comparison dataset. At least one comparison dataset is required to plot outputs.")
        fig = plt.figure()
        try:
            xAxis = next(iter(self.comparison.values())).columns.tolist()
            markerCounter = 0
            for key, compDF in self.comparison.items():
                # Slice comparison DataFrames based on user parameters to prepare for plotting
                labels   = compDF.index
                isOutput = labels.str.contains(quantity) & ~labels.str.contains('Diff')
                isUnc    = labels.str.contains('unc')
                rows     = compDF[isOutput & ~isUnc]
                uncRows  = compDF[isOutput &  isUnc]
                # Adding error bars & plotting
                if rows.empty:
                    warnings.warn(f"There are no {quantity} values in the {key} dataset.")
                    continue
                for idx, row in rows.iterrows():
                    yerr = uncRows.loc[idx+"_unc"].tolist() if idx+"_unc" in uncRows.index else None
                    expandedIdx = self._expandShorthand(idx) # Expands the short-hand version of the legend
                    plt.errorbar(xAxis, row, yerr=yerr, marker=self.markers[markerCounter % len(self.markers)], markersize=5, label=f'{key} - {expandedIdx}')
                    markerCounter += 1

            # Check if any data points have been plotted
            if markerCounter == 0: raise Exception('None of the datasets have values for the specified quantity. No plot will be displayed.')

            # Continue plotting
            if plotTitle: plt.title(plotTitle, fontsize=25)
            else        : plt.title(self._expandShorthand(quantity+' '+'Leakage'), fontsize=25)
            plt.ylabel('Leakage Values', fontsize=20)
            self._finalizePlot(fileName, flShowPlot, flSavePlot, yLim)
        finally:
            # Release the figure even when plotting failed part-way through
            plt.close(fig)

    ## \brief Plots the difference values specified by the user
    #
    # One line is drawn per matching difference row of every comparison dataset; the
    # column labels of the first comparison dataset are used as the x-axis. Error bars
    # are added when a matching "<row> Unc" row exists, and a dashed zero line is drawn
    # for reference. When no dataIdentifiers are given, every difference row of the
    # quantity is plotted. The figure is always closed before returning, including when
    # an exception is raised.
    #
    # \param[in] quantity str, default None (e.g. Refl, Trans), used to identify and collect data from the DataFrames
    # \param[in] dataIdentifiers tuple of strings, default None (e.g. ('Sign', 'Rel')), strings to search for in the row indexes of the datasets
    # \param[in] fileName str, default 'differences', used to specify file name of plots generated
    # \param[in] flShowPlot bool, default True, used to display plot to user after calculations
    # \param[in] flSavePlot bool, default True, used to automatically save generated plots to a .png file at the location of the driver script
    # \param[in] yLim tuple of floats, default None (e.g. (-1, 1.5)), sets the y-axis limits of the plot
    # \param[in] plotTitle str, default None, allows user to specify a title for the plot (otherwise one will be generated automatically)
    # \returns Displays and saves a difference plot dependent on user specifications
    # \throws Exception if quantity is not given, if there is no comparison dataset, or if nothing could be plotted
    def plotDifferencesAcrossCases(self, quantity=None, dataIdentifiers=None, fileName='differences', flShowPlot=True, flSavePlot=True, yLim=None, plotTitle=None):
        if quantity is None: raise Exception("Please specify which quantity you want to plot (e.g. 'Refl' or 'Trans').")
        if len(self.comparison) == 0: raise Exception("There is no comparison dataset. At least one comparison dataset is required to plot differences.")
        fig = plt.figure()
        try:
            firstDF = next(iter(self.comparison.values()))
            xAxis = firstDF.columns.tolist()
            markerCounter = 0
            for key, compDF in self.comparison.items():
                # Slice comparison DataFrames based on user parameters to prepare for plotting
                labels = compDF.index
                isDiff = labels.str.contains(quantity) & labels.str.contains('Diff')
                isUnc  = labels.str.contains('Unc')
                # Filter rows based on dataIdentifiers (no identifiers: keep every difference row)
                diffRows    = self._filterRowsByIdentifiers(compDF[isDiff & ~isUnc], dataIdentifiers)
                diffUncRows = self._filterRowsByIdentifiers(compDF[isDiff &  isUnc], dataIdentifiers)

                # Adding error bars & plotting
                if diffRows.empty:
                    warnings.warn(f"There are no {quantity} Difference values in the {key} dataset with the dataIdentifiers: {dataIdentifiers}.")
                    continue
                for idx, row in diffRows.iterrows():
                    yerr = diffUncRows.loc[idx+" Unc"].tolist() if idx+" Unc" in diffUncRows.index else None
                    expandedIdx = self._expandShorthand(idx) # Expands the short-hand version of the legend
                    plt.errorbar(xAxis, row, yerr=yerr, marker=self.markers[markerCounter % len(self.markers)], markersize=5, label=f'{key} - {expandedIdx}')
                    markerCounter += 1

            # Check if any data points have been plotted
            if markerCounter == 0: raise Exception('None of the datasets have difference values according to the specified identifiers. No plot will be displayed.')

            # Continue plotting
            if plotTitle: plt.title(plotTitle, fontsize=25)
            else        : plt.title(self._expandShorthand(quantity+' '+'Diff'), fontsize=25)
            plt.ylabel('Difference Values', fontsize=20)
            # Dashed zero line spanning the first to the last case
            first, last = firstDF.columns[0], firstDF.columns[-1]
            plt.plot([first,last],[0,0],color='black',linestyle='--',linewidth=0.8)
            self._finalizePlot(fileName, flShowPlot, flSavePlot, yLim)
        finally:
            # Release the figure even when plotting failed part-way through
            plt.close(fig)

    ## \brief Save data as a .csv file
    #
    # If no dataIdentifiers are specified, the whole dataset will be saved to a .csv file.
    # In the saved file, rows whose label contains 'Unc' are placed after all other rows.
    # If filtering leaves no rows, a warning is issued and no file is written.
    #
    # \param[in] type string, default 'comparison', determines whether to search in self.comparison or self.metrics
    # \param[in] dataset string, default None, finds the key of the dataset within the dictionary
    # \param[in] fileName str, default None, allows user to specify a title for the .csv file (otherwise dataset+'_'+type is used)
    # \param[in] dataIdentifiers tuple of strings, default None, optionally used to specify rows of data the user wants and filters out the rest
    # \param[in] flPrintData bool, default False, whether to print specified data to screen
    # \returns Prints and saves a comparison dataset to a .csv file
    # \throws Exception if type is neither 'comparison' nor 'metrics', or if dataset is not a key of the chosen dictionary
    def datasetToCSV(self, type='comparison', dataset=None, fileName=None, dataIdentifiers=None, flPrintData=False):
        if type not in ('comparison', 'metrics'): raise Exception("'type' must either be 'comparison' or 'metrics'.")
        dictionary = self.comparison if type == 'comparison' else self.metrics
        if dataset not in dictionary: raise Exception(f'{dataset} does not exist in the dictionary.')

        # Separate values and uncertainty values in specified DataFrame
        frame = dictionary[dataset]
        isUnc = frame.index.str.contains('Unc')
        # Filter rows and uncRows based on dataIdentifiers
        rows    = self._filterRowsByIdentifiers(frame[~isUnc], dataIdentifiers)
        uncRows = self._filterRowsByIdentifiers(frame[ isUnc], dataIdentifiers)
        # Concatenate rows and uncRows then save to a .csv file
        result = pd.concat([rows, uncRows], axis=0)
        if flPrintData: print(f'{dataset}: \n', result)
        if fileName is None: fileName = dataset+'_'+type
        if not result.empty: result.to_csv(fileName+'.csv')
        else: warnings.warn("There are no values for the specified data identifiers--CSV file not created.")
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
,1a,1b,1c,1d,2a,2b,2c,2d,3a,3b,3c,3d
2+
Refl,0.288,0.2362,0.4324,0,0.04451,0.652,0.4225,0,0.04405,0.6745,0.4633,0
3+
Refl_unc,0.0001,0.0001,0.0001,0,0.00007,0.0002,0.0002,0,0.00006,0.0001,0.0002,0
4+
Trans,0.1974,0.09913,0.1882,0.0782,0.1318,0.2022,0.1559,0.1101,0.03434,0.1169,0.06312,0.0268
5+
Trans_unc,0.0001,0.00009,0.0001,0.00008,0.0001,0.0001,0.0001,0.0001,0.00006,0.0001,0.00008,0.00005

Counterparts/VuMC2021_CLS/1DVuMC2021_CLS_leakage.csv renamed to Counterparts/VuMC2021_N-ary/VuMC2021_CLS_VolFrac_leakage.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
,1a-volfrac-CLS-L10,1b-volfrac-CLS-L10,1c-volfrac-CLS-L10,1d-volfrac-CLS-L10,2a-volfrac-CLS-L10,2b-volfrac-CLS-L10,2c-volfrac-CLS-L10,2d-volfrac-CLS-L10,3a-volfrac-CLS-L10,3b-volfrac-CLS-L10,3c-volfrac-CLS-L10,3d-volfrac-CLS-L10
1+
,1a,1b,1c,1d,2a,2b,2c,2d,3a,3b,3c,3d
22
Refl,0.2192,0.1797,0.2896,0,0.03156,0.5746,0.314,0,0.02866,0.5915,0.3421,0
33
Refl_unc,0.0001,0.0001,0.0001,0,0.00006,0.0002,0.0001,0,0.00005,0.0002,0.0001,0
44
Trans,0.1811,0.12879,0.1961,0.07827,0.1304,0.2739,0.1691,0.11,0.03361,0.1794,0.07836,0.02679
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
,1a,1b,1c,1d,2a,2b,2c,2d,3a,3b,3c,3d
2+
Refl,0.2478,0.2202,0.3786,0,0.03874,0.6041,0.3625,0,0.03654,0.6288,0.4019,0
3+
Refl_unc,0.0001,0.0001,0.0001,0,0.00006,0.0002,0.0001,0,0.00005,0.0002,0.0002,0
4+
Trans,0.1893,0.10724,0.2048,0.07857,0.1315,0.247,0.1736,0.1103,0.03409,0.1496,0.07677,0.02679
5+
Trans_unc,0.0001,0.00009,0.0001,0.00008,0.0001,0.0001,0.0001,0.0001,0.00006,0.0001,0.00008,0.00005

0 commit comments

Comments
 (0)