diff --git a/docs/calculation_details.rst b/docs/calculation_details.rst index 78bd103..989fbf4 100644 --- a/docs/calculation_details.rst +++ b/docs/calculation_details.rst @@ -45,8 +45,8 @@ Multiply the Loss Event Frequency vector by the Loss Magnitude vector Example ~~~~~~~ For a given year, if we have the number of times a particular event -occurs (Loss Event Frequency/LEF) and the dollar losses associated with -each of those events (Loss Magnitude/LM), we can multiply these +occurs (Loss Event Frequency/LEF) and the dollar losses associated with +each of those events (Loss Magnitude/LM), we can multiply these together to derive the ultimate dollar value amount lost (Risk/R). +------------+-----+--------+--------------+ @@ -64,7 +64,7 @@ together to derive the ultimate dollar value amount lost (Risk/R). Description ~~~~~~~~~~~ -A vector of elements which represent the number of times a particular +A vector of elements which represent the number of times a particular loss occurs during a given time frame (generally one year) Restrictions @@ -119,7 +119,7 @@ multiply these together to derive the number of losses that will occur .. note:: - Though intended to represent a discrete number of events, TEF and + Though intended to represent a discrete number of events, TEF and LEF are not rounded to the nearest integer. This allows for the modeling of events that happen infrequently. 
For instance, if we are running a simulation for a single year, one might model a @@ -130,7 +130,7 @@ multiply these together to derive the number of losses that will occur Description ~~~~~~~~~~~ -A vector of elements representing the number of times a particular +A vector of elements representing the number of times a particular threat occurs, whether or not it results in a loss Restrictions @@ -139,7 +139,7 @@ All elements must be positive Derivation ~~~~~~~~~~ -Supplied directly, or multiply the Contact Frequency vector and the +Supplied directly, or multiply the Contact Frequency vector and the Probability of Action vector .. math:: @@ -312,8 +312,8 @@ a control (Vulnerability/V). Description ~~~~~~~~~~~ -A vector with elements representing the number of threat -actor contacts that could potentially yield a threat within a given +A vector with elements representing the number of threat +actor contacts that could potentially yield a threat within a given timeframe Restrictions @@ -332,7 +332,7 @@ attack, and in turn can potentially yield a loss (Contact Frequency/C). +------------+-----------+ | Simulation | C | +============+===========+ -| 1 | 5,000,000 | +| 1 | 5,000,000 | +------------+-----------+ | 2 | 3,000,000 | +------------+-----------+ @@ -345,7 +345,7 @@ attack, and in turn can potentially yield a loss (Contact Frequency/C). 
Description ~~~~~~~~~~~ A vector with elements representing the probability that a threat actor -will proceed after coming into contact with an organization +will proceed after coming into contact with an organization Restrictions ------------ @@ -363,7 +363,7 @@ resource (Probability of Action/P) +------------+------+ | Simulation | P | +============+======+ -| 1 | 0.95 | +| 1 | 0.95 | +------------+------+ | 2 | 0.90 | +------------+------+ @@ -375,7 +375,7 @@ resource (Probability of Action/P) Description ~~~~~~~~~~~ -A vector of unitless elements that describe the relative +A vector of unitless elements that describe the relative level of expertise and resources of a threat actor (relative to a Control Strength) @@ -395,7 +395,7 @@ relates to the relative strength of the controls (Control Strength/CS) +------------+------+ | Simulation | TC | +============+======+ -| 1 | 0.75 | +| 1 | 0.75 | +------------+------+ | 2 | 0.60 | +------------+------+ @@ -407,7 +407,7 @@ relates to the relative strength of the controls (Control Strength/CS) Description ~~~~~~~~~~~ -A vector of unitless elements that describe the relative strength of a +A vector of unitless elements that describe the relative strength of a given control (relative to the Threat Capability of a given actor) Restrictions @@ -427,7 +427,7 @@ Capability/TC) +------------+------+ | Simulation | TC | +============+======+ -| 1 | 0.15 | +| 1 | 0.15 | +------------+------+ | 2 | 0.10 | +------------+------+ @@ -514,7 +514,7 @@ Loss/PL) +------------+------------+ | Simulation | PL | +============+============+ -| 1 | $5,000,000 | +| 1 | $5,000,000 | +------------+------------+ | 2 | $3,500,000 | +------------+------------+ @@ -540,43 +540,39 @@ multiplied together on an elementwise basis. .. 
math:: - \begin{bmatrix} - \text{SL}_{1} \\ - \text{SL}_{1} \\ - \vdots \\ - \text{SL}_{1} \\ - \end{bmatrix} - \quad - = - \quad - \sum\limits^n_{j=1} - \quad - \left( - \quad - \begin{bmatrix} - \text{SLEF}_{1,1} & \text{SLEF}_{1,2} & \dots & \text{SLEF}_{1,n} \\ - \text{SLEF}_{2,1} & \text{SLEF}_{2,2} & \dots & \text{SLEF}_{2,n} \\ - \vdots & \vdots & \ddots & \vdots \\ - \text{SLEF}_{m,1} & \text{SLEF}_{m,2} & \dots & \text{SLEF}_{m,n} \\ - \end{bmatrix} - \quad - \circ - \quad - \begin{bmatrix} - \text{SLEM}_{1,1} & \text{SLEM}_{1,2} & \dots & \text{SLEM}_{1,n} \\ - \text{SLEM}_{2,1} & \text{SLEM}_{2,2} & \dots & \text{SLEM}_{2,n} \\ - \vdots & \vdots & \ddots & \vdots \\ - \text{SLEM}_{m,1} & \text{SLEM}_{m,2} & \dots & \text{SLEM}_{m,n} \\ - \end{bmatrix} - \quad - \right) + \begin{split} + \mathbf{SL} &= + \sum_{j=1}^{n} \left( \mathbf{SLEF} \circ \mathbf{SLEM} \right)_{\text{rowwise}} \\ + \text{where:} \\ + \mathbf{SLEF} &= + \begin{bmatrix} + \text{SLEF}_{1,1} & \text{SLEF}_{1,2} & \dots & \text{SLEF}_{1,n} \\ + \text{SLEF}_{2,1} & \text{SLEF}_{2,2} & \dots & \text{SLEF}_{2,n} \\ + \vdots & \vdots & \ddots & \vdots \\ + \text{SLEF}_{m,1} & \text{SLEF}_{m,2} & \dots & \text{SLEF}_{m,n} + \end{bmatrix} \\ + \mathbf{SLEM} &= + \begin{bmatrix} + \text{SLEM}_{1,1} & \text{SLEM}_{1,2} & \dots & \text{SLEM}_{1,n} \\ + \text{SLEM}_{2,1} & \text{SLEM}_{2,2} & \dots & \text{SLEM}_{2,n} \\ + \vdots & \vdots & \ddots & \vdots \\ + \text{SLEM}_{m,1} & \text{SLEM}_{m,2} & \dots & \text{SLEM}_{m,n} + \end{bmatrix} \\ + \mathbf{SL} &= + \begin{bmatrix} + \text{SL}_{1} \\ + \text{SL}_{2} \\ + \vdots \\ + \text{SL}_{m} + \end{bmatrix} + \end{split} Example ~~~~~~~ For a given model, we can have a matrix of secondary loss probabilities. Each row can represent a simulation and each column can -represent a loss type. 
In this example below we have three different -probability columns for different types of probability loss. E.g. the +represent a loss type. In this example below we have three different +probability columns for different types of probability loss. E.g. the probabilities of loss for simulation 1 are 0.95, 0.05, and 1.00. +------------+-------------+--------------+--------------+ diff --git a/pyfair/__init__.py b/pyfair/__init__.py index 08fd1e4..5dca5ad 100644 --- a/pyfair/__init__.py +++ b/pyfair/__init__.py @@ -1,6 +1,6 @@ """PyFair is an open source implementation of the FAIR methodology.""" -VERSION = '0.1-alpha.12' +from ._version import __version__ from . import model diff --git a/pyfair/_version.py b/pyfair/_version.py new file mode 100644 index 0000000..2a3adcf --- /dev/null +++ b/pyfair/_version.py @@ -0,0 +1 @@ +__version__ = "0.1-alpha.12" diff --git a/pyfair/model/model_calc.py b/pyfair/model/model_calc.py index b75511e..168ded9 100644 --- a/pyfair/model/model_calc.py +++ b/pyfair/model/model_calc.py @@ -13,16 +13,17 @@ class FairCalculations(object): 3) a multiplication function. 
""" + def __init__(self): # Lookup table for functions (no leaf nodes required) self._function_dict = { - 'Risk' : self._calculate_multiplication, - 'Loss Event Frequency' : self._calculate_multiplication, - 'Threat Event Frequency': self._calculate_multiplication, - 'Vulnerability' : self._calculate_step_average, - 'Loss Magnitude' : self._calculate_addition, - 'Primary Loss' : self._calculate_multiplication, - 'Secondary Loss' : self._calculate_multiplication, + "Risk": self._calculate_multiplication, + "Loss Event Frequency": self._calculate_multiplication, + "Threat Event Frequency": self._calculate_multiplication, + "Vulnerability": self._calculate_step_average, + "Loss Magnitude": self._calculate_addition, + "Primary Loss": self._calculate_multiplication, + "Secondary Loss": self._calculate_multiplication, } def calculate(self, parent_name, child_1_data, child_2_data): @@ -58,22 +59,11 @@ def calculate(self, parent_name, child_1_data, child_2_data): return calculated_result def _calculate_step_average(self, child_1_data, child_2_data): - """Get bool series based on step function, then average for vuln""" + """Return per-simulation boolean (as float) for Vulnerability: 1.0 if TC > CS, else 0.0""" # Get Trues (1) where child_2 (TCap) is greater than child_1 (CS) - # Otherwise False (0) - bool_series = child_1_data < child_2_data - # Treat those bools as 1 and 0 and get mean - bool_scalar_average = bool_series.mean() - # Create a long array of that mean - vuln_data = np.full( - len(bool_series), - bool_scalar_average - ) - # And put it in a series - vuln = pd.Series( - data=vuln_data, - index=bool_series.index - ) + bool_series = (child_1_data < child_2_data).astype(float) + # Return the per-simulation result as a Series + vuln = pd.Series(data=bool_series.values, index=bool_series.index) return vuln def _calculate_addition(self, child_1_data, child_2_data): diff --git a/pyfair/model/model_input.py b/pyfair/model/model_input.py index 769329f..1e3e6ab 100644 --- 
a/pyfair/model/model_input.py +++ b/pyfair/model/model_input.py @@ -29,26 +29,33 @@ class FairDataInput(object): is stored when converting to JSON or another serialization format. """ + def __init__(self): # These targets must be less than or equal to one - self._le_1_targets = ['Probability of Action', 'Vulnerability', 'Control Strength', 'Threat Capability'] - self._le_1_keywords = ['constant', 'high', 'mode', 'low', 'mean'] + self._le_1_targets = [ + "Probability of Action", + "Vulnerability", + "Control Strength", + "Threat Capability", + "Secondary Loss Event Frequency", + ] + self._le_1_keywords = ["constant", "high", "mode", "low", "mean"] # Parameter map associates parameters with functions self._parameter_map = { - 'constant': self._gen_constant, - 'high' : self._gen_pert, - 'mode' : self._gen_pert, - 'low' : self._gen_pert, - 'gamma' : self._gen_pert, - 'mean' : self._gen_normal, - 'stdev' : self._gen_normal, + "constant": self._gen_constant, + "high": self._gen_pert, + "mode": self._gen_pert, + "low": self._gen_pert, + "gamma": self._gen_pert, + "mean": self._gen_normal, + "stdev": self._gen_normal, } # List of keywords with function keys self._required_keywords = { - self._gen_constant: ['constant'], - self._gen_pert : ['low', 'mode', 'high'], - self._gen_normal : ['mean', 'stdev'], - } + self._gen_constant: ["constant"], + self._gen_pert: ["low", "mode", "high"], + self._gen_normal: ["mean", "stdev"], + } # Storage of inputs self._supplied_values = {} @@ -59,7 +66,7 @@ def get_supplied_values(self): ------- dict A dictionary of the values supplied to generate function. The - keys for the dict will be the target node as a string (e.g. + keys for the dict will be the target node as a string (e.g. 'Loss Event Frequency') and the values will be a sub-dictionary of keyword arguments ({'low': 50, 'mode}: 51, 'high': 52}). 
@@ -80,7 +87,11 @@ def _check_le_1(self, target, **kwargs): pass # If not, raise error else: - raise FairException('"{}" must have "{}" value between zero and one.'.format(target, key)) + raise FairException( + '"{}" must have "{}" value between zero and one.'.format( + target, key + ) + ) def _check_parameters(self, target_function, **kwargs): """Runs parameter checks @@ -94,7 +105,7 @@ def _check_parameters(self, target_function, **kwargs): for keyword, value in kwargs.items(): # Two conditions value_is_less_than_zero = value < 0 - keyword_is_relevant = keyword in ['mean', 'constant', 'low', 'mode', 'high'] + keyword_is_relevant = keyword in ["mean", "constant", "low", "mode", "high"] # Test conditions if keyword_is_relevant and value_is_less_than_zero: raise FairException('"{}" is less than zero.'.format(keyword)) @@ -104,7 +115,11 @@ def _check_parameters(self, target_function, **kwargs): if required_keyword in kwargs.keys(): pass else: - raise FairException('"{}" is missing "{}".'.format(str(target_function), required_keyword)) + raise FairException( + '"{}" is missing "{}".'.format( + str(target_function), required_keyword + ) + ) def generate(self, target, count, **kwargs): """Executes request, records parameters, and return random values @@ -123,7 +138,7 @@ def generate(self, target, count, **kwargs): The number of random numbers generated (or alternatively, the length of the Series returned). **kwargs - Keyword arguments with one of the following values: {`mean`, + Keyword arguments with one of the following values: {`mean`, `stdev`, `low`, `mode`, `high`, `gamma`, or `constant`}. 
Raises @@ -146,8 +161,8 @@ def generate(self, target, count, **kwargs): result = self._generate_single(target, count, **kwargs) # Explicitly insert optional keywords for model storage dict_keys = kwargs.keys() - if 'low' in dict_keys and 'gamma' not in dict_keys: - kwargs['gamma'] = 4 + if "low" in dict_keys and "gamma" not in dict_keys: + kwargs["gamma"] = 4 # Record and return self._supplied_values[target] = {**kwargs} return result @@ -192,16 +207,16 @@ def generate_multi(self, prefixed_target, count, kwargs_dict): { 'Reputational': { - 'Secondary Loss Event Frequency': {'constant': 4000}, + 'Secondary Loss Event Frequency': {'constant': 4000}, 'Secondary Loss Event Magnitude': { 'low': 10, 'mode': 20, 'high': 100 }, }, 'Legal': { - 'Secondary Loss Event Frequency': {'constant': 2000}, + 'Secondary Loss Event Frequency': {'constant': 2000}, 'Secondary Loss Event Magnitude': { 'low': 10, 'mode': 20, 'high': 100 - }, + }, } } @@ -242,7 +257,7 @@ def generate_multi(self, prefixed_target, count, kwargs_dict): """ # Remove prefix from target - final_target = prefixed_target.lstrip('multi_') + final_target = prefixed_target.lstrip("multi_") # Create a container for dataframes df_dict = {target: pd.DataFrame() for target in kwargs_dict.keys()} # For each target @@ -255,9 +270,9 @@ def generate_multi(self, prefixed_target, count, kwargs_dict): # Put in dict df_dict[target][column] = s # Get partial secondary losses and sum up all the values - summed = np.sum(df.prod(axis=1) for df in df_dict.values()) + summed = sum(df.prod(axis=1) for df in df_dict.values()) # Record params - new_target = 'multi_' + final_target + new_target = "multi_" + final_target self._supplied_values[new_target] = kwargs_dict return summed @@ -294,12 +309,12 @@ def supply_raw(self, target, array): s = pd.Series(clean_array) # Check numeric and not null if s.isnull().any(): - raise FairException('Supplied data contains null values') + raise FairException("Supplied data contains null values") # 
Ensure values are appropriate if target in self._le_1_targets: if s.max() > 1 or s.min() < 0: - raise FairException(f'{target} data greater or less than one') - self._supplied_values[target] = {'raw': s.values.tolist()} + raise FairException(f"{target} data greater or less than one") + self._supplied_values[target] = {"raw": s.values.tolist()} return s.values def _determine_func(self, **kwargs): @@ -309,24 +324,22 @@ def _determine_func(self, **kwargs): if key not in self._parameter_map.keys(): raise FairException('"{}"" is not a recognized keyword'.format(key)) # Check whether all keys go to same function via set comprension - functions = list(set([ - self._parameter_map[key] - for key - in kwargs.keys() - ])) + functions = list(set([self._parameter_map[key] for key in kwargs.keys()])) if len(functions) > 1: - raise FairException('"{}" mixes incompatible keywords.'.format(str(kwargs.keys()))) + raise FairException( + '"{}" mixes incompatible keywords.'.format(str(kwargs.keys())) + ) else: function = functions[0] return function def _gen_constant(self, count, **kwargs): """Generates constant array of size `count`""" - return np.full(count, kwargs['constant']) + return np.full(count, kwargs["constant"]) def _gen_normal(self, count, **kwargs): """Geneates random normally-distributed array of size `count`""" - normal = scipy.stats.norm(loc=kwargs['mean'], scale=kwargs['stdev']) + normal = scipy.stats.norm(loc=kwargs["mean"], scale=kwargs["stdev"]) rvs = normal.rvs(count) return rvs @@ -340,10 +353,12 @@ def _gen_pert(self, count, **kwargs): def _check_pert(self, **kwargs): """Does the work of ensuring BetaPert distribution is valid""" conditions = { - 'mode >= low' : kwargs['mode'] >= kwargs['low'], - 'high >= mode' : kwargs['high'] >= kwargs['mode'], + "mode >= low": kwargs["mode"] >= kwargs["low"], + "high >= mode": kwargs["high"] >= kwargs["mode"], } for condition_name, condition_value in conditions.items(): if condition_value == False: - err = 'Param "{}" fails 
PERT requirement "{}".'.format(kwargs, condition_name) + err = 'Param "{}" fails PERT requirement "{}".'.format( + kwargs, condition_name + ) raise FairException(err) diff --git a/pyfair/report/base_report.py b/pyfair/report/base_report.py index 73477d4..2692412 100644 --- a/pyfair/report/base_report.py +++ b/pyfair/report/base_report.py @@ -11,61 +11,57 @@ import numpy as np import pandas as pd -from .. import VERSION +from .._version import __version__ from .tree_graph import FairTreeGraph from .distribution import FairDistributionCurve from .exceedence import FairExceedenceCurves from ..utility.fair_exception import FairException from .violin import FairViolinPlot +from ..model.meta_model import FairMetaModel +from ..utility.beta_pert import FairBetaPert class FairBaseReport(object): """A base class for creating FairModel and FairMetaModel reports This class exists to provide a common base for mutliple report types. - It carries with it formatting data, file paths, and a variety of + It carries with it formatting data, file paths, and a variety of methods for creating report components. It is not intended to be instantiated on its own. 
""" - def __init__(self, currency_prefix='$'): + + def __init__(self, currency_prefix="$"): # Add formatting strings self._currency_prefix = currency_prefix self._model_or_models = None - self._currency_format_string = currency_prefix + '{0:,.0f}' - self._float_format_string = '{0:.2f}' + self._currency_format_string = currency_prefix + "{0:,.0f}" + self._float_format_string = "{0:.2f}" self._format_strings = { - 'Risk' : self._currency_format_string, - 'Loss Event Frequency' : self._float_format_string, - 'Threat Event Frequency' : self._float_format_string, - 'Vulnerability' : self._float_format_string, - 'Contact Frequency' : self._float_format_string, - 'Probability of Action' : self._float_format_string, - 'Threat Capability' : self._float_format_string, - 'Control Strength' : self._float_format_string, - 'Loss Magnitude' : self._currency_format_string, - 'Primary Loss' : self._currency_format_string, - 'Secondary Loss' : self._currency_format_string, - 'Secondary Loss Event Frequency' : self._float_format_string, - 'Secondary Loss Event Magnitude' : self._currency_format_string, + "Risk": self._currency_format_string, + "Loss Event Frequency": self._float_format_string, + "Threat Event Frequency": self._float_format_string, + "Vulnerability": self._float_format_string, + "Contact Frequency": self._float_format_string, + "Probability of Action": self._float_format_string, + "Threat Capability": self._float_format_string, + "Control Strength": self._float_format_string, + "Loss Magnitude": self._currency_format_string, + "Primary Loss": self._currency_format_string, + "Secondary Loss": self._currency_format_string, + "Secondary Loss Event Frequency": self._float_format_string, + "Secondary Loss Event Magnitude": self._currency_format_string, } # Add locations self._fair_location = pathlib.Path(__file__).parent.parent - self._static_location = self._fair_location / 'static' - self._logo_location = self._static_location / 'white_python_logo.png' + 
self._static_location = self._fair_location / "static" + self._logo_location = self._static_location / "white_python_logo.png" self._template_paths = { - 'css' : self._static_location / 'fair.css', - 'simple': self._static_location / 'simple.html' + "css": self._static_location / "fair.css", + "simple": self._static_location / "simple.html", } - self._param_cols = [ - 'low', - 'most_likely', - 'high', - 'constant', - 'mean', - 'stdev' - ] + self._param_cols = ["low", "most_likely", "high", "constant", "mean", "stdev"] def _input_check(self, value): """Check input value for report is appropriate @@ -76,25 +72,29 @@ def _input_check(self, value): """ # If it's a model or metamodel, plug it in a dict. rv = {} - if value.__class__.__name__ in ['FairModel', 'FairMetaModel']: + if value.__class__.__name__ in ["FairModel", "FairMetaModel"]: rv[value.get_name()] = value return rv # Check for iterable. - if not hasattr(value, '__iter__'): - raise FairException('Input is not a FairModel, FairMetaModel, or an iterable.') + if not hasattr(value, "__iter__"): + raise FairException( + "Input is not a FairModel, FairMetaModel, or an iterable." + ) if len(value) == 0: - raise FairException('Empty iterable where iterable of models expected.') + raise FairException("Empty iterable where iterable of models expected.") # Iterate and process remainder. 
for proported_model in value: # Check if model - if proported_model.__class__.__name__ in ['FairModel', 'FairMetaModel']: + if proported_model.__class__.__name__ in ["FairModel", "FairMetaModel"]: # Check if calculated if proported_model.calculation_completed(): rv[proported_model.get_name()] = proported_model else: - raise FairException('Model or FairModel has not been calculated.') + raise FairException("Model or FairModel has not been calculated.") else: - raise FairException('Iterable member is not a FairModel or FairMetaModel') + raise FairException( + "Iterable member is not a FairModel or FairMetaModel" + ) return rv def get_format_strings(self): @@ -107,16 +107,16 @@ def get_format_strings(self): """ return self._format_strings - def base64ify(self, image, alternative_text='', options=''): + def base64ify(self, image, alternative_text="", options=""): """Binary data into embeddable tag with base64 data - + To avoid having separate image files, pyfair simply embeds report images as base64 image tags. base64ify() is a convenience function that creates these tags. image : [bytes, str, pathlib.Path] The binary data, path string, or pathlib.Path containing either the data itself or a file of data. - + alternative_text: str, optional Alternative text to be showed in the event the image does not properly render @@ -134,15 +134,15 @@ def base64ify(self, image, alternative_text='', options=''): """ # If path, open and read. 
if type(image) == str or isinstance(image, pathlib.Path): - with open(image, 'rb') as f: + with open(image, "rb") as f: binary_data = f.read() # If bytes, jsut write elif type(image) == bytes: binary_data = image else: - raise TypeError(str(image) + ' is not a string, path, or bytes.') + raise TypeError(str(image) + " is not a string, path, or bytes.") # Get base64 string - base64_string = base64.b64encode(binary_data).decode('utf8') + base64_string = base64.b64encode(binary_data).decode("utf8") # Create tag tag = f'{alternative_text}' return tag @@ -162,13 +162,13 @@ def to_html(self, output_path): The output path to which the HTML data is written """ output = self._construct_output() - with open(output_path, 'w+') as f: + with open(output_path, "w+") as f: f.write(output) def _fig_to_img_tag(self, fig): """Converts matplotlib fig to base64 encoded img tag""" data = io.BytesIO() - fig.savefig(data, format='png', transparent=True) + fig.savefig(data, format="png", transparent=True) data.seek(0) img_tag = self.base64ify(data.read()) return img_tag @@ -176,11 +176,7 @@ def _fig_to_img_tag(self, fig): def _get_data_table(self, model): """Takes model and gnerates HTML table from the model's results""" data = model.export_results().dropna(axis=1) - table = data.to_html( - border=0, - justify='left', - classes='fair_metadata_table' - ) + table = data.to_html(border=0, justify="left", classes="fair_metadata_table") return table def _get_parameter_table(self, model): @@ -197,14 +193,22 @@ def _get_metadata_table(self): username = getpass.getuser() # The exception this throws is not conspicuously documented except Exception: - username = 'Unknown' + username = "Unknown" # Add metadata - metadata = pd.Series({ - 'Author': username, - 'Created': str(datetime.datetime.now()).partition('.')[0], - 'PyFair Version': VERSION, - 'Type': type(self).__name__ - }).to_frame().to_html(border=0, header=False, justify='left', classes='fair_metadata_table') + metadata = ( + pd.Series( + { 
+ "Author": username, + "Created": str(datetime.datetime.now()).partition(".")[0], + "PyFair Version": __version__, + "Type": type(self).__name__, + } + ) + .to_frame() + .to_html( + border=0, header=False, justify="left", classes="fair_metadata_table" + ) + ) return metadata def _get_tree(self, model): @@ -224,7 +228,7 @@ def _get_distribution(self, model_or_models, currency_prefix): def _get_distribution_icon(self, model, target): """Create base64 icon string using FairDistributionCurve""" fdc = FairDistributionCurve(model, self._currency_prefix) - fig, ax = fdc.generate_icon(model.get_name(), target) + fig, ax = fdc.generate_icon(model.get_name(), target) img_tag = self._fig_to_img_tag(fig) return img_tag @@ -246,28 +250,36 @@ def _get_overview_table(self, model_or_models): """Create a risk overview table using a model or list of models""" # Get final Risk vectors for all models try: - risk_results = pd.DataFrame({ - name: model.export_results()['Risk'] - for name, model - in model_or_models.items() - }) + risk_results = pd.DataFrame( + { + name: model.export_results()["Risk"] + for name, model in model_or_models.items() + } + ) except KeyError: raise FairException("No 'Risk' key. 
Model likely uncalculated.") # Get aggregate statistics and set titles - risk_results = risk_results.agg(['mean', 'std', 'min', 'max']) - risk_results.index = ['Mean', 'Stdev', 'Minimum', 'Maximum'] + risk_results = risk_results.agg(["mean", "std", "min", "max"]) + risk_results.index = ["Mean", "Stdev", "Minimum", "Maximum"] # Format risk results into dataframe - overview_df = risk_results.applymap(lambda x: self._format_strings['Risk'].format(x)) - overview_df.loc['Simulations'] = [ - '{0:,.0f}'.format(len(model.export_results())) - for model - in model_or_models.values() + overview_df = risk_results.applymap( + lambda x: self._format_strings["Risk"].format(x) + ) + overview_df.loc["Simulations"] = [ + "{0:,.0f}".format(len(model.export_results())) + for model in model_or_models.values() ] # Add data - overview_df.loc['Identifier'] = [model.get_uuid() for model in model_or_models.values()] - overview_df.loc['Model Type'] = [model.__class__.__name__ for model in model_or_models.values()] + overview_df.loc["Identifier"] = [ + model.get_uuid() for model in model_or_models.values() + ] + overview_df.loc["Model Type"] = [ + model.__class__.__name__ for model in model_or_models.values() + ] # Export df to HTML and return - overview_html = overview_df.to_html(border=0, header=True, justify='left', classes='fair_table') + overview_html = overview_df.to_html( + border=0, header=True, justify="left", classes="fair_table" + ) return overview_html def _get_model_parameter_table(self, model): @@ -276,8 +288,7 @@ def _get_model_parameter_table(self, model): # Remove items we don't want. 
params = { key: value - for key, value - in params.items() + for key, value in params.items() if key in self._format_strings.keys() } # Set up alias and dataframe @@ -291,10 +302,10 @@ def _get_model_parameter_table(self, model): param_df[column] = np.nan # Create descriptive statistics from parameter df param_df = param_df[self._param_cols] - param_df['mean'] = model.export_results().mean(axis=0) - param_df['stdev'] = model.export_results().std(axis=0) - param_df['min'] = model.export_results().min(axis=0) - param_df['max'] = model.export_results().max(axis=0) + param_df["mean"] = model.export_results().mean(axis=0) + param_df["stdev"] = model.export_results().std(axis=0) + param_df["min"] = model.export_results().min(axis=0) + param_df["max"] = model.export_results().max(axis=0) # Transform param_df in place param_df = param_df.apply( lambda row: pd.Series( @@ -302,31 +313,25 @@ def _get_model_parameter_table(self, model): # ... by getting the format string and formatting fs[row.name].format(item) # For each item - for item - in row + for item in row ], # And keep the index - index=row.index.values + index=row.index.values, ), # On a column basis axis=1, ) - param_df = param_df.applymap(lambda x: '' if 'nan' in x else x) + param_df = param_df.applymap(lambda x: "" if "nan" in x else x) # Do not truncate our base64 images. 
- pd.set_option('display.max_colwidth', None) + pd.set_option("display.max_colwidth", None) # Create our distribution icons as strings in table - param_df['distribution'] = [ + param_df["distribution"] = [ self._get_distribution_icon(model, target) - for target - in param_df.index.values + for target in param_df.index.values ] # Export table to html detail_table = param_df.to_html( - border=0, - header=True, - justify='left', - classes='fair_table', - escape=False + border=0, header=True, justify="left", classes="fair_table", escape=False ) return detail_table @@ -334,30 +339,30 @@ def _get_metamodel_parameter_table(self, metamodel): """Create table for metamodel""" # Create our table, transpose it, get descriptive statistics risk_df = metamodel.export_results().T - risk_df = pd.DataFrame({ - 'mean' : risk_df.mean(axis=1), - 'stdev': risk_df.std(axis=1), - 'min' : risk_df.min(axis=1), - 'max' : risk_df.max(axis=1), - 'geo_mean': risk_df.apply(lambda x: np.exp(np.mean(np.log(x[x > 0]))), axis=1), - 'mode': risk_df.mode(axis=1)[0], - '90th_percentile': risk_df.quantile(0.90, axis=1), - '99th_percentile': risk_df.quantile(0.99, axis=1) - }) + risk_df = pd.DataFrame( + { + "mean": risk_df.mean(axis=1), + "stdev": risk_df.std(axis=1), + "min": risk_df.min(axis=1), + "max": risk_df.max(axis=1), + "geo_mean": risk_df.apply( + lambda x: np.exp(np.mean(np.log(x[x > 0]))), axis=1 + ), + "mode": risk_df.mode(axis=1)[0], + "90th_percentile": risk_df.quantile(0.90, axis=1), + "99th_percentile": risk_df.quantile(0.99, axis=1), + } + ) # Format the risk DF with the appropriate strings risk_df = risk_df.apply( lambda row: pd.Series( - [self._format_strings['Risk'].format(item) for item in row], - index=row.index.values + [self._format_strings["Risk"].format(item) for item in row], + index=row.index.values, ), axis=1, ) # Do not truncate our base64 images. 
detail_table = risk_df.to_html( - border=0, - header=True, - justify='left', - classes='fair_table', - escape=False + border=0, header=True, justify="left", classes="fair_table", escape=False ) return detail_table diff --git a/pyfair/utility/database.py b/pyfair/utility/database.py index a85a110..53a6c92 100644 --- a/pyfair/utility/database.py +++ b/pyfair/utility/database.py @@ -35,6 +35,7 @@ class FairDatabase(object): >>> query_output_string = db2.query('SELECT uuid, json FROM model') """ + def __init__(self, path): self._path = pathlib.Path(path) self._initialize() @@ -42,21 +43,25 @@ def __init__(self, path): def _initialize(self): """Initialize database with tables if necessary.""" with sqlite3.connect(self._path) as conn: - conn.execute("""CREATE TABLE IF NOT EXISTS models ( + conn.execute( + """CREATE TABLE IF NOT EXISTS models ( uuid string, name string, creation_date text NOT NULL, json string NOT NULL, CONSTRAINT model_pk PRIMARY KEY (uuid)); - """) - conn.execute("""CREATE TABLE IF NOT EXISTS results ( + """ + ) + conn.execute( + """CREATE TABLE IF NOT EXISTS results ( uuid string, mean real NOT NULL, stdev real NOT NULL, min real NOT NULL, max real NOT NULL, CONSTRAINT results_pk PRIMARY KEY (uuid)); - """) + """ + ) def _dict_factory(self, cursor, row): """Convenience function for sqlite queries""" @@ -69,9 +74,16 @@ def _dict_factory(self, cursor, row): def load(self, name_or_uuid): """Loads a model from the database - This takes a name or UUID and looks up the model using a UUID - function using self._load_uuid(). If that fails, it attempts to - look of the funciton by name using self._load_name(). + This takes a name or UUID. It first attempts to interpret the input + as a UUID and load directly. If that fails (e.g., the input is not + in UUID format), it attempts to load by model name. + + If loading by name, the method retrieves the *first* model found + matching that name (based on internal database ordering). 
If multiple + distinct models (with different UUIDs) share the same name, this + may not be the most recent or a specific version unless names are + managed uniquely. For precise loading, using the model's UUID is + recommended. Parameters ---------- @@ -79,7 +91,7 @@ def load(self, name_or_uuid): The name model or its UUID string Returns ------- + ------- FairModel or FairMetaModel The model or metamodel corresponding with the input UUID string or input name string. @@ -89,6 +101,9 @@ def load(self, name_or_uuid): FairException When the UUID or name does not exist in the database + See Also + -------- + store : Method for storing models. Note its behavior regarding UUIDs. """ # If it is a valid UUID try: @@ -110,9 +125,9 @@ def _load_name(self, name): cursor.execute("SELECT uuid FROM models WHERE name = ?", (name,)) result = cursor.fetchone() if not result: - raise FairException('Name for model not found.') + raise FairException("Name for model not found.") # Use model UUID query to load via _load_uuid function - model = self._load_uuid(result['uuid']) + model = self._load_uuid(result["uuid"]) return model def _load_uuid(self, uuid): @@ -125,17 +140,17 @@ def _load_uuid(self, uuid): cursor.execute("SELECT * FROM models WHERE uuid = ?", (uuid,)) model_data = cursor.fetchone() if not model_data: - raise FairException('UUID for model not found.') + raise FairException("UUID for model not found.") # Load model type based on json - json_data = model_data['json'] + json_data = model_data["json"] model_param_data = json.loads(json_data) - model_type = model_param_data['type'] - if model_type == 'FairMetaModel': + model_type = model_param_data["type"] + if model_type == "FairMetaModel": model = FairMetaModel.read_json(json_data) - elif model_type == 'FairModel': + elif model_type == "FairModel": model = FairModel.read_json(json_data) else: - raise FairException('Unrecognized model type.') + raise FairException("Unrecognized model type.") return model def store(self,
model_or_metamodel): @@ -147,10 +162,31 @@ def store(self, model_or_metamodel): statistics about the risk are stored in the 'results' table, and 2) the model data is stored in the 'models' table. + The model's UUID (obtained via `model_or_metamodel.get_uuid()`) is used + as the primary key in the database. + + If a record with the same UUID already exists, `INSERT OR REPLACE` + semantics are used, meaning the existing record for that UUID will be + overwritten with the data from the model being stored. This is how + updates to an existing model (identified by its UUID) should be performed: + load the model, modify the loaded instance, then store that same instance. + + Creating a new `FairModel()` instance results in a new, unique UUID. + Storing such a new instance will always create a new record or replace + an existing record *only if that new UUID happened to match an old one* + (which is astronomically unlikely for standard UUIDs). + It does not replace based on model name. + + Parameters + ---------- + model_or_metamodel : FairModel or FairMetaModel + The model instance to store. + Raises ------ FairException If model or metamodel is not yet calculated + and thus not ready for storage. 
""" m = model_or_metamodel @@ -160,30 +196,25 @@ def store(self, model_or_metamodel): # Export from model meta = json.loads(m.to_json()) json_data = m.to_json() - results = m.export_results()['Risk'] + results = m.export_results()["Risk"] # Write to database with sqlite3.connect(self._path) as conn: cursor = conn.cursor() # Write model data cursor.execute( """INSERT OR REPLACE INTO models VALUES(?, ?, ?, ?)""", - ( - meta['model_uuid'], - meta['name'], - meta['creation_date'], - json_data - ) + (meta["model_uuid"], meta["name"], meta["creation_date"], json_data), ) # Write cached results cursor.execute( """INSERT OR REPLACE INTO results VALUES(?, ?, ?, ?, ?)""", ( - meta['model_uuid'], + meta["model_uuid"], results.mean(axis=0), results.std(axis=0), results.min(axis=0), - results.max(axis=0) - ) + results.max(axis=0), + ), ) # Vacuum database conn = sqlite3.connect(self._path) diff --git a/test_fair.py b/test_fair.py deleted file mode 100644 index d210961..0000000 --- a/test_fair.py +++ /dev/null @@ -1,20 +0,0 @@ -from pyfair import FairModel - -# Create a simple FAIR model -model = FairModel(name="Basic Model", n_simulations=10000) - -# Set parameters -model.input_data("Loss Event Frequency", mean=0.3, stdev=0.1) -model.input_data("Loss Magnitude", constant=5000000) - -# Calculate and display results -model.calculate_all() - -# Get results and print summary statistics -results = model.export_results() -print("\nModel Results Summary:") -print("-" * 20) -print(f"Risk Statistics:") -print(f"Mean: ${results['Risk'].mean():,.2f}") -print(f"Median: ${results['Risk'].median():,.2f}") -print(f"95th Percentile: ${results['Risk'].quantile(0.95):,.2f}") diff --git a/tests/model/test_model_calc.py b/tests/model/test_model_calc.py index 6d6794e..fca9f47 100644 --- a/tests/model/test_model_calc.py +++ b/tests/model/test_model_calc.py @@ -9,59 +9,47 @@ class TestFairModelCalc(unittest.TestCase): # Raw data - _CHILD_1_DATA = pd.Series([1,2,3,4,5]) - _CHILD_2_DATA = 
pd.Series([5,4,3,2,1]) - _MULT_OUTPUT = pd.Series([5,8,9,8,5]) - _ADD_OUTPUT = pd.Series([6,6,6,6,6]) - _STEP_OUTPUT = pd.Series([.4, .4, .4, .4, .4]) + _CHILD_1_DATA = pd.Series([1, 2, 3, 4, 5]) + _CHILD_2_DATA = pd.Series([5, 4, 3, 2, 1]) + _MULT_OUTPUT = pd.Series([5, 8, 9, 8, 5]) + _ADD_OUTPUT = pd.Series([6, 6, 6, 6, 6]) + _STEP_OUTPUT = pd.Series([1, 1, 0, 0, 0], dtype=float) # Keys _MULTIPLICATION_ITEMS = [ - 'Risk', - 'Loss Event Frequency', - 'Threat Event Frequency', - 'Primary Loss', - 'Secondary Loss', + "Risk", + "Loss Event Frequency", + "Threat Event Frequency", + "Primary Loss", + "Secondary Loss", ] - _ADDITION_ITEMS = ['Loss Magnitude'] - _STEP_ITEMS = ['Vulnerability'] + _ADDITION_ITEMS = ["Loss Magnitude"] + _STEP_ITEMS = ["Vulnerability"] def setUp(self): self._calc = FairCalculations() - + def tearDown(self): self._calc = None def test_multiplication(self): """Test multiplication keywords and functions""" for key in self._MULTIPLICATION_ITEMS: - result = self._calc.calculate( - key, - self._CHILD_1_DATA, - self._CHILD_2_DATA - ) + result = self._calc.calculate(key, self._CHILD_1_DATA, self._CHILD_2_DATA) self.assertTrue(result.equals(self._MULT_OUTPUT)) def test_addition(self): """Test addition keywords and functions""" for key in self._ADDITION_ITEMS: - result = self._calc.calculate( - key, - self._CHILD_1_DATA, - self._CHILD_2_DATA - ) + result = self._calc.calculate(key, self._CHILD_1_DATA, self._CHILD_2_DATA) self.assertTrue(result.equals(self._ADD_OUTPUT)) def test_step_average(self): """Test step function keywords and functions""" for key in self._STEP_ITEMS: - result = self._calc.calculate( - key, - self._CHILD_1_DATA, - self._CHILD_2_DATA - ) + result = self._calc.calculate(key, self._CHILD_1_DATA, self._CHILD_2_DATA) self.assertTrue(result.equals(self._STEP_OUTPUT)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_runner.py b/tests/test_runner.py index a11ab32..d2f368a 100644 --- 
a/tests/test_runner.py +++ b/tests/test_runner.py @@ -1,4 +1,5 @@ """Script to create and run a test suite.""" + import pathlib import sys import unittest @@ -54,16 +55,17 @@ utility.test_fair_exception, ] -# Create loader and suite -loader = unittest.TestLoader() -suite = unittest.TestSuite() +if __name__ == "__main__": + # Create loader and suite + loader = unittest.TestLoader() + suite = unittest.TestSuite() -# Add to suite -for test_module in test_modules: - loaded_test = loader.loadTestsFromModule(test_module) - suite.addTest(loaded_test) + # Add to suite + for test_module in test_modules: + loaded_test = loader.loadTestsFromModule(test_module) + suite.addTest(loaded_test) -# Create runner and run -runner = unittest.TextTestRunner(verbosity=5) -result = runner.run(suite) -sys.exit(0 if result.wasSuccessful() else 1) + # Create runner and run + runner = unittest.TextTestRunner(verbosity=5) + result = runner.run(suite) + sys.exit(0 if result.wasSuccessful() else 1)