Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion shablona/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
#from .constants import *
#CONSTANT1 = 5
#CONSTANT2 = 'i am constant'

from .shablona import *
from .version import __version__
from .version import __version__
2 changes: 2 additions & 0 deletions shablona/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Package-wide constants. These exist to demonstrate the proper way to
# define constants once and use them throughout the package (they are
# imported in shablona/shablona.py and re-exported from the package).
CONSTANT1 = 5
CONSTANT2 = 'i am constant'
159 changes: 85 additions & 74 deletions shablona/shablona.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,50 @@
import numpy as np
import pandas as pd
from matplotlib import mlab
from matplotlib import mlab
from scipy.special import erf
import scipy.optimize as opt

from .constants import CONSTANT1, CONSTANT2

def transform_data(data):
"""
Function that takes experimental data and gives us the
def print_constants():
    """
    Print the constants that this package defines.

    Demonstrates the proper way to define package-wide constants
    (in shablona.constants) and then use them throughout the package.
    """
    first_line = 'CONSTANT1: %s' % CONSTANT1
    second_line = 'CONSTANT2: %s' % CONSTANT2
    print(first_line)
    print(second_line)


def transform_data(data):
"""
Function that takes experimental data and gives us the
dependent/independent variables for analysis

Parameters
----------
data : Pandas DataFrame or string.
If this is a DataFrame, it should have the columns `contrast1` and
`answer` from which the dependent and independent variables will be
extracted. If this is a string, it should be the full path to a csv
file that contains data that can be read into a DataFrame with this
If this is a DataFrame, it should have the columns `contrast1` and
`answer` from which the dependent and independent variables will be
extracted. If this is a string, it should be the full path to a csv
file that contains data that can be read into a DataFrame with this
specification.

Returns
-------
x : array
The unique contrast differences.
y : array
x : array
The unique contrast differences.
y : array
The proportion of '2' answers in each contrast difference
n : array
The number of trials in each x,y condition
The number of trials in each x,y condition
"""
if isinstance(data, str):
data = pd.read_csv(data)

contrast1 = data['contrast1']
answers = data['answer']

x = np.unique(contrast1)
y = []
n = []
Expand All @@ -44,155 +55,155 @@ def transform_data(data):
answer1 = len(np.where(answers[idx[0]] == 1)[0])
y.append(answer1 / n[-1])
return x, y, n


def cumgauss(x, mu, sigma):
    """
    The cumulative Gaussian at x, for the distribution with mean mu and
    standard deviation sigma.

    Parameters
    ----------
    x : float or array
        The values of x over which to evaluate the cumulative Gaussian
        function.

    mu : float
        The mean parameter. Determines the x value at which the y value
        is 0.5.

    sigma : float
        The variance parameter. Determines the slope of the curve at the
        point of deflection.

    Returns
    -------
    g : float or array
        The cumulative gaussian with mean $\\mu$ and variance $\\sigma$
        evaluated at all points in `x`.

    Notes
    -----
    Based on: http://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function

    The cumulative Gaussian function is defined as:

    .. math::

        \\Phi(x) = \\frac{1}{2} [1 + erf(\\frac{x}{\\sqrt{2}})]

    Where, $erf$, the error function is defined as:

    .. math::

        erf(x) = \\frac{1}{\\sqrt{\\pi}} \\int_{-x}^{x} e^{-t^2} dt

    """
    # Standard-normalize x, then map through the error function.
    return 0.5 * (1 + erf((x - mu)/(np.sqrt(2) * sigma)))


def opt_err_func(params, x, y, func):
    """
    Error function for fitting a function using non-linear optimization.

    Parameters
    ----------
    params : tuple
        A tuple with the parameters of `func` according to their order
        of input.

    x : float array
        An independent variable.

    y : float array
        The dependent variable.

    func : function
        A function with inputs: `(x, *params)`.

    Returns
    -------
    float array
        The marginals of the fit to x/y given the params.
    """
    # Residuals: scipy.optimize.leastsq minimizes the sum of squares of
    # exactly this quantity.
    return y - func(x, *params)


class Model(object):
    """Class for fitting cumulative Gaussian functions to data."""

    def __init__(self, func=cumgauss):
        """Initialize a model object.

        Parameters
        ----------
        func : callable, optional
            A function that relates x and y through a set of parameters.
            Default: :func:`cumgauss`
        """
        self.func = func

    def fit(self, x, y, initial=(0.5, 1)):
        """
        Fit a Model to data.

        Parameters
        ----------
        x : float or array
            The independent variable: contrast values presented in the
            experiment.
        y : float or array
            The dependent variable.
        initial : sequence of float, optional
            Initial guess for the model parameters. A tuple (rather than
            a list) so that the default cannot be mutated across calls.
            Default: (0.5, 1).

        Returns
        -------
        fit : :class:`Fit` instance
            A :class:`Fit` object that contains the parameters of the model.
        """
        # leastsq minimizes the sum of squares of the residuals returned
        # by opt_err_func for the chosen parameter vector.
        params, _ = opt.leastsq(opt_err_func, initial,
                                args=(x, y, self.func))
        return Fit(self, params)


class Fit(object):
    """
    Class for representing a fit of a model to data.
    """
    def __init__(self, model, params):
        """
        Initialize a :class:`Fit` object.

        Parameters
        ----------
        model : a :class:`Model` instance
            An object representing the model used.

        params : array or list
            The parameters of the model evaluated for the data.
        """
        self.model = model
        self.params = params

    def predict(self, x):
        """
        Predict values of the dependent variable based on values of the
        independent variable.

        Parameters
        ----------
        x : float or array
            Values of the independent variable. Can be values presented in
            the experiment. For out-of-sample prediction (e.g. in
            cross-validation), these can be values
            that were not presented in the experiment.

        Returns
        -------
        y : float or array
            Predicted values of the dependent variable, corresponding to
            values of the independent variable.
        """
        # Evaluate the model's function with the fitted parameters.
        return self.model.func(x, *self.params)
36 changes: 20 additions & 16 deletions shablona/tests/test_shablona.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,29 @@
data_path = op.join(sb.__path__[0], 'data')


def test_constants():
    # CONSTANT1 is defined in shablona.constants and re-exported at the
    # package level; check that the expected value survives the import chain.
    npt.assert_(sb.CONSTANT1 == 5)


def test_transform_data():
"""
Testing the transformation of the data from raw data to functions
"""
Testing the transformation of the data from raw data to functions
used for fitting a function.

"""
# We start with actual data. We test here just that reading the data in
# different ways ultimately generates the same arrays.
from matplotlib import mlab
# We start with actual data. We test here just that reading the data in
# different ways ultimately generates the same arrays.
from matplotlib import mlab
ortho = mlab.csv2rec(op.join(data_path, 'ortho.csv'))
para = mlab.csv2rec(op.join(data_path, 'para.csv'))
x1, y1, n1 = sb.transform_data(ortho)
x2, y2, n2 = sb.transform_data(op.join(data_path, 'ortho.csv'))
npt.assert_equal(x1, x2)
npt.assert_equal(y1, y2)
# We can also be a bit more critical, by testing with data that we
# We can also be a bit more critical, by testing with data that we
# generate, and should produce a particular answer:
my_data = pd.DataFrame(
np.array([[0.1, 2], [0.1, 1], [0.2, 2], [0.2, 2], [0.3, 1],
np.array([[0.1, 2], [0.1, 1], [0.2, 2], [0.2, 2], [0.3, 1],
[0.3, 1]]),
columns=['contrast1', 'answer'])
my_x, my_y, my_n = sb.transform_data(my_data)
Expand All @@ -40,30 +44,30 @@ def test_cum_gauss():
y = sb.cumgauss(x, mu, sigma)
# A basic test that the input and output have the same shape:
npt.assert_equal(y.shape , x.shape)
# The function evaluated over items symmetrical about mu should be
# The function evaluated over items symmetrical about mu should be
# symmetrical relative to 0 and 1:
npt.assert_equal(y[0], 1 - y[-1])
# Approximately 68% of the Gaussian distribution is in mu +/- sigma, so
# Approximately 68% of the Gaussian distribution is in mu +/- sigma, so
# the value of the cumulative Gaussian at mu - sigma should be
# approximately equal to (1 - 0.68/2). Note the low precision!
npt.assert_almost_equal(y[0], (1 - 0.68) / 2, decimal=2)


def test_opt_err_func():
# We define a truly silly function, that returns its input, regardless of
# We define a truly silly function, that returns its input, regardless of
# the params:
def my_silly_func(x, my_first_silly_param, my_other_silly_param):
return x

# The silly function takes two parameters and ignores them
my_params = [1, 10]
my_x = np.linspace(-1, 1, 12)
my_y = my_x
my_err = sb.opt_err_func(my_params, my_x, my_y, my_silly_func)
# Since x and y are equal, the error is zero:
npt.assert_equal(my_err, np.zeros(my_x.shape[0]))
# Let's consider a slightly less silly function, that implements a linear

# Let's consider a slightly less silly function, that implements a linear
# relationship between inputs and outputs:
def not_so_silly_func(x, a, b):
return x*a + b
Expand All @@ -75,8 +79,8 @@ def not_so_silly_func(x, a, b):
my_err = sb.opt_err_func(my_params, my_x, my_y, not_so_silly_func)
# Since x and y are equal, the error is zero:
npt.assert_equal(my_err, np.zeros(my_x.shape[0]))


def test_Model():
""" """
M = sb.Model()
Expand Down