-
Notifications
You must be signed in to change notification settings - Fork 38
Simple Impute for timeseries #975
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
61d361b
73aa16a
a5d9b27
ec4f791
1b1abc2
e712a46
3bfe0ce
058a177
6c23648
810f68a
bf207a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
| import numpy as np | ||
| import pandas as pd | ||
| import pytest | ||
| import scipy.sparse as sp | ||
| from anndata import AnnData | ||
| from ehrdata.core.constants import CATEGORICAL_TAG, DEFAULT_TEM_LAYER_NAME, FEATURE_TYPE_KEY, NUMERIC_TAG | ||
| from matplotlib.testing.compare import compare_images | ||
|
|
@@ -141,7 +142,11 @@ def mcar_edata(rng) -> ed.EHRData: | |
| missing_indices = rng.choice(a=[False, True], size=data.shape, p=[1 - 0.1, 0.1]) | ||
| data[missing_indices] = np.nan | ||
|
|
||
| return ed.EHRData(data) | ||
| data_3d = rng.random((100, 10, 3)) | ||
eroell marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| missing_indices = rng.choice(a=[False, True], size=data_3d.shape, p=[1 - 0.1, 0.1]) | ||
eroell marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| data_3d[missing_indices] = np.nan | ||
|
|
||
| return ed.EHRData(data, layers={DEFAULT_TEM_LAYER_NAME: data_3d}) | ||
|
|
||
|
|
||
| @pytest.fixture | ||
|
|
@@ -151,6 +156,20 @@ def edata_mini(): | |
| ) | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def edata_mini_3D_missing_values(): | ||
| tiny_mixed_array = np.array( | ||
| [ | ||
| [[138, 139], [78, np.nan], [77, 76], [1, 2], ["A", "B"], ["Yes", np.nan]], | ||
| [[140, 141], [80, 81], [60, 90], [0, 1], ["A", "A"], ["Yes", "Yes"]], | ||
| [[148, 149], [77, 78], [110, np.nan], [0, 1], [np.nan, "B"], ["Yes", "Yes"]], | ||
| [[150, 151], [79, 80], [56, np.nan], [2, 3], ["B", "B"], ["Yes", "No"]], | ||
| ], | ||
| dtype=object, | ||
| ) | ||
| return ed.EHRData(layers={DEFAULT_TEM_LAYER_NAME: tiny_mixed_array}) | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def edata_mini_sample(): | ||
| return ed.io.read_csv(f"{TEST_DATA_PATH}/dataset1.csv", columns_obs_only=["clinic_day"]) | ||
|
|
@@ -415,4 +434,11 @@ def as_dense_dask_array(a, chunk_size=1000): | |
| return da.from_array(a, chunks=chunk_size) | ||
|
|
||
|
|
||
| ARRAY_TYPES = (asarray, as_dense_dask_array) | ||
| ARRAY_TYPES_NUMERIC = ( | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I suggest these collections of array types for testing, which I hope reduces some repetitiveness.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder whether we should have this here or even in our compat code because we might also use this for the implementations?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| asarray, | ||
| as_dense_dask_array, | ||
| sp.csr_array, | ||
| sp.csc_array, | ||
| ) # add coo_array once supported in AnnData | ||
| ARRAY_TYPES_NUMERIC_3D_ABLE = (asarray, as_dense_dask_array) # add coo_array once supported in AnnData | ||
| ARRAY_TYPES_NONNUMERIC = (asarray, as_dense_dask_array) | ||
Uh oh!
There was an error while loading. Please reload this page.