diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..e69de29 diff --git a/python/example.py b/python/example.py new file mode 100644 index 0000000..3d93afd --- /dev/null +++ b/python/example.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +# python3 setup.py install +# python3 python/example.py + +import argparse +import sys +import numpy as np +import pysquigulator + +def main(): + + reference = 'test/nCoV-2019.reference.fasta' + + pysq = pysquigulator.sim(reference, profile='dna-r9-prom', num_threads=8, batch_size=100, output_blow5='test.blow5') + + recs = pysq.simulate_batch() + + for read in recs: + print("read_id:", read['read_id']) + print("read_group:", read['read_group']) + print("digitisation:", read['digitisation']) + print("offset:", read['offset']) + print("range:", read['range']) + print("sampling_rate:", read['sampling_rate']) + print("len_raw_signal:", read['len_raw_signal']) + print("signal:", read['signal'][:10]) + print("signal:", read['signal'][:10]) + print("================================") + + + +if __name__ == '__main__': + main() + diff --git a/python/pysquigulator.pxd b/python/pysquigulator.pxd new file mode 100644 index 0000000..fd32cc2 --- /dev/null +++ b/python/pysquigulator.pxd @@ -0,0 +1,50 @@ +#cython: language_level=3 +#from libc.stdio cimport * +from libc.stdint cimport * +#from libc.stdlib cimport * + +cdef extern from "squigulator.h": + + ctypedef struct squig_t: + pass + + ctypedef struct squig_batch_t: + pass + + ctypedef struct squig_rec_t: + char* read_id; + uint32_t read_group; + double digitisation; + double offset; + double range; + double sampling_rate; + uint64_t len_raw_signal; + int16_t* raw_signal; + + char *channel_number; + double median_before; + int32_t read_number; + uint8_t start_mux; + uint64_t start_time; + uint8_t end_reason; + + pass + + ctypedef struct squig_opt_t: + int32_t avg_rlen; + int64_t random_seed; + int32_t num_threads; + int32_t batch_size; + char *output_blow5; + char *profile; + pass + + + # squigulator interface + void squig_init_opt(squig_opt_t *opts); + squig_t *squig_init(char *ref_fasta, squig_opt_t *opts); + void squig_free(squig_t *sq); + squig_batch_t* squig_sim_batch(squig_t *sq); + void squig_free_batch(squig_batch_t *batch); + squig_rec_t *squig_get_rec(squig_t *sq, squig_batch_t *batch, int i); + diff --git a/python/pysquigulator.pyx b/python/pysquigulator.pyx new file mode 100644 index 0000000..42896c9 --- /dev/null +++ b/python/pysquigulator.pyx @@ -0,0 +1,133 @@ +# distutils: language = c +# cython: language_level=3 +# cython: profile=True + +import sys +# import time +# import logging +import copy +from libc.stdlib cimport malloc, free +from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free +from libc.string cimport strdup +cimport pysquigulator + +# Import the Python-level symbols of numpy +import numpy as np +# Import the C-level symbols of numpy +cimport numpy as np +# Numpy must be initialized. When using numpy from C or Cython you must +# _always_ do that, or you will have segfaults +np.import_array() + + +cdef class sim: + ''' + Creates a new pysquigulator object + + ''' + cdef squig_opt_t opt + cdef squig_t *sq + cdef char *reference + cdef char *profile + cdef char *output_blow5 + cdef squig_batch_t *batch + + cdef squig_rec_t *rec + cdef np.npy_intp shape_seq[1] + + def __cinit__(self, reference, profile='dna-r9-prom', num_threads=8, batch_size=100, output_blow5='test.blow5'): + ''' + C init + ''' + self.sq = NULL + self.batch = NULL + + self.reference = NULL + self.profile = NULL + self.output_blow5 = NULL + + self.reference = strdup(str.encode(reference)) + self.profile = strdup(str.encode(profile)) + self.output_blow5 = strdup(str.encode(output_blow5)) + + squig_init_opt(&self.opt) + + self.opt.profile = self.profile + self.opt.output_blow5 = self.output_blow5 + self.opt.num_threads = num_threads + self.opt.batch_size = batch_size + + self.sq = squig_init(self.reference, &self.opt) + + if self.sq is NULL: + self.logger.error("pysquigulator not initialised") + + + def __init__(self, reference, profile='dna-r9-prom', num_threads=8, batch_size=100, output_blow5='test.blow5'): + ''' + python init + ''' + pass + + def __dealloc__(self): + ''' + free memory + ''' + + if self.sq is not NULL: + squig_free(self.sq) + if self.reference is not NULL: + free(self.reference) + if self.profile is not NULL: + free(self.profile) + if self.output_blow5 is not NULL: + free(self.output_blow5) + + + + def simulate_batch(self): + ''' + process a batch of of signals + ''' + row = {} + + self.batch = squig_sim_batch(self.sq) + if self.batch is NULL: + print("Failed to simulate batch") + + for i in range(self.opt.batch_size): + row = {} + self.rec = squig_get_rec(self.sq, self.batch, i) + # todo check null + + if type(self.rec.read_id) is bytes: + row['read_id'] = self.rec.read_id.decode() + else: + row['read_id'] = self.rec.read_id + row['read_group'] = self.rec.read_group + row['digitisation'] = self.rec.digitisation + row['offset'] = self.rec.offset + row['range'] = self.rec.range + row['sampling_rate'] = self.rec.sampling_rate + row['len_raw_signal'] = self.rec.len_raw_signal + + self.shape_seq[0] = self.rec.len_raw_signal + signal = copy.deepcopy(np.PyArray_SimpleNewFromData(1, self.shape_seq, + np.NPY_INT16, self.rec.raw_signal)) + np.PyArray_UpdateFlags(signal, signal.flags.num | np.NPY_ARRAY_OWNDATA) + row['signal'] = signal + + if type(self.rec.channel_number) is bytes: + row['channel_number'] = self.rec.channel_number.decode() + else: + row['channel_number'] = self.rec.channel_number + + row['median_before'] = self.rec.median_before + row['read_number'] = self.rec.read_number + row['start_mux'] = self.rec.start_mux + row['start_time'] = self.rec.start_time + + yield row + + squig_free_batch(self.batch) + self.batch = NULL diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c0bc87e --- /dev/null +++ b/setup.py @@ -0,0 +1,129 @@ +try: + from setuptools import setup, Extension + from setuptools.command.install import install + +except ImportError: + from distutils.core import setup + from distutils.extension import Extension + +import sys + +include_dirs = [] +# import numpy as np + +# creat dummy closures for metadata reading on first parse of setup.py +# that way it picks up the requirements and installs them, then can use them +# for the install. +try: + import numpy as np + include_dirs = ['include/', 'src/','python/', 'slow5lib/include/', 'slow5lib/thirdparty/streamvbyte/include/', np.get_include()] +except ImportError: + include_dirs = ['include/', 'src/','python/', 'slow5lib/include/', 'slow5lib/thirdparty/streamvbyte/include/'] + def np(*args, ** kwargs ): + import numpy as np + return np(*args, ** kwargs) + +try: + from Cython.Build import build_ext +except ImportError: + def build_ext(*args, ** kwargs ): + from Cython.Build import build_ext + return build_ext(*args, ** kwargs) + +# from Cython.Build import build_ext + +#adapted from https://github.com/lh3/minimap2/blob/master/setup.py + +sources=['python/pysquigulator.pyx', + 'src/format.c', + 'src/genread.c', + 'src/gensig.c', + 'src/methmodel.c', + 'src/misc.c', + 'src/model.c', + 'src/ref.c', + 'src/sim.c', + 'src/sq_api.c', + 'src/thread.c', + 'slow5lib/src/slow5.c', + 'slow5lib/src/slow5_press.c', + 'slow5lib/src/slow5_misc.c', + 'slow5lib/src/slow5_idx.c', + 'slow5lib/thirdparty/streamvbyte/src/streamvbyte_zigzag.c', + 'slow5lib/thirdparty/streamvbyte/src/streamvbyte_decode.c', + 'slow5lib/thirdparty/streamvbyte/src/streamvbyte_encode.c'] +depends=['python/pysquigulator.pxd', + 'python/pysquigulator.h', + 'include/squigulator.h', + 'src/error.h', + 'src/format.h', + 'src/khash.h', + 'src/kseq.h', + 'src/ksort.h', + 'src/misc.h', + 'src/model.h', + 'src/rand.h', + 'src/ref.h', + 'src/seq.h', + 'src/sq.h', + 'src/str.h', + 'src/version.h', + 'slow5lib/include/slow5/slow5.h', + 'slow5lib/include/slow5/slow5_defs.h', + 'slow5lib/include/slow5/slow5_error.h', + 'slow5lib/include/slow5/slow5_press.h', + 'slow5lib/include/slow5/klib/khash.h', + 'slow5lib/include/slow5/klib/kvec.h', + 'slow5lib/src/slow5_extra.h', + 'slow5lib/src/slow5_idx.h', + 'slow5lib/src/slow5_misc.h', + 'slow5lib/src/slow5_byte.h', + 'slow5lib/thirdparty/streamvbyte/include/streamvbyte.h', + 'slow5lib/thirdparty/streamvbyte/include/streamvbyte_zigzag.h'] +extra_compile_args = ['-g', '-Wall', '-O2', '-std=c99'] + + +libraries = ['m', 'z'] +library_dirs = ['.'] + +extensions = [Extension('pysquigulator', + sources = sources, + depends = depends, + extra_compile_args = extra_compile_args, + libraries = libraries, + include_dirs = include_dirs, + library_dirs = library_dirs, + language = 'c' )] + +def readme(): + with open('python/README.md') as f: + return f.read() + + +setup( + name = 'pysquigulator', + version='0.5.0-dev', + url = 'https://github.com/hasindu2008/squigulator', + description='pysquigulator python bindings', + long_description=readme(), + long_description_content_type='text/markdown', + author='Hasindu Gamaarachchi', + author_email='hasindu@garvan.org.au', + maintainer='Hasindu Gamaarachchi', + maintainer_email='hasindu@garvan.org.au', + license = 'MIT', + keywords = ['nanopore', 'signal'], + ext_modules=extensions, + cmdclass= {'build_ext': build_ext}, + classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: C', + 'Programming Language :: Cython', + 'Programming Language :: Python :: 3', + 'Intended Audience :: Science/Research', + 'Topic :: Scientific/Engineering :: Bio-Informatics'], + python_requires='>=3.4.3', + install_requires=["numpy"], + setup_requires=["Cython", "numpy"] + ) diff --git a/src/error.h b/src/error.h index f2f9422..d298b1c 100644 --- a/src/error.h +++ b/src/error.h @@ -26,7 +26,7 @@ enum sq_log_level_opt { LOG_TRAC // tracing, debugging, verbose, information, warning and error messages }; -enum sq_log_level_opt get_log_level(); +enum sq_log_level_opt get_log_level(void); void set_log_level(enum sq_log_level_opt level); #define DEBUG_PREFIX "[DEBUG] %s: " diff --git a/src/format.h b/src/format.h index cebc686..76c82bd 100644 --- a/src/format.h +++ b/src/format.h @@ -25,7 +25,7 @@ typedef struct { int64_t prefix_end; } aln_t; -aln_t *init_aln(); +aln_t *init_aln(void); void free_aln(aln_t *aln); char *paf_str(aln_t *aln); void sam_hdr_wr(FILE *fp, ref_t *ref);