Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,5 @@ test/ecoli_2kb_region/reads.fasta.index.readdb
/test/*.index
/*.paf
/test/hu
/example
build/libsquigulator.a
20 changes: 16 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,14 @@ ifeq ($(zstd),1)
LDFLAGS += -lzstd
endif

STATICLIB = $(BUILD_DIR)/libsquigulator.a

BINARY = squigulator
OBJ = $(BUILD_DIR)/main.o \
$(BUILD_DIR)/model.o \

OBJ_BIN = $(BUILD_DIR)/main.o $(OBJ)
OBJ_LIB = $(BUILD_DIR)/sq_api.o $(OBJ)

OBJ = $(BUILD_DIR)/model.o \
$(BUILD_DIR)/methmodel.o \
$(BUILD_DIR)/misc.o \
$(BUILD_DIR)/sim.o \
Expand All @@ -31,8 +36,12 @@ endif

.PHONY: clean distclean test

$(BINARY): $(OBJ) slow5lib/lib/libslow5.a
$(CC) $(CFLAGS) $(OBJ) slow5lib/lib/libslow5.a $(LDFLAGS) -o $@
$(BINARY): $(OBJ_BIN) slow5lib/lib/libslow5.a
$(CC) $(CFLAGS) $(OBJ_BIN) slow5lib/lib/libslow5.a $(LDFLAGS) -o $@

$(STATICLIB): $(OBJ_LIB) slow5lib/lib/libslow5.a
cp slow5lib/lib/libslow5.a $@
$(AR) rcs $@ $(OBJ_LIB)

HEADERS = src/error.h src/format.h src/misc.h src/model.h \
src/rand.h src/ref.h src/seq.h src/sq.h src/str.h src/version.h \
Expand All @@ -41,6 +50,9 @@ HEADERS = src/error.h src/format.h src/misc.h src/model.h \
$(BUILD_DIR)/main.o: src/main.c $(HEADERS)
$(CC) $(CFLAGS) $(CPPFLAGS) $< -c -o $@

$(BUILD_DIR)/sq_api.o: src/sq_api.c $(HEADERS) include/squigulator.h
$(CC) $(CFLAGS) $(CPPFLAGS) -I include/ $< -c -o $@

$(BUILD_DIR)/model.o: src/model.c src/model.h src/misc.h
$(CC) $(CFLAGS) $(CPPFLAGS) $< -c -o $@

Expand Down
39 changes: 39 additions & 0 deletions examples/example.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// gcc -Wall -I include/ examples/example.c build/libsquigulator.a -o example -lz -lm -lpthread

#include <stdio.h>
#include <squigulator.h>

int main(){

squig_opt_t opt;
squig_init_opt(&opt);
opt.output_blow5 = "test.blow5";
opt.profile = "dna-r9-prom";
opt.num_threads = 8;
opt.batch_size = 1000;

squig_t *sq = squig_init("test/nCoV-2019.reference.fasta", &opt);

fprintf(stdout,"read_id\tread_group\tdigitisation\toffset\trange\tlen_raw_signal\tsum_raw_signal\n");

for(int n=0; n<5; n++){ //1000*5 reads

squig_batch_t *batch = squig_sim_batch(sq);

for(int i=0; i < opt.batch_size; i++){
squig_rec_t *rec = squig_get_rec(sq, batch, i);
fprintf(stdout, "%s\t%d\t%.0f\t%.0f\t%.0f\t%.0f\t%ld\t", rec->read_id, rec->read_group, rec->digitisation, rec->offset, rec->range, rec->sampling_rate, rec->len_raw_signal);
int64_t raw_signal_sum = 0;
for(int j=0; j < rec->len_raw_signal; j++){
raw_signal_sum += rec->raw_signal[j];
}
fprintf(stdout, "%ld\n", raw_signal_sum);
}

squig_free_batch(batch);
}

squig_free(sq);

return 0;
}
48 changes: 48 additions & 0 deletions include/squigulator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef SQUIGULATOR_H
#define SQUIGULATOR_H

#include <stdint.h>

typedef struct {
//from primary SLOW5 fields
char* read_id;
uint32_t read_group;
double digitisation;
double offset;
double range;
double sampling_rate;
uint64_t len_raw_signal;
int16_t* raw_signal;

//from auxilliary SLOW5 fields
char *channel_number;
double median_before;
int32_t read_number;
uint8_t start_mux;
uint64_t start_time;
uint8_t end_reason;

} squig_rec_t;

typedef struct {
int32_t avg_rlen;
int64_t random_seed;
int32_t num_threads;
int32_t batch_size;
char *output_blow5;
char *profile;
//things to add later ...
} squig_opt_t;

typedef struct squig_s squig_t;
typedef struct squig_batch_s squig_batch_t;

void squig_init_opt(squig_opt_t *opts);
squig_t *squig_init(char *ref_fasta, squig_opt_t *opts);
void squig_free(squig_t *sq);
squig_batch_t* squig_sim_batch(squig_t *sq);
void squig_free_batch(squig_batch_t *batch);
squig_rec_t *squig_get_rec(squig_t *sq, squig_batch_t *batch, int i);


#endif
9 changes: 5 additions & 4 deletions src/sim.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ profile_t minion_rna004_rna_prof = {
.dwell_std=0.0
};

static inline profile_t set_profile(char *prof_name, opt_t *opt){

profile_t set_profile(char *prof_name, opt_t *opt){
//opt->rna = 0;
if(strcmp(prof_name, "dna-r9-min") == 0){
return minion_r9_dna_prof;
Expand Down Expand Up @@ -191,14 +192,14 @@ static inline profile_t set_profile(char *prof_name, opt_t *opt){
}
}

static void print_model_stat(profile_t p){
static inline void print_model_stat(profile_t p){
VERBOSE("digitisation: %.1f; sample_rate: %.1f; range: %.1f; offset_mean: %.1f; offset_std: %.1f; dwell_mean: %.1f; dwell_std: %.1f",
p.digitisation,p.sample_rate,p.range,p.offset_mean,p.offset_std,p.dwell_mean,p.dwell_std);
}

///////////////////////////////////////////// CORE and DB /////////////////////////////////////////////

static void init_opt(opt_t *opt){
void init_opt(opt_t *opt){
//opt->ideal = 0;
//opt->ideal_time = 0;
//opt->ideal_amp = 0;
Expand Down Expand Up @@ -261,7 +262,7 @@ static void init_rand(core_t *core){
}
}

static core_t *init_core(opt_t opt, profile_t p, char *refname, char *output_file, char *fasta, char *paf, char *sam, char *trans_count){
core_t *init_core(opt_t opt, profile_t p, char *refname, char *output_file, char *fasta, char *paf, char *sam, char *trans_count){
core_t *core = (core_t *)malloc(sizeof(core_t)); MALLOC_CHK(core);
core->opt = opt;

Expand Down
171 changes: 171 additions & 0 deletions src/sq_api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@

#define _XOPEN_SOURCE 700
#include <string.h>
#include <slow5/slow5.h>

#include "squigulator.h"
#include "version.h"
#include "sq.h"
#include "ref.h"
#include "format.h"
#include "seq.h"
#include "rand.h"
#include "error.h"
#include "misc.h"





struct squig_s {
core_t *core;
squig_opt_t opt;
// can later have the header attributes and enum labels here
};

struct squig_batch_s {
db_t *db;
squig_rec_t **rec;
int n_rec;
};

core_t *init_core(opt_t opt, profile_t p, char *refname, char *output_file, char *fasta, char *paf, char *sam, char *trans_count);
void init_opt(opt_t *opt);
profile_t set_profile(char *prof_name, opt_t *opt);
void free_core(core_t *core);
void free_db(db_t* db);
db_t* init_db(core_t* core, int32_t n_rec);
void process_db(core_t* core,db_t* db);
void output_db(core_t* core, db_t* db);

void squig_init_opt(squig_opt_t *opts){
opts->avg_rlen = 1000;
opts->random_seed = 0;
opts->num_threads = 8;
opts->batch_size = 1000;
opts->output_blow5 = "output.blow5";
opts->profile = "dna-r9-prom";
}

squig_t *squig_init(char *ref_fasta, squig_opt_t *opts){
squig_t *sq = (squig_t *)malloc(sizeof(squig_t));
MALLOC_CHK(sq);

opt_t opt;
init_opt(&opt);

opt.rlen = opts->avg_rlen;
opt.num_thread = opts->num_threads;
opt.batch_size = opts->batch_size;
if (opts->random_seed == 0){
opts->random_seed = realtime();
}
opt.seed = opts->random_seed;

profile_t p = set_profile(opts->profile, &opt);

char *output_file = opts->output_blow5;
char *fasta = NULL;
char *paf = NULL;
char *sam = NULL;
char *trans_count = NULL;


sq->core = init_core(opt, p, ref_fasta, output_file, fasta, paf, sam, trans_count);

sq->opt = *opts;

return sq;
}

void squig_free(squig_t *sq){
free_core(sq->core);
free(sq);

}


void squig_free_batch(squig_batch_t *batch){
for(int i = 0; i < batch->n_rec; i++){
free(batch->rec[i]->read_id);
free(batch->rec[i]->raw_signal);
free(batch->rec[i]->channel_number);
free(batch->rec[i]);
}
free(batch->rec);
free_db(batch->db);
free(batch);
}

squig_batch_t *squig_sim_batch(squig_t *sq){

squig_batch_t *batch = (squig_batch_t *)malloc(sizeof(squig_batch_t));
MALLOC_CHK(batch);
batch->n_rec = sq->opt.batch_size;
batch->db = init_db(sq->core, batch->n_rec);
batch->rec = (squig_rec_t **)malloc(batch->n_rec * sizeof(squig_rec_t *));
MALLOC_CHK(batch->rec);
for(int i = 0; i < batch->n_rec; i++){
batch->rec[i] = (squig_rec_t *)malloc(sizeof(squig_rec_t));
MALLOC_CHK(batch->rec[i]);
}
process_db(sq->core, batch->db);
output_db(sq->core, batch->db);
return batch;
}

//this function is hacky, inefficient and repeats unnecessary work. Can be fixed later if performance becomes an issue,
// by integrating into core function. Not wanting to touch those at the moment
squig_rec_t *squig_get_rec(squig_t *sq, squig_batch_t *batch, int i){

if(i < 0 || i >= batch->n_rec){
fprintf(stderr, "Index out of bounds. batch size: %d, requested index: %d\n", batch->n_rec, i);
return NULL;
}

slow5_rec_t *rec = NULL;

size_t bytes = batch->db->mem_bytes[i];
char *mem = (char *)malloc(bytes);
MALLOC_CHK(mem);

if(sq->core->sp->format == SLOW5_FORMAT_ASCII){
memcpy(mem, batch->db->mem_records[i], bytes);
mem[bytes-1] = '\0';
} else if (sq->core->sp->format == SLOW5_FORMAT_BINARY){
memcpy(mem, batch->db->mem_records[i]+8, bytes-8);
} else {
fprintf(stderr, "Something is not right!\n");
exit(EXIT_FAILURE);
}

if (slow5_decode(&mem, &batch->db->mem_bytes[i], &rec, sq->core->sp) < 0){
fprintf(stderr,"Error in decoding record\n");
exit(EXIT_FAILURE);
}

//from primary SLOW5 fields
batch->rec[i]->read_id = strdup(rec->read_id);
batch->rec[i]->read_group = rec->read_group;
batch->rec[i]->digitisation = rec->digitisation;
batch->rec[i]->offset = rec->offset;
batch->rec[i]->range = rec->range;
batch->rec[i]->sampling_rate = rec->sampling_rate;
batch->rec[i]->len_raw_signal = rec->len_raw_signal;
batch->rec[i]->raw_signal = rec->raw_signal; rec->raw_signal = NULL;

//from auxilliary SLOW5 fields
int ret=0;
uint64_t len=0;
batch->rec[i]->channel_number = strdup(slow5_aux_get_string(rec,"channel_number", &len, &ret)); NEG_CHK(ret);
batch->rec[i]->median_before = slow5_aux_get_double(rec,"median_before", &ret); NEG_CHK(ret);
batch->rec[i]->read_number = slow5_aux_get_int32(rec,"read_number", &ret); NEG_CHK(ret);
batch->rec[i]->start_mux = slow5_aux_get_uint8(rec,"start_mux", &ret); NEG_CHK(ret);
batch->rec[i]->start_time = slow5_aux_get_uint64(rec,"start_time", &ret); NEG_CHK(ret);
batch->rec[i]->end_reason = slow5_aux_get_enum(rec,"end_reason", &ret); NEG_CHK(ret);

free(mem);
slow5_rec_free(rec);

return batch->rec[i];
}
Loading