Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion source/BAMfunctions.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "BAMfunctions.h"
#include "htslib/htslib/kstring.h"
#include <htslib/kstring.h>


string bam_cigarString (bam1_t *b) {//output CIGAR string
Expand Down
4 changes: 2 additions & 2 deletions source/IncludeDefine.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#define ERROR_OUT string ( __FILE__ ) +":"+ to_string ( (uint) __LINE__ ) +":"+ string ( __FUNCTION__ )

//external libs
#define SAMTOOLS_BGZF_H "htslib/htslib/bgzf.h"
#define SAMTOOLS_SAM_H "htslib/htslib/sam.h"
#define SAMTOOLS_BGZF_H <htslib/bgzf.h>
#define SAMTOOLS_SAM_H <htslib/sam.h>

using namespace std;

Expand Down
40 changes: 26 additions & 14 deletions source/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,23 @@ CXXFLAGSextra ?=

# user may define the compiler
CXX ?= g++
CC ?= gcc

# pre-defined flags
LDFLAGS_shared := -pthread -Lhtslib -Bstatic -lhts -Bdynamic -lz
LDFLAGS_static := -static -static-libgcc -pthread -Lhtslib -lhts -lz
LDFLAGS_shared := -pthread -lhts -Bdynamic -lz
LDFLAGS_static := -static -static-libgcc -pthread -lhts -lz
LDFLAGS_Mac :=-pthread -lz htslib/libhts.a
LDFLAGS_Mac_static :=-pthread -lz -static-libgcc htslib/libhts.a
LDFLAGS_gdb := $(LDFLAGS_shared)

PKGCONFIG ?= pkg-config
PKGLIBS := htslib zlib
PKGCPPFLAGS := $(shell $(PKGCONFIG) --cflags $(PKGLIBS))
PKGLDFLAGS := $(shell $(PKGCONFIG) --libs $(PKGLIBS))

LDFLAGSextra += $(PKGLDFLAGS)
CXXFLAGSextra += $(PKGCPPFLAGS)

DATE_FMT = --iso-8601=seconds
ifdef SOURCE_DATE_EPOCH
BUILD_DATE ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u "$(DATE_FMT)")
Expand Down Expand Up @@ -45,7 +54,7 @@ CFLAGS ?= -pipe -Wall -Wextra -O3
CXXFLAGS_SIMD ?= -mavx2

# Unconditionally set essential flags and optimization options
CXXFLAGS_common := -std=c++11 -fopenmp $(COMPTIMEPLACE) $(GIT_BRANCH_COMMIT_DIFF)
CXXFLAGS_common := -std=c++11 -fopenmp $(COMPTIMEPLACE) $(CCFLAGS_common_add)
CXXFLAGS_main := -O3 $(CXXFLAGS_common)
CXXFLAGS_gdb := -O0 -g3 $(CXXFLAGS_common)

Expand Down Expand Up @@ -91,19 +100,29 @@ OBJECTS = systemFunctions.o funPrimaryAlignMark.o \
sjdbLoadFromFiles.o sjdbLoadFromStream.o sjdbPrepare.o sjdbBuildIndex.o sjdbInsertJunctions.o mapThreadsSpawn.o \
Parameters_readFilesInit.o Parameters_openReadsFiles.cpp Parameters_closeReadsFiles.cpp Parameters_readSAMheader.o \
bam_cat.o serviceFuns.o GlobalVariables.cpp \
BAMoutput.o BAMfunctions.o ReadAlign_alignBAM.o BAMbinSortByCoordinate.o signalFromBAM.o bamRemoveDuplicates.o BAMbinSortUnmapped.o
BAMoutput.o BAMfunctions.o ReadAlign_alignBAM.o BAMbinSortByCoordinate.o signalFromBAM.o bamRemoveDuplicates.o BAMbinSortUnmapped.o \
$(MMAP_MALLOC_SRC_DIR)/libmmap_allocator.a

SOURCES := $(wildcard *.cpp) $(wildcard *.c)

MMAP_MALLOC_SRC_DIR = mmap_malloc
MMAP_MALLOC_LIB_SRCS = $(wildcard $(MMAP_MALLOC_SRC_DIR)/*.c)
MMAP_MALLOC_OBJ_FILES = $(patsubst $(MMAP_MALLOC_SRC_DIR)/%.c,$(MMAP_MALLOC_SRC_DIR)/%.o,$(MMAP_MALLOC_LIB_SRCS))

$(MMAP_MALLOC_SRC_DIR)/%.o : $(MMAP_MALLOC_SRC_DIR)/%.c
$(CC) -Wall -fPIC -pthread -O3 -Immap_malloc -c $< -o $@

%.o : %.cpp
$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $<
$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $(CXXFLAGSextra) $(COMPTIMEPLACE) $<

%.o : %.c
$(CXX) -c $(CPPFLAGS) $(CFLAGS) $<
$(CXX) -c $(CPPFLAGS) $(CFLAGS) $(CXXFLAGSextra) $(COMPTIMEPLACE) $<

all: cleanCompileInfo STAR$(SFX)

$(MMAP_MALLOC_SRC_DIR)/libmmap_allocator.a: $(MMAP_MALLOC_OBJ_FILES)
ar r $@ $^

opal/opal.o : opal/opal.cpp opal/opal.h
cd opal && \
$(CXX) -c -I./ -std=c++11 $(CPPFLAGS) $(CXXFLAGS) $(CXXFLAGSextra) $(CXXFLAGS_SIMD) opal.cpp
Expand All @@ -114,8 +133,6 @@ clean:

.PHONY: CLEAN
CLEAN: clean
$(MAKE) -C htslib clean


.PHONY: clean_solo
clean_solo:
Expand All @@ -131,7 +148,7 @@ ifneq ($(MAKECMDGOALS),CLEAN)
ifneq ($(MAKECMDGOALS),clean_solo)
ifneq ($(MAKECMDGOALS),STARforMac)
ifneq ($(MAKECMDGOALS),STARforMacGDB)
Depend.list: $(SOURCES) parametersDefault.xxd htslib
Depend.list: $(SOURCES) parametersDefault.xxd
echo $(SOURCES)
'rm' -f ./Depend.list
$(CXX) $(CXXFLAGS_common) -MM $^ >> Depend.list
Expand All @@ -143,11 +160,6 @@ endif
endif
endif

htslib : htslib/libhts.a

htslib/libhts.a :
$(MAKE) -C htslib lib-static

parametersDefault.xxd: parametersDefault
xxd -i parametersDefault > parametersDefault.xxd

Expand Down
1 change: 0 additions & 1 deletion source/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,6 @@ void Parameters::inputParameters (int argInN, char* argIn[]) {//input parameters

inOut->logMain << "STAR version=" << STAR_VERSION << "\n";
inOut->logMain << "STAR compilation time,server,dir=" << COMPILATION_TIME_PLACE << "\n";
inOut->logMain << "STAR git: " << GIT_BRANCH_COMMIT_DIFF << "\n";
#ifdef COMPILE_FOR_LONG_READS
inOut->logMain << "Compiled for LONG reads" << "\n";
#endif
Expand Down
2 changes: 1 addition & 1 deletion source/STAR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

#include "twoPassRunPass1.h"

#include "htslib/htslib/sam.h"
#include <htslib/sam.h>
#include "parametersDefault.xxd"

void usage(int usageType)
Expand Down
3 changes: 2 additions & 1 deletion source/SoloFeature.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <fstream>
#include <unordered_map>
#include <unordered_set>
#include "mmap_allocator.hpp"

#include "IncludeDefine.h"
#include "ReadAlignChunk.h"
Expand Down Expand Up @@ -53,7 +54,7 @@ class SoloFeature {

vector<double> nUMIperCBmulti;

vector<uint32> countCellGeneUMI;//sparsified matrix for the counts, each entry is: geneID count1 count2 ... countNcounts
vector<uint32, galaxy::mmap_allocator<uint32>> countCellGeneUMI;//sparsified matrix for the counts, each entry is: geneID count1 count2 ... countNcounts
vector<uint32> countCellGeneUMIindex;//index of CBs in the count matrix
uint32 countMatStride; //number of counts per entry in the count matrix

Expand Down
4 changes: 2 additions & 2 deletions source/SoloFeature_emptyDrops_CR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <map>
#include <random>

double logMultinomialPDFsparse(const vector<double> &ambProfileLogP, const vector<uint32> &countCellGeneUMI, const uint32 stride, const uint32 shift, const int64 start, const uint32 nGenes, const vector<double> &logFactorial);
double logMultinomialPDFsparse(const vector<double> &ambProfileLogP, const vector<uint32, galaxy::mmap_allocator<uint32>> &countCellGeneUMI, const uint32 stride, const uint32 shift, const int64 start, const uint32 nGenes, const vector<double> &logFactorial);
void SoloFeature::emptyDrops_CR()
{
if (nCB<=pSolo.cellFilter.eDcr.indMin) {
Expand Down Expand Up @@ -216,7 +216,7 @@ void SoloFeature::emptyDrops_CR()
return;
};

double logMultinomialPDFsparse(const vector<double> &ambProfileLogP, const vector<uint32> &countCellGeneUMI, const uint32 stride, const uint32 shift, const int64 start, const uint32 nGenes, const vector<double> &logFactorial)
double logMultinomialPDFsparse(const vector<double> &ambProfileLogP, const vector<uint32, galaxy::mmap_allocator<uint32>> &countCellGeneUMI, const uint32 stride, const uint32 shift, const int64 start, const uint32 nGenes, const vector<double> &logFactorial)
{
uint32 sumCount=0;
double sumLogFac=0.0, sumCountLogP=0.0;
Expand Down
5 changes: 4 additions & 1 deletion source/SoloFeature_outputResults.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ void SoloFeature::outputResults(bool cellFilterYes, string outputPrefixMat)
for (uint32 ii=0; ii<Trans.nGe; ii++) {
geneStr << Trans.geID[ii] <<"\t"<< (Trans.geName[ii].empty() ? Trans.geID[ii] : Trans.geName[ii]);
if (pSolo.outFormat.featuresGeneField3!="-") {
geneStr <<'\t'<< pSolo.outFormat.featuresGeneField3;
if (pSolo.outFormat.featuresGeneField3 == "+")
geneStr <<'\t'<< (Trans.geBiotype[ii].empty() ? "MissingGeneType" : Trans.geBiotype[ii]);
else
geneStr <<'\t'<< pSolo.outFormat.featuresGeneField3;
};
geneStr << '\n';
};
Expand Down
2 changes: 1 addition & 1 deletion source/bamRemoveDuplicates.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <unordered_map>
#include "bamRemoveDuplicates.h"
#include <iostream>
#include "htslib/htslib/sam.h"
#include <htslib/sam.h>
#include "IncludeDefine.h"
#include SAMTOOLS_BGZF_H
#include "ErrorWarning.h"
Expand Down
4 changes: 2 additions & 2 deletions source/bam_cat.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ THE SOFTWARE.
#include <stdlib.h>
#include <unistd.h>

#include "htslib/htslib/bgzf.h"
#include "htslib/htslib/sam.h"
#include <htslib/bgzf.h>
#include <htslib/sam.h>
#include <cstring>

#define BUF_SIZE 0x10000
Expand Down
2 changes: 1 addition & 1 deletion source/bam_cat.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CODE_bam_cat
#define CODE_bam_cat

#include "htslib/htslib/sam.h"
#include <htslib/sam.h>

int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam);

Expand Down
71 changes: 71 additions & 0 deletions source/mmap_allocator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#ifndef _MALLOC_ALLOCATOR_H
#define _MALLOC_ALLOCATOR_H 1

// g++ -std=gnu++20

#include <cstddef>
#include <cstdlib>
#include <new>
#include <type_traits>

#include "mmap_malloc.h"

namespace galaxy {

/**
 * Stateless standard-library allocator whose storage comes from
 * mmap_malloc() and is released with mmap_free() (see mmap_malloc.h).
 *
 * The class has no data members, so every instance is interchangeable:
 * operator== always yields true and operator!= always yields false.
 *
 * Usage: std::vector<T, galaxy::mmap_allocator<T>>.
 */
template <typename _Tp>
class mmap_allocator {
 public:
  using value_type = _Tp;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;
  // _GLIBCXX_RESOLVE_LIB_DEFECTS
  // 2103. propagate_on_container_move_assignment
  using propagate_on_container_move_assignment = std::true_type;

  constexpr mmap_allocator() noexcept = default;
  constexpr mmap_allocator(const mmap_allocator&) noexcept = default;

  /// Rebinding conversion: an allocator for any other value type converts
  /// freely because there is no state to carry over.
  template <typename _Tp1>
  constexpr mmap_allocator(const mmap_allocator<_Tp1>&) noexcept {}

  ~mmap_allocator() noexcept = default;

  /**
   * Allocate uninitialised storage for __n objects of type _Tp.
   *
   * @param __n  number of objects; permitted to be 0 (the C++ standard says
   *             nothing about what the return value is in that case).
   * @return     pointer obtained from mmap_malloc(__n * sizeof(_Tp)).
   * @throws std::bad_array_new_length if __n * sizeof(_Tp) would overflow
   *         std::size_t.
   * @throws std::bad_alloc if __n exceeds the maximum supported element
   *         count, or if mmap_malloc() returns a null pointer.
   */
  [[nodiscard]] _Tp* allocate(size_type __n, const void* = nullptr) {
    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 3308. std::allocator<void>().allocate(n)
    static_assert(sizeof(_Tp) != 0, "cannot allocate incomplete types");

    if (__n > this->_M_max_size()) [[unlikely]] {
      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 3190. allocator::allocate sometimes returns too little storage
      if (__n > (std::size_t(-1) / sizeof(_Tp))) throw std::bad_array_new_length();
      throw std::bad_alloc();
    }
    // The size check above guarantees this multiplication cannot wrap.
    _Tp* __ret = static_cast<_Tp*>(mmap_malloc(__n * sizeof(_Tp)));
    if (!__ret) throw std::bad_alloc();
    return __ret;
  }

  /// Release storage previously returned by allocate().
  /// __p is not permitted to be a null pointer; the element count is unused.
  void deallocate(_Tp* __p, size_type) { mmap_free(static_cast<void*>(__p)); }

  /// All mmap_allocator instances are interchangeable.
  template <typename _Up>
  friend constexpr bool operator==(const mmap_allocator&, const mmap_allocator<_Up>&) noexcept {
    return true;
  }

  /// Counterpart of operator==. Before C++20 the compiler does not derive
  /// `!=` from `==`, and the Allocator requirements (and container code that
  /// compares allocators) need both to be well-formed.
  template <typename _Up>
  friend constexpr bool operator!=(const mmap_allocator&, const mmap_allocator<_Up>&) noexcept {
    return false;
  }

 private:
  /// Largest element count allocate() accepts: the byte size must fit in
  /// ptrdiff_t when that type is narrower than size_t, otherwise in size_t.
  constexpr size_type _M_max_size() const noexcept {
#if __PTRDIFF_MAX__ < __SIZE_MAX__
    return std::size_t(__PTRDIFF_MAX__) / sizeof(_Tp);
#else
    return std::size_t(-1) / sizeof(_Tp);
#endif
  }
};

}  // namespace galaxy

#endif
20 changes: 20 additions & 0 deletions source/mmap_malloc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#ifndef _MMAP_MALLOC_H
#define _MMAP_MALLOC_H 1

/*
 * C interface of the mmap-backed allocation routines.
 *
 * <stdlib.h> (needed for size_t) is included BEFORE the extern "C" block:
 * a standard header must only be included outside of any declaration, and
 * wrapping it in a linkage specification would apply C linkage to whatever
 * that header declares.
 */
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * NOTE(review): the names mirror malloc/free/calloc/realloc/reallocarray and
 * presumably follow the same contracts; the implementation is not visible
 * here - confirm before relying on details such as zero-size behaviour.
 * Callers in this code base treat a null return from mmap_malloc() as
 * allocation failure and pass its result back to mmap_free().
 */
void* mmap_malloc(size_t size);
void mmap_free(void* addr);
void* mmap_calloc(size_t num_elements, size_t element_size);
void* mmap_realloc(void *addr, size_t size);
/* NOTE(review): parameter order here is (addr, size, count) - verify against
   the implementation; glibc's reallocarray takes (ptr, nmemb, size). */
void* mmap_reallocarray(void *addr, size_t size, size_t count);

#ifdef __cplusplus
}
#endif

#endif /* mmap_malloc.h */
14 changes: 14 additions & 0 deletions source/mmap_malloc/constants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* Shared compile-time helpers and debug switches for the mmap_malloc sources. */
#ifndef CONSTANTS_H
#define CONSTANTS_H

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* `inline` plus the GCC/Clang always_inline attribute (compiler-specific). */
#define FORCE_INLINE inline __attribute__((always_inline))
/* Internal-linkage (static) helper that is also FORCE_INLINE. */
#define LOCAL_HELPER static FORCE_INLINE

/* Debug switches, both off (0) by default.
   NOTE(review): presumably gate extra heap / list diagnostics in the
   allocator sources - the code that reads them is not visible here. */
#define DEBUG_HEAP 0
#define DEBUG_LIST 0

#endif
40 changes: 40 additions & 0 deletions source/mmap_malloc/default_config.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include "default_config.h"

/*
 * String constants holding environment-variable names.
 * NOTE(review): presumably each one lets the user override the matching
 * default_* value defined below - the code that reads them is not visible
 * here; confirm against the consumer.
 * The pointers are const but the pointee type is plain char; the strings are
 * literals and must never be written through these pointers.
 */
char* const env_mmap_heap_size = "ENV_MMAP_HEAP_SIZE";
char* const env_mmap_alloctor_min_bsize = "ENV_MMAP_ALLOCATOR_MIN_BSIZE";
char* const env_naming_template = "ENV_NAMING_TEMPLATE";

/* Environment-variable names for profiling output (file path and frequency).
   NOTE(review): units and meaning of "frequency" are not visible here. */
char* const env_profile_file_path = "ENV_PROFILE_FILE_PATH";
char* const env_profile_frequency = "ENV_PROFILE_FREQUENCY";

/* 4 * 1024^4 bytes = 4 TiB. The cast makes the whole product size_t
   arithmetic, so it wraps if size_t is 32 bits wide. */
const size_t default_mmap_heap_size = (size_t) 1024 * 1024 * 1024 * 1024 * 4;
/*
$ grep -m1 "address sizes" /proc/cpuinfo
address sizes : 46 bits physical, 57 bits virtual
*/
/* 2 * 1024^3 bytes = 2 GiB.
   NOTE(review): presumably a minimum block size for the allocator - confirm. */
const size_t default_mmap_alloctor_min_bsize = (size_t) 1024 * 1024 * 1024 * 2;
/* NOTE(review): the trailing X run looks like a mkstemp-style placeholder -
   confirm how the template is consumed. */
char* const default_naming_template = ".mmap_alloc.XXXXXXXXXX";

/*
48 bit is 256T, 44bit:4T, 16-1 possible.

https://www.linuxquestions.org/questions/linux-hardware-18/address-sizes-in-cpuinfo-757456/

Every x86 and x86_64 CPU has an address translation system that translates virtual addresses into physical addresses.

The virtual addresses are the ones programs actually use (they are the numerical values of pointer variables in the program).

The physical addresses are the ones the CPU sends to the memory controller to generate the signals needed to select specific memory locations.

Each process has its own translation tables, so a pointer in one process with identical value to a pointer in another process doesn't normally point to the same physical memory.

The older x86 architecture has a translation design that translates a 32 bit virtual address to a 32 bit physical address.

Newer x86 architecture also supports a translation (called PAE) that translates 32 bit virtual to 36 bit physical.

The x86_64 architecture has 48 bit virtual addresses. Pointers take 64 bits, but the top 17 bits of any 64 bit pointer are required to be identical (all 17 zeros or all 17 ones) so 16 of those 17 bits are redundant.

The x86_64 address translation takes in 48 bit virtual addresses on all models. But the number of physical addresses generated varies by model of CPU chip.

The architecture has an upper limit on the number of physical address bits. I think that is 52, but I'm not sure I remember correctly. But since no one will be using that much physical ram before current CPU models are obsolete, CPU chips don't actually support that much. They support various numbers of physical address bits from 36 up. 40, which you have, is pretty typical.
*/
19 changes: 19 additions & 0 deletions source/mmap_malloc/default_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/* Declarations of the configuration constants defined in default_config.c. */
#ifndef DEFAULT_CONFIG_H
#define DEFAULT_CONFIG_H

#include <stdlib.h>

/* NOTE(review): presumably the buffer size (in bytes) reserved for a naming
   template string - the user of this macro is not visible here. */
#define MAX_NAMING_TEMPLATE_SIZE 256

/* Environment-variable name strings for the allocator settings. */
extern char* const env_mmap_heap_size;
extern char* const env_mmap_alloctor_min_bsize;
extern char* const env_naming_template;

/* Environment-variable name strings for the profiling settings. */
extern char* const env_profile_file_path;
extern char* const env_profile_frequency;

/* Built-in default values (sizes are in bytes; see default_config.c). */
extern const size_t default_mmap_heap_size;
extern const size_t default_mmap_alloctor_min_bsize;
extern char* const default_naming_template;

#endif
Loading