diff --git a/include/bio/var_io/header.hpp b/include/bio/var_io/header.hpp index a563c24..85c8402 100644 --- a/include/bio/var_io/header.hpp +++ b/include/bio/var_io/header.hpp @@ -13,6 +13,9 @@ #pragma once +#include +#include +#include #include #include #include @@ -163,6 +166,7 @@ class header * \{ */ std::string file_format = "VCFv4.3"; //!< The file format version. + std::string file_date = transTime(); //!< The file date written to/read from the "##fileDate=" line; defaults to transTime(). std::vector filters; //!< Header lines describing FILTER fields. std::vector infos; //!< Header lines describing INFO fields. std::vector formats; //!< Header lines describing FORMAT fields. @@ -214,6 +218,19 @@ class header /*!\name Update, reset and inspect * \{ */ + /*!\brief Return the current local time as text; used as the default value of the "##fileDate=" header line. + * \details NOTE(review): relies on std::localtime(), which may not be thread-safe; confirm there are no concurrent callers. + * \returns The time of the call formatted as YYYY-MM-DD HH:MM:SS (std::put_time with "%F %T"). + */ + std::string transTime() + { + std::stringstream text_stream; + const std::chrono::time_point now = std::chrono::system_clock::now(); + const std::time_t rawtime = std::chrono::system_clock::to_time_t(now); + text_stream << std::put_time(std::localtime(&rawtime), "%F %T"); + std::string time_string = text_stream.str(); + return time_string; + } /*!\brief Add missing IDX fields to header entries and ensure that everything has proper hash-entries. * * \details @@ -478,6 +495,9 @@ class header // file format ((raw_data += "##fileformat=") += file_format) += "\n"; + // file date + ((raw_data += "##fileDate=") += file_date) += "\n"; + // filters for (auto const & filter : filters) { @@ -611,6 +631,10 @@ class header { throw format_error{"File has two lines that begin with \"##fileformat\"."}; } + else if (l.starts_with("##fileDate=")) + { + parse_file_date_line(l.substr(11)); + } else if (l.starts_with("##INFO=")) { parse_info_or_format_line(strip_angular_brackets(l.substr(7)), true); @@ -641,6 +665,12 @@ class header } } + //!\brief Store the text after "##fileDate=" as the file date (taken verbatim). 
+ void parse_file_date_line(std::string_view const l) + { + file_date = static_cast(l); + } + //!\brief Parse an INFO or FORMAT line. void parse_info_or_format_line(std::string_view const l, bool const is_info) { diff --git a/test/snippet/var_io/var_io_writer.cpp b/test/snippet/var_io/var_io_writer.cpp index d830334..2bb4bd9 100644 --- a/test/snippet/var_io/var_io_writer.cpp +++ b/test/snippet/var_io/var_io_writer.cpp @@ -18,6 +18,7 @@ using namespace seqan3::literals; // a plaintext header std::string_view const text_header = R"(##fileformat=VCFv4.3 +##fileDate=2022-03-02 14:18:22 ##contig= ##INFO= ##INFO= @@ -92,6 +93,7 @@ bio::var_io::writer writer{"example2.vcf", // add header so destructor works std::string_view const text_header = R"(##fileformat=VCFv4.3 +##fileDate=2022-03-02 14:18:22 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 )"; writer.set_header(bio::var_io::header{text_header}); diff --git a/test/unit/format/bcf_data.hpp b/test/unit/format/bcf_data.hpp index 3806b5c..c668476 100644 --- a/test/unit/format/bcf_data.hpp +++ b/test/unit/format/bcf_data.hpp @@ -184,8 +184,8 @@ inline constexpr std::string_view example_from_spec_bcf_unbgzf_our{ inline constexpr std::string_view example_from_spec_bcf_header = R"(##fileformat=VCFv4.3 +##fileDate=2022-03-02 14:18:22 ##FILTER= -##fileDate=20090805 ##source=myImputationProgramV3.1 ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta ##contig= diff --git a/test/unit/format/vcf_data.hpp b/test/unit/format/vcf_data.hpp index 36e5ad7..c48fa1c 100644 --- a/test/unit/format/vcf_data.hpp +++ b/test/unit/format/vcf_data.hpp @@ -31,7 +31,7 @@ inline std::string const example_from_spec_records = inline std::string const example_from_spec_header = R"(##fileformat=VCFv4.3 -##fileDate=20090805 +##fileDate=2022-03-02 14:18:22 ##source=myImputationProgramV3.1 ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta ##contig= @@ -55,6 +55,7 @@ inline std::string const 
example_from_spec = example_from_spec_header + example_ inline std::string const example_from_spec_header_regenerated = R"(##fileformat=VCFv4.3 +##fileDate=2022-03-02 14:18:22 ##FILTER= ##FILTER= ##FILTER= @@ -69,7 +70,6 @@ inline std::string const example_from_spec_header_regenerated = ##FORMAT= ##FORMAT= ##contig= -##fileDate=20090805 ##source=myImputationProgramV3.1 ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta ##phasing=partial @@ -78,6 +78,7 @@ inline std::string const example_from_spec_header_regenerated = inline std::string const example_from_spec_header_regenerated_no_IDX = R"(##fileformat=VCFv4.3 +##fileDate=2022-03-02 14:18:22 ##FILTER= ##FILTER= ##FILTER= @@ -92,7 +93,6 @@ inline std::string const example_from_spec_header_regenerated_no_IDX = ##FORMAT= ##FORMAT= ##contig= -##fileDate=20090805 ##source=myImputationProgramV3.1 ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta ##phasing=partial @@ -165,10 +165,10 @@ inline std::string const minimal_field_rows = inline std::string const incomplete_header_before = R"(##fileformat=VCFv4.3 +##fileDate=2022-03-02 14:18:22 ##FILTER= ##INFO= ##FORMAT= -##fileDate=20090805 ##source=myImputationProgramV3.1 ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta ##phasing=partial @@ -177,6 +177,7 @@ inline std::string const incomplete_header_before = inline std::string const incomplete_header_after = R"(##fileformat=VCFv4.3 +##fileDate=2022-03-02 14:18:22 ##FILTER= ##FILTER= ##INFO= @@ -190,7 +191,6 @@ inline std::string const incomplete_header_after = ##FORMAT= ##FORMAT= ##contig= -##fileDate=20090805 ##source=myImputationProgramV3.1 ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta ##phasing=partial diff --git a/test/unit/var_io/var_io_header_test.cpp b/test/unit/var_io/var_io_header_test.cpp index 41f5d39..b84b7ea 100644 --- a/test/unit/var_io/var_io_header_test.cpp +++ b/test/unit/var_io/var_io_header_test.cpp @@ -23,6 +23,7 @@ TEST(var_io_header, 
spec_from_text) bio::var_io::header hdr{example_from_spec_header}; EXPECT_EQ(hdr.file_format, "VCFv4.3"); + EXPECT_EQ(hdr.file_date, "2022-03-02 14:18:22"); // filters ASSERT_EQ(hdr.filters.size(), 3); @@ -143,8 +144,7 @@ TEST(var_io_header, spec_from_text) EXPECT_TRUE(*++it == (svpair{"taxonomy", "x"})); // other lines in header - std::vector other_lines_cmp{"fileDate=20090805", - "source=myImputationProgramV3.1", + std::vector other_lines_cmp{"source=myImputationProgramV3.1", "reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta", "phasing=partial"}; ASSERT_EQ(hdr.other_lines.size(), other_lines_cmp.size());