diff --git a/Project.toml b/Project.toml index 589f8d6..746158e 100644 --- a/Project.toml +++ b/Project.toml @@ -12,6 +12,7 @@ Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [weakdeps] @@ -50,5 +51,6 @@ MetaGraphs = "0.7, 0.8" PrecompileTools = "1" RecipesBase = "1" STRIDE_jll = "1" +StaticArraysCore = "1" Statistics = "1.9" julia = "1.9" diff --git a/src/BioStructures.jl b/src/BioStructures.jl index 16c20c0..2b36639 100644 --- a/src/BioStructures.jl +++ b/src/BioStructures.jl @@ -18,6 +18,7 @@ using PrecompileTools using RecipesBase using LinearAlgebra +using StaticArraysCore using Statistics include("model.jl") diff --git a/src/mmcif.jl b/src/mmcif.jl index 1ca28c2..970ad8b 100644 --- a/src/mmcif.jl +++ b/src/mmcif.jl @@ -75,16 +75,20 @@ Call `MMCIFDict` with a filepath or stream to read the dictionary from that source. The keyword argument `gzip` (default `false`) determines if the input is gzipped. """ -struct MMCIFDict <: AbstractDict{String, Vector{String}} - dict::Dict{String, Vector{String}} +struct MMCIFDict{K<:AbstractString} <: AbstractDict{K, Vector{K}} + dict::Dict{K, Vector{K}} end -MMCIFDict() = MMCIFDict(Dict()) +MMCIFDict{K}() where K<:AbstractString = MMCIFDict{K}(Dict{K,Vector{K}}()) +MMCIFDict() = MMCIFDict{String}() + +MMCIFDict(d::AbstractDict{K, Vector{K}}) where K<:AbstractString = MMCIFDict{K}(d) +MMCIFDict(d::AbstractDict) = MMCIFDict{String}(Dict(d)) Base.getindex(mmcif_dict::MMCIFDict, field::AbstractString) = mmcif_dict.dict[field] function Base.setindex!(mmcif_dict::MMCIFDict, - val::AbstractVector{<:String}, + val::AbstractVector{<:AbstractString}, field::AbstractString) mmcif_dict.dict[field] = val return mmcif_dict @@ -104,8 +108,7 @@ end # Split a mmCIF line into tokens # See https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for syntax -function splitline(s::AbstractString) - tokens = String[] +function splitline!(tokens, s::AbstractString) in_token = false # Quote character of the currently open quote, or ' ' if no quote open quote_open_char = ' ' @@ -114,7 +117,7 @@ function splitline(s::AbstractString) if c in whitespacechars if in_token && quote_open_char == ' ' in_token = false - push!(tokens, s[start_i:(i - 1)]) + push!(tokens, @view(s[start_i:(i - 1)])) end elseif c in quotechars if quote_open_char == ' ' @@ -127,7 +130,7 @@ function splitline(s::AbstractString) elseif c == quote_open_char && (i == length(s) || s[i + 1] in whitespacechars) quote_open_char = ' ' in_token = false - push!(tokens, s[start_i:(i - 1)]) + push!(tokens, @view(s[start_i:(i - 1)])) end elseif c == '#' && !in_token return tokens @@ -137,17 +140,18 @@ function splitline(s::AbstractString) end end if in_token - push!(tokens, s[start_i:end]) + push!(tokens, @view(s[start_i:end])) end if quote_open_char != ' ' throw(ArgumentError("Line ended with quote open: $s")) end return tokens end +splitline(s::AbstractString) = splitline!(String[], s) # mostly for testing # Get tokens from a mmCIF file function tokenizecif(f::IO) - tokens = String[] + tokens = SubString{String}[] for line in eachline(f) if startswith(line, "#") continue @@ -162,7 +166,7 @@ function tokenizecif(f::IO) end push!(tokens, join(token_buffer, "\n")) else - append!(tokens, splitline(line)) + splitline!(tokens, line) end end return tokens @@ -172,7 +176,7 @@ end # This will fail if there is only a single atom record in the file # and it is not in the loop format function tokenizecifstructure(f::IO) - tokens = String[] + tokens = SubString{String}[] reading = false in_keys = true category_groups = ["_atom_site.", "_struct_conf."] @@ -204,7 +208,7 @@ function tokenizecifstructure(f::IO) in_keys = true else in_keys = false - append!(tokens, splitline(line)) + splitline!(tokens, line) end end return tokens @@ -218,7 +222,6 @@ end # Read a mmCIF file into a MMCIFDict function MMCIFDict(f::IO; gzip::Bool=false) - mmcif_dict = MMCIFDict() if gzip gz = GzipDecompressorStream(f) tokens = tokenizecif(gz) @@ -226,6 +229,7 @@ function MMCIFDict(f::IO; gzip::Bool=false) else tokens = tokenizecif(f) end + mmcif_dict = MMCIFDict{eltype(tokens)}() # Data label token is read first if length(tokens) == 0 return mmcif_dict @@ -236,16 +240,16 @@ function MMCIFDict(f::IO; gzip::Bool=false) end # Add tokens to a mmCIF dictionary -function populatedict!(mmcif_dict::MMCIFDict, tokens::AbstractVector{<:String}) +function populatedict!(mmcif_dict::MMCIFDict{K}, tokens::AbstractVector{<:AbstractString}) where K<:AbstractString key = "" - keys = String[] + keys = K[] loop_flag = false i = 0 # Value counter n = 0 # Key counter for token in tokens if token == "loop_" || token == "LOOP_" loop_flag = true - keys = String[] + keys = K[] i = 0 n = 0 continue @@ -258,22 +262,14 @@ function populatedict!(mmcif_dict::MMCIFDict, tokens::AbstractVector{<:String}) if i > 0 loop_flag = false else - mmcif_dict[token] = String[] + mmcif_dict[token] = K[] push!(keys, token) n += 1 continue end else - try - push!(mmcif_dict[keys[i % n + 1]], token) - catch ex - # A zero division error means we have not found any keys - if isa(ex, DivideError) - throw(ArgumentError("Loop keys not found, token: \"$token\"")) - else - rethrow() - end - end + iszero(n) && throw(ArgumentError("Loop keys not found, token: \"$token\"")) + push!(mmcif_dict[keys[i % n + 1]], token) i += 1 continue end @@ -298,7 +294,6 @@ function Base.read(input::IO, run_dssp::Bool=false, run_stride::Bool=false, gzip::Bool=false) - mmcif_dict = MMCIFDict() if gzip gz = GzipDecompressorStream(input) tokens = tokenizecifstructure(gz) @@ -306,6 +301,7 @@ function Base.read(input::IO, else tokens = tokenizecifstructure(input) end + mmcif_dict = MMCIFDict{eltype(tokens)}() populatedict!(mmcif_dict, tokens) return MolecularStructure( mmcif_dict; @@ -384,25 +380,34 @@ function MolecularStructure(mmcif_dict::MMCIFDict; end # Constructor from mmCIF ATOM/HETATM line -AtomRecord(d::MMCIFDict, i::Integer) = AtomRecord( - d["_atom_site.group_PDB"][i] == "HETATM", - parse(Int, d["_atom_site.id"][i]), - d["_atom_site.auth_atom_id"][i], - d["_atom_site.label_alt_id"][i] in missingvals ? ' ' : d["_atom_site.label_alt_id"][i][1], - d["_atom_site.auth_comp_id"][i], - d["_atom_site.auth_asym_id"][i], - parse(Int, d["_atom_site.auth_seq_id"][i]), - d["_atom_site.pdbx_PDB_ins_code"][i] in missingvals ? ' ' : d["_atom_site.pdbx_PDB_ins_code"][i][1], - [ - parse(Float64, d["_atom_site.Cartn_x"][i]), - parse(Float64, d["_atom_site.Cartn_y"][i]), - parse(Float64, d["_atom_site.Cartn_z"][i]) - ], - d["_atom_site.occupancy"][i] in missingvals ? 1.0 : parse(Float64, d["_atom_site.occupancy"][i]), - d["_atom_site.B_iso_or_equiv"][i] in missingvals ? 0.0 : parse(Float64, d["_atom_site.B_iso_or_equiv"][i]), - d["_atom_site.type_symbol"][i] in missingvals ? " " : d["_atom_site.type_symbol"][i], - d["_atom_site.pdbx_formal_charge"][i] in missingvals ? " " : d["_atom_site.pdbx_formal_charge"][i], -) +function AtomRecord(d::MMCIFDict, i::Integer) + alt_id = d["_atom_site.label_alt_id"][i] + ins_code = d["_atom_site.pdbx_PDB_ins_code"][i] + occupancy = d["_atom_site.occupancy"][i] + temp_factor = d["_atom_site.B_iso_or_equiv"][i] + typesym = d["_atom_site.type_symbol"][i] + charge = d["_atom_site.pdbx_formal_charge"][i] + + return AtomRecord( + d["_atom_site.group_PDB"][i] == "HETATM", + parse(Int, d["_atom_site.id"][i]), + d["_atom_site.auth_atom_id"][i], + alt_id in missingvals ? ' ' : alt_id[1], + d["_atom_site.auth_comp_id"][i], + d["_atom_site.auth_asym_id"][i], + parse(Int, d["_atom_site.auth_seq_id"][i]), + ins_code in missingvals ? ' ' : ins_code[1], + SVector{3,Float64}(( + parse(Float64, d["_atom_site.Cartn_x"][i]), + parse(Float64, d["_atom_site.Cartn_y"][i]), + parse(Float64, d["_atom_site.Cartn_z"][i]), + )), + occupancy in missingvals ? 1.0 : parse(Float64, occupancy), + temp_factor in missingvals ? 0.0 : parse(Float64, temp_factor), + typesym in missingvals ? " " : typesym, + charge in missingvals ? " " : charge, + ) +end # Format a mmCIF data value by enclosing with quotes or semicolon lines where # appropriate. See @@ -672,13 +677,13 @@ end Write multiple `MMCIFDict`s as a `Dict{String, MMCIFDict}` to a filepath or stream. The keyword argument `gzip` (default `false`) determines if the output is gzipped. """ -function writemultimmcif(filepath::AbstractString, cifs::Dict{String, MMCIFDict}; gzip::Bool=false) +function writemultimmcif(filepath::AbstractString, cifs::Dict{String, <:MMCIFDict}; gzip::Bool=false) open(filepath, "w") do f writemultimmcif(f, cifs; gzip=gzip) end end -function writemultimmcif(io::IO, cifs::Dict{String, MMCIFDict}; gzip::Bool=false) +function writemultimmcif(io::IO, cifs::Dict{String, <:MMCIFDict}; gzip::Bool=false) if gzip io = GzipCompressorStream(io) end diff --git a/src/model.jl b/src/model.jl index 52545e2..3c57aef 100644 --- a/src/model.jl +++ b/src/model.jl @@ -102,20 +102,20 @@ end Base.showerror(io::IO, e::PDBConsistencyError) = print(io, "PDBConsistencyError: ", e.message) "An atom that is part of a macromolecule." -struct Atom <: AbstractAtom - serial::Int - name::String - alt_loc_id::Char - coords::Vector{Float64} - occupancy::Float64 - temp_factor::Float64 - element::String - charge::String - residue::StructuralElement +mutable struct Atom <: AbstractAtom + const serial::Int + const name::String + const alt_loc_id::Char + coords::SVector{3,Float64} + const occupancy::Float64 + const temp_factor::Float64 + const element::String + const charge::String + const residue::StructuralElement end function Atom(a::Atom, r::StructuralElement) - return Atom(a.serial, a.name, a.alt_loc_id, copy(a.coords), a.occupancy, + return Atom(a.serial, a.name, a.alt_loc_id, a.coords, a.occupancy, a.temp_factor, a.element, a.charge, r) end @@ -234,7 +234,7 @@ struct AtomRecord chain_id::String res_number::Int ins_code::Char - coords::Vector{Float64} + coords::SVector{3,Float64} occupancy::Float64 temp_factor::Float64 element::String @@ -483,7 +483,7 @@ Set the x coordinate in Å of an `AbstractAtom` to `val`. For `DisorderedAtom`s only the default atom is updated. """ -x!(at::Atom, x::Real) = (at.coords[1] = x; at) +x!(at::Atom, x::Real) = (at.coords = SVector{3,Float64}((x, at.coords[2], at.coords[3]))) x!(dis_at::DisorderedAtom, x::Real) = x!(defaultatom(dis_at), x) """ @@ -501,7 +501,7 @@ Set the y coordinate in Å of an `AbstractAtom` to `val`. For `DisorderedAtom`s only the default atom is updated. """ -y!(at::Atom, y::Real) = (at.coords[2] = y; at) +y!(at::Atom, y::Real) = (at.coords = SVector{3,Float64}((at.coords[1], y, at.coords[3]))) y!(dis_at::DisorderedAtom, y::Real) = y!(defaultatom(dis_at), y) """ @@ -519,13 +519,13 @@ Set the z coordinate in Å of an `AbstractAtom` to `val`. For `DisorderedAtom`s only the default atom is updated. """ -z!(at::Atom, z::Real) = (at.coords[3] = z; at) +z!(at::Atom, z::Real) = (at.coords = SVector{3,Float64}((at.coords[1], at.coords[2], z))) z!(dis_at::DisorderedAtom, z::Real) = z!(defaultatom(dis_at), z) """ coords(at) -Get the coordinates in Å of an `AbstractAtom` as a `Vector{Float64}`. +Get the coordinates in Å of an `AbstractAtom` as a `SVector{3,Float64}`. """ coords(at::Atom) = at.coords coords(dis_at::DisorderedAtom) = coords(defaultatom(dis_at)) @@ -533,7 +533,7 @@ coords(dis_at::DisorderedAtom) = coords(defaultatom(dis_at)) """ coords!(at, new_coords) -Set the coordinates in Å of an `AbstractAtom` to a `Vector` of 3 numbers. +Set the coordinates in Å of an `AbstractAtom` to `new_coords`, an iterable of 3 numbers. For `DisorderedAtom`s only the default atom is updated. """ @@ -541,9 +541,8 @@ function coords!(at::Atom, new_coords) if length(new_coords) != 3 throw(ArgumentError("3 coordinates must be given")) end - x!(at, new_coords[1]) - y!(at, new_coords[2]) - z!(at, new_coords[3]) + x, y, z = new_coords + at.coords = SVector{3,Float64}((x, y, z)) return at end @@ -784,7 +783,7 @@ function resid(hetatm::Bool, resnum::Int, inscode::Char) end else if inscode == ' ' - return "$resnum" + return string(resnum) else return "$resnum$inscode" end diff --git a/src/pdb.jl b/src/pdb.jl index 3f7e6b6..eb36891 100644 --- a/src/pdb.jl +++ b/src/pdb.jl @@ -127,11 +127,11 @@ function AtomRecord(pdb_line::String, line_n::Integer=1) parsechainid(pdb_line, line_n), parseresnumber(pdb_line, line_n), parseinscode(pdb_line, line_n), - [ + SVector{3,Float64}(( parsecoordx(pdb_line, line_n), parsecoordy(pdb_line, line_n), parsecoordz(pdb_line, line_n) - ], + )), n >= 60 ? parseoccupancy(pdb_line) : 1.0, n >= 66 ? parsetempfac(pdb_line) : 0.0, n >= 78 ? parseelement(pdb_line) : " ", diff --git a/test/runtests.jl b/test/runtests.jl index 46a794c..93abe51 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -246,7 +246,7 @@ end testparent(getchildren(struc), struc) struc_copy = copy(struc) testparent(getchildren(struc_copy), struc_copy) - struc_copy['A'][10]["CA"].coords[2] = 100 + y!(struc_copy['A'][10]["CA"], 100) @test struc_copy['A'][10]["CA"].coords[2] == 100 @test a.coords[2] == 2 @test struc['A'][10]["CA"].coords[2] == 2 @@ -1783,7 +1783,7 @@ end mmcif_1ake = testfilepath("mmCIF", "1AKE.cif") gzip_file(mmcif_1ake, temp_filename) for dic in (MMCIFDict(mmcif_1ake), MMCIFDict(temp_filename; gzip=true)) - @test isa(dic.dict, Dict{String, Vector{String}}) + @test isa(dic.dict, Dict{K, Vector{K}} where K<:AbstractString) @test dic["_pdbx_database_status.recvd_initial_deposition_date"] == ["1991-11-08"] @test dic["_audit_author.name"] == ["Mueller, C.W.", "Schulz, G.E."] @test length(dic["_atom_site.group_PDB"]) == 3816