BioJulia · timholy · May 8, 2025 · May 14, 2025
diff --git a/Project.toml b/Project.toml
@@ -12,6 +12,7 @@ Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
+StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [weakdeps]
@@ -50,5 +51,6 @@ MetaGraphs = "0.7, 0.8"
 PrecompileTools = "1"
 RecipesBase = "1"
 STRIDE_jll = "1"
+StaticArraysCore = "1"
 Statistics = "1.9"
 julia = "1.9"
diff --git a/src/BioStructures.jl b/src/BioStructures.jl
@@ -18,6 +18,7 @@ using PrecompileTools
 using RecipesBase
 
 using LinearAlgebra
+using StaticArraysCore
 using Statistics
 
 include("model.jl")

diff --git a/src/mmcif.jl b/src/mmcif.jl
@@ -75,16 +75,20 @@
 source.
 The keyword argument `gzip` (default `false`) determines if the input is gzipped.
 """
-struct MMCIFDict <: AbstractDict{String, Vector{String}}
-    dict::Dict{String, Vector{String}}
+struct MMCIFDict{K<:AbstractString} <: AbstractDict{K, Vector{K}}
+    dict::Dict{K, Vector{K}}
 end
 
-MMCIFDict() = MMCIFDict(Dict())
+MMCIFDict{K}() where K<:AbstractString = MMCIFDict{K}(Dict{K,Vector{K}}())
+MMCIFDict() = MMCIFDict{String}()
+
+MMCIFDict(d::AbstractDict{K, Vector{K}}) where K<:AbstractString = MMCIFDict{K}(d)
+MMCIFDict(d::AbstractDict) = MMCIFDict{String}(Dict(d))
 
 Base.getindex(mmcif_dict::MMCIFDict, field::AbstractString) = mmcif_dict.dict[field]
 
 function Base.setindex!(mmcif_dict::MMCIFDict,
-                    val::AbstractVector{<:String},
+                    val::AbstractVector{<:AbstractString},
                     field::AbstractString)
     mmcif_dict.dict[field] = val
     return mmcif_dict
@@ -104,8 +108,7 @@
 
 # Split a mmCIF line into tokens
 # See https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for syntax
-function splitline(s::AbstractString)
-    tokens = String[]
+function splitline!(tokens, s::AbstractString)
     in_token = false
     # Quote character of the currently open quote, or ' ' if no quote open
     quote_open_char = ' '
@@ -114,7 +117,7 @@
         if c in whitespacechars
             if in_token && quote_open_char == ' '
                 in_token = false
-                push!(tokens, s[start_i:(i - 1)])
+                push!(tokens, @view(s[start_i:(i - 1)]))
             end
         elseif c in quotechars
             if quote_open_char == ' '
@@ -127,7 +130,7 @@
             elseif c == quote_open_char && (i == length(s) || s[i + 1] in whitespacechars)
                 quote_open_char = ' '
                 in_token = false
-                push!(tokens, s[start_i:(i - 1)])
+                push!(tokens, @view(s[start_i:(i - 1)]))
             end
         elseif c == '#' && !in_token
             return tokens
@@ -137,17 +140,18 @@
         end
     end
     if in_token
-        push!(tokens, s[start_i:end])
+        push!(tokens, @view(s[start_i:end]))
     end
     if quote_open_char != ' '
         throw(ArgumentError("Line ended with quote open: $s"))
     end
     return tokens
 end
+splitline(s::AbstractString) = splitline!(String[], s)   # mostly for testing
 
 # Get tokens from a mmCIF file
 function tokenizecif(f::IO)
-    tokens = String[]
+    tokens = SubString{String}[]
     for line in eachline(f)
         if startswith(line, "#")
             continue
@@ -162,7 +166,7 @@
             end
             push!(tokens, join(token_buffer, "\n"))
         else
-            append!(tokens, splitline(line))
+            splitline!(tokens, line)
         end
     end
     return tokens
@@ -172,7 +176,7 @@
 # This will fail if there is only a single atom record in the file
 #   and it is not in the loop format
 function tokenizecifstructure(f::IO)
-    tokens = String[]
+    tokens = SubString{String}[]
     reading = false
     in_keys = true
     category_groups = ["_atom_site.", "_struct_conf."]
@@ -204,7 +208,7 @@
             in_keys = true
         else
             in_keys = false
-            append!(tokens, splitline(line))
+            splitline!(tokens, line)
         end
     end
     return tokens
@@ -218,14 +222,14 @@
 
 # Read a mmCIF file into a MMCIFDict
 function MMCIFDict(f::IO; gzip::Bool=false)
-    mmcif_dict = MMCIFDict()
     if gzip
         gz = GzipDecompressorStream(f)
         tokens = tokenizecif(gz)
         close(gz)
     else
         tokens = tokenizecif(f)
     end
+    mmcif_dict = MMCIFDict{eltype(tokens)}()
     # Data label token is read first
     if length(tokens) == 0
         return mmcif_dict
@@ -236,16 +240,16 @@
 end
 
 # Add tokens to a mmCIF dictionary
-function populatedict!(mmcif_dict::MMCIFDict, tokens::AbstractVector{<:String})
+function populatedict!(mmcif_dict::MMCIFDict{K}, tokens::AbstractVector{<:AbstractString}) where K<:AbstractString
     key = ""
-    keys = String[]
+    keys = K[]
     loop_flag = false
     i = 0 # Value counter
     n = 0 # Key counter
     for token in tokens
         if token == "loop_" || token == "LOOP_"
             loop_flag = true
-            keys = String[]
+            keys = K[]
             i = 0
             n = 0
             continue
@@ -258,22 +262,14 @@
                 if i > 0
                     loop_flag = false
                 else
-                    mmcif_dict[token] = String[]
+                    mmcif_dict[token] = K[]
                     push!(keys, token)
                     n += 1
                     continue
                 end
             else
-                try
-                    push!(mmcif_dict[keys[i % n + 1]], token)
-                catch ex
-                    # A zero division error means we have not found any keys
-                    if isa(ex, DivideError)
-                        throw(ArgumentError("Loop keys not found, token: \"$token\""))
-                    else
-                        rethrow()
-                    end
-                end
+                iszero(n) && throw(ArgumentError("Loop keys not found, token: \"$token\""))
+                push!(mmcif_dict[keys[i % n + 1]], token)
                 i += 1
                 continue
             end
@@ -298,14 +294,14 @@
             run_dssp::Bool=false,
             run_stride::Bool=false,
             gzip::Bool=false)
-    mmcif_dict = MMCIFDict()
     if gzip
         gz = GzipDecompressorStream(input)
         tokens = tokenizecifstructure(gz)
         close(gz)
     else
         tokens = tokenizecifstructure(input)
     end
+    mmcif_dict = MMCIFDict{eltype(tokens)}()
     populatedict!(mmcif_dict, tokens)
     return MolecularStructure(
         mmcif_dict;
@@ -384,25 +380,34 @@
 end
 
 # Constructor from mmCIF ATOM/HETATM line
-AtomRecord(d::MMCIFDict, i::Integer) = AtomRecord(
-    d["_atom_site.group_PDB"][i] == "HETATM",
-    parse(Int, d["_atom_site.id"][i]),
-    d["_atom_site.auth_atom_id"][i],
-    d["_atom_site.label_alt_id"][i] in missingvals ? ' ' : d["_atom_site.label_alt_id"][i][1],
-    d["_atom_site.auth_comp_id"][i],
-    d["_atom_site.auth_asym_id"][i],
-    parse(Int, d["_atom_site.auth_seq_id"][i]),
-    d["_atom_site.pdbx_PDB_ins_code"][i] in missingvals ? ' ' : d["_atom_site.pdbx_PDB_ins_code"][i][1],
-    [
-        parse(Float64, d["_atom_site.Cartn_x"][i]),
-        parse(Float64, d["_atom_site.Cartn_y"][i]),
-        parse(Float64, d["_atom_site.Cartn_z"][i])
-    ],
-    d["_atom_site.occupancy"][i] in missingvals ? 1.0 : parse(Float64, d["_atom_site.occupancy"][i]),
-    d["_atom_site.B_iso_or_equiv"][i] in missingvals ? 0.0 : parse(Float64, d["_atom_site.B_iso_or_equiv"][i]),
-    d["_atom_site.type_symbol"][i] in missingvals ? "  " : d["_atom_site.type_symbol"][i],
-    d["_atom_site.pdbx_formal_charge"][i] in missingvals ? "  " : d["_atom_site.pdbx_formal_charge"][i],
-)
+function AtomRecord(d::MMCIFDict, i::Integer)
+    alt_id = d["_atom_site.label_alt_id"][i]
+    ins_code = d["_atom_site.pdbx_PDB_ins_code"][i]
+    occupancy = d["_atom_site.occupancy"][i]
+    temp_factor = d["_atom_site.B_iso_or_equiv"][i]
+    typesym = d["_atom_site.type_symbol"][i]
+    charge = d["_atom_site.pdbx_formal_charge"][i]
+
+    return AtomRecord(
+        d["_atom_site.group_PDB"][i] == "HETATM",
+        parse(Int, d["_atom_site.id"][i]),
+        d["_atom_site.auth_atom_id"][i],
+        alt_id in missingvals ? ' ' : alt_id[1],
+        d["_atom_site.auth_comp_id"][i],
+        d["_atom_site.auth_asym_id"][i],
+        parse(Int, d["_atom_site.auth_seq_id"][i]),
+        ins_code in missingvals ? ' ' : ins_code[1],
+        SVector{3,Float64}((
+            parse(Float64, d["_atom_site.Cartn_x"][i]),
+            parse(Float64, d["_atom_site.Cartn_y"][i]),
+            parse(Float64, d["_atom_site.Cartn_z"][i]),
+        )),
+        occupancy in missingvals ? 1.0 : parse(Float64, occupancy),
+        temp_factor in missingvals ? 0.0 : parse(Float64, temp_factor),
+        typesym in missingvals ? "  " : typesym,
+        charge in missingvals ? "  " : charge,
+    )
+end
 
 # Format a mmCIF data value by enclosing with quotes or semicolon lines where
 #   appropriate. See
@@ -672,13 +677,13 @@
 Write multiple `MMCIFDict`s as a `Dict{String, MMCIFDict}` to a filepath or stream.
 The keyword argument `gzip` (default `false`) determines if the output is gzipped.
 """
-function writemultimmcif(filepath::AbstractString, cifs::Dict{String, MMCIFDict}; gzip::Bool=false)
+function writemultimmcif(filepath::AbstractString, cifs::Dict{String, <:MMCIFDict}; gzip::Bool=false)
     open(filepath, "w") do f
         writemultimmcif(f, cifs; gzip=gzip)
     end
 end
 
-function writemultimmcif(io::IO, cifs::Dict{String, MMCIFDict}; gzip::Bool=false)
+function writemultimmcif(io::IO, cifs::Dict{String, <:MMCIFDict}; gzip::Bool=false)
     if gzip
         io = GzipCompressorStream(io)
     end

diff --git a/src/model.jl b/src/model.jl
@@ -102,20 +102,20 @@ end
 Base.showerror(io::IO, e::PDBConsistencyError) = print(io, "PDBConsistencyError: ", e.message)
 
 "An atom that is part of a macromolecule."
-struct Atom <: AbstractAtom
-    serial::Int
-    name::String
-    alt_loc_id::Char
-    coords::Vector{Float64}
-    occupancy::Float64
-    temp_factor::Float64
-    element::String
-    charge::String
-    residue::StructuralElement
+mutable struct Atom <: AbstractAtom
+    const serial::Int
+    const name::String
+    const alt_loc_id::Char
+    coords::SVector{3,Float64}
+    const occupancy::Float64
+    const temp_factor::Float64
+    const element::String
+    const charge::String
+    const residue::StructuralElement
 end
 
 function Atom(a::Atom, r::StructuralElement)
-    return Atom(a.serial, a.name, a.alt_loc_id, copy(a.coords), a.occupancy,
+    return Atom(a.serial, a.name, a.alt_loc_id, a.coords, a.occupancy,
                 a.temp_factor, a.element, a.charge, r)
 end
 
@@ -234,7 +234,7 @@ struct AtomRecord
     chain_id::String
     res_number::Int
     ins_code::Char
-    coords::Vector{Float64}
+    coords::SVector{3,Float64}
     occupancy::Float64
     temp_factor::Float64
     element::String
@@ -483,7 +483,7 @@ Set the x coordinate in Å of an `AbstractAtom` to `val`.
 
 For `DisorderedAtom`s only the default atom is updated.
 """
-x!(at::Atom, x::Real) = (at.coords[1] = x; at)
+x!(at::Atom, x::Real) = (at.coords = SVector{3,Float64}((x, at.coords[2], at.coords[3])))
 x!(dis_at::DisorderedAtom, x::Real) = x!(defaultatom(dis_at), x)
 
 """
@@ -501,7 +501,7 @@ Set the y coordinate in Å of an `AbstractAtom` to `val`.
 
 For `DisorderedAtom`s only the default atom is updated.
 """
-y!(at::Atom, y::Real) = (at.coords[2] = y; at)
+y!(at::Atom, y::Real) = (at.coords = SVector{3,Float64}((at.coords[1], y, at.coords[3])))
 y!(dis_at::DisorderedAtom, y::Real) = y!(defaultatom(dis_at), y)
 
 """
@@ -519,31 +519,30 @@ Set the z coordinate in Å of an `AbstractAtom` to `val`.
 
 For `DisorderedAtom`s only the default atom is updated.
 """
-z!(at::Atom, z::Real) = (at.coords[3] = z; at)
+z!(at::Atom, z::Real) = (at.coords = SVector{3,Float64}((at.coords[1], at.coords[2], z)))
 z!(dis_at::DisorderedAtom, z::Real) = z!(defaultatom(dis_at), z)
 
 """
     coords(at)
 
-Get the coordinates in Å of an `AbstractAtom` as a `Vector{Float64}`.
+Get the coordinates in Å of an `AbstractAtom` as a `SVector{3,Float64}`.
 """
 coords(at::Atom) = at.coords
 coords(dis_at::DisorderedAtom) = coords(defaultatom(dis_at))
 
 """
     coords!(at, new_coords)
 
-Set the coordinates in Å of an `AbstractAtom` to a `Vector` of 3 numbers.
+Set the coordinates in Å of an `AbstractAtom` to `new_coords`, an iterable of 3 numbers.
 
 For `DisorderedAtom`s only the default atom is updated.
 """
 function coords!(at::Atom, new_coords)
     if length(new_coords) != 3
         throw(ArgumentError("3 coordinates must be given"))
     end
-    x!(at, new_coords[1])
-    y!(at, new_coords[2])
-    z!(at, new_coords[3])
+    x, y, z = new_coords
+    at.coords = SVector{3,Float64}((x, y, z))
     return at
 end
 
@@ -784,7 +783,7 @@ function resid(hetatm::Bool, resnum::Int, inscode::Char)
         end
     else
         if inscode == ' '
-            return "$resnum"
+            return string(resnum)
         else
             return "$resnum$inscode"
         end

diff --git a/src/pdb.jl b/src/pdb.jl
@@ -127,11 +127,11 @@ function AtomRecord(pdb_line::String, line_n::Integer=1)
         parsechainid(pdb_line, line_n),
         parseresnumber(pdb_line, line_n),
         parseinscode(pdb_line, line_n),
-        [
+        SVector{3,Float64}((
             parsecoordx(pdb_line, line_n),
             parsecoordy(pdb_line, line_n),
             parsecoordz(pdb_line, line_n)
-        ],
+        )),
         n >= 60 ? parseoccupancy(pdb_line) : 1.0,
         n >= 66 ? parsetempfac(pdb_line) : 0.0,
         n >= 78 ? parseelement(pdb_line) : "  ",