Merge pull request #308 from JuliaGraphs/nonbacktracking_implicit

jpfairbanks · jpfairbanks · commit 2fbea52cfd58 · 2016-03-06T23:08:32.000-05:00
add Nonbacktracking type
diff --git a/bench/nonbacktracking.jl b/bench/nonbacktracking.jl
@@ -0,0 +1,48 @@
+using LightGraphs 
+
+sizesnbm1 = Int64[@allocated non_backtracking_matrix(CycleGraph(2^i))for i in 4:10]
+sizesnbm2 = Int64[@allocated Nonbacktracking(CycleGraph(2^i)) for i in 4:10]
+
+
+println("Relative Sizes:\n $(float(sizesnbm1./sizesnbm2))")
+
+macro storetime(name, expression)
+    ex = quote
+        val = $expression;
+        timeinfo[$name] = @elapsed $expression; 
+        val
+    end
+    return ex
+end
+
+function bench(g)
+    nbt = @storetime :Construction_Nonbacktracking  Nonbacktracking(g)
+    x  = ones(Float64, size(nbt)[1])
+    info("Cycle with $n vertices has nbt in R^$(size(nbt))")
+
+    B, nmap = @storetime :Construction_Dense        non_backtracking_matrix(g)
+    y  = @storetime :Multiplication_Nonbacktracking nbt*x
+    z  = @storetime :Multiplication_Dense           B*x;
+    S  = @storetime :Construction_DS                sparse(B)
+    z  = @storetime :Multiplication_Sparse          z = S*x;
+    Sp = @storetime :Construction_Sparse            sparse(nbt)
+    z  = @storetime :Multiplication_Sparse          Sp*x
+end
+
+function report(timeinfo)
+    info("Times")
+    println("Function\t Constructors\t Multiplication")
+    println("Dense   \t $(timeinfo[:Construction_Dense])\t $(timeinfo[:Multiplication_Dense])")
+    println("Sparse  \t $(timeinfo[:Construction_Sparse])\t $(timeinfo[:Multiplication_Sparse])")
+    println("Implicit\t $(timeinfo[:Construction_Nonbacktracking])\t $(timeinfo[:Multiplication_Nonbacktracking])")
+    info("Implicit Multiplication is $(timeinfo[:Multiplication_Dense]/timeinfo[:Multiplication_Nonbacktracking]) faster than dense.")
+    info("Sparse Multiplication is $(timeinfo[:Multiplication_Nonbacktracking]/timeinfo[:Multiplication_Sparse]) faster than implicit.")
+    info("Direct Sparse Construction took $(timeinfo[:Construction_Sparse])
+    Dense to Sparse took: $(timeinfo[:Construction_DS])")
+end
+
+n = 2^13
+C = CycleGraph(n)
+timeinfo = Dict{Symbol, Float64}()
+bench(C)
+report(timeinfo)
diff --git a/src/LightGraphs.jl b/src/LightGraphs.jl
@@ -82,6 +82,8 @@ indegree_centrality, outdegree_centrality, katz_centrality, pagerank,
 # spectral
 adjacency_matrix,laplacian_matrix, adjacency_spectrum, laplacian_spectrum,
 CombinatorialAdjacency, non_backtracking_matrix, incidence_matrix,
+nonbacktrack_embedding, Nonbacktracking,
+contract,
 
 # astar
 a_star,
diff --git a/src/community/detection.jl b/src/community/detection.jl
@@ -9,13 +9,13 @@ return : array containing vertex assignments
 """
 function community_detection_nback(g::Graph, k::Int)
     #TODO insert check on connected_components
-    ϕ = nonbacktrack_embedding(g, k)
+    ϕ = real(nonbacktrack_embedding(g, k))
     if k==2
         c = community_detection_threshold(g, ϕ[1,:])
     else
         c = kmeans(ϕ, k).assignments
     end
-    c
+    return c
 end
 
 function community_detection_threshold(g::SimpleGraph, coords::AbstractArray)
@@ -30,31 +30,29 @@ function community_detection_threshold(g::SimpleGraph, coords::AbstractArray)
 end
 
 
-
 """ Spectral embedding of the non-backtracking matrix of `g`
 (see [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short)).
 
 `g`: imput Graph
 `k`: number of dimensions in which to embed
 
 return : a matrix ϕ where ϕ[:,i] are the coordinates for vertex i.
+
+Note does not explicitly construct the `non_backtracking_matrix`.
+See `Nonbacktracking` for details.
+
 """
 function nonbacktrack_embedding(g::Graph, k::Int)
-    B, edgeid = non_backtracking_matrix(g)
-    λ,eigv,_ = eigs(B, nev=k+1)
-    ϕ = zeros(Float64, k-1, nv(g))
+    B = Nonbacktracking(g)
+    λ,eigv,conv = eigs(B, nev=k+1, v0=ones(Float64, B.m))
+    ϕ = zeros(Complex64, nv(g), k-1)
     # TODO decide what to do with the stationary distribution ϕ[:,1]
     # this code just throws it away in favor of eigv[:,2:k+1].
     # we might also use the degree distribution to scale these vectors as is
     # common with the laplacian/adjacency methods.
     for n=1:k-1
         v= eigv[:,n+1]
-        for i=1:nv(g)
-            for j in neighbors(g, i)
-                u = edgeid[Edge(j,i)]
-                ϕ[n,i] += v[u]
-            end
-        end
+        ϕ[:,n] = contract(B, v)
     end
-    return ϕ
+    return ϕ'
 end
diff --git a/src/spectral.jl b/src/spectral.jl
@@ -49,45 +49,6 @@ function adjacency_matrix(g::SimpleGraph, dir::Symbol=:out, T::DataType=Int)
 end
 
 
-"""
-Given two oriented edges i->j and k->l in g, the
-non-backtraking matrix B is defined as
-
-B[i->j, k->l] = δ(j,k)* (1 - δ(i,l))
-
-returns a matrix B, and an edgemap storing the oriented edges' positions in B
-"""
-function non_backtracking_matrix(g::SimpleGraph)
-    # idedgemap = Dict{Int, Edge}()
-    edgeidmap = Dict{Edge, Int}()
-    m = 0
-    for e in edges(g)
-        m += 1
-        edgeidmap[e] = m
-    end
-
-    if !is_directed(g)
-        for e in edges(g)
-            m += 1
-            edgeidmap[reverse(e)] = m
-        end
-    end
-
-    B = zeros(Float64, m, m)
-
-    for (e,u) in edgeidmap
-        i, j = src(e), dst(e)
-        for k in in_neighbors(g,i)
-            k == j && continue
-            v = edgeidmap[Edge(k, i)]
-            B[v, u] = 1
-        end
-    end
-
-    return B, edgeidmap
-end
-
-
 """Returns a sparse [Laplacian matrix](https://en.wikipedia.org/wiki/Laplacian_matrix)
 for a graph `g`, indexed by `[src, dst]` vertices. For undirected graphs, `dir`
 defaults to `:out`; for directed graphs, `dir` defaults to `:both`. `T`
@@ -126,6 +87,7 @@ adjacency_spectrum(g::DiGraph, dir::Symbol=:both, T::DataType=Int) = eigvals(ful
 
 
 # GraphMatrices integration
+# CombinatorialAdjacency(g) returns a type that supports iterative linear solvers and eigenvector solvers.
 @require GraphMatrices begin
 
 function CombinatorialAdjacency(g::Graph)
@@ -167,3 +129,151 @@ function incidence_matrix(g::SimpleGraph, T::DataType=Int)
     spmx = SparseMatrixCSC(n_v,n_e,colpt,rowval,nzval)
     return spmx
 end
+
+"""
+Given two oriented edges i->j and k->l in g, the
+non-backtraking matrix B is defined as
+
+B[i->j, k->l] = δ(j,k)* (1 - δ(i,l))
+
+returns a matrix B, and an edgemap storing the oriented edges' positions in B
+"""
+function non_backtracking_matrix(g::SimpleGraph)
+    # idedgemap = Dict{Int, Edge}()
+    edgeidmap = Dict{Edge, Int}()
+    m = 0
+    for e in edges(g)
+        m += 1
+        edgeidmap[e] = m
+    end
+
+    if !is_directed(g)
+        for e in edges(g)
+            m += 1
+            edgeidmap[reverse(e)] = m
+        end
+    end
+
+    B = zeros(Float64, m, m)
+
+    for (e,u) in edgeidmap
+        i, j = src(e), dst(e)
+        for k in in_neighbors(g,i)
+            k == j && continue
+            v = edgeidmap[Edge(k, i)]
+            B[v, u] = 1
+        end
+    end
+
+    return B, edgeidmap
+end
+
+"""Nonbacktracking: a compact representation of the nonbacktracking operator
+
+    g: the underlying graph
+    edgeidmap: the association between oriented edges and index into the NBT matrix
+
+The Nonbacktracking operator can be used for community detection.
+This representation is compact in that it uses only ne(g) additional storage
+and provides an implicit representation of the matrix B_g defined below.
+
+Given two oriented edges i->j and k->l in g, the
+non-backtraking matrix B is defined as
+
+B[i->j, k->l] = δ(j,k)* (1 - δ(i,l))
+
+This type is in the style of GraphMatrices.jl and supports the necessary operations
+for computed eigenvectors and conducting linear solves.
+
+Additionally the contract!(vertexspace, nbt, edgespace) method takes vectors represented in
+the domain of B and represents them in the domain of the adjacency matrix of g.
+"""
+type Nonbacktracking{G}
+    g::G
+    edgeidmap::Dict{Edge,Int}
+    m::Int
+end
+
+function Nonbacktracking(g::SimpleGraph)
+    edgeidmap = Dict{Edge, Int}()
+    m = 0
+    for e in edges(g)
+        m += 1
+        edgeidmap[e] = m
+    end
+    if !is_directed(g)
+        for e in edges(g)
+            m += 1
+            edgeidmap[reverse(e)] = m
+        end
+    end
+    return Nonbacktracking(g, edgeidmap, m)
+end
+
+size(nbt::Nonbacktracking) = (nbt.m,nbt.m)
+eltype(nbt::Nonbacktracking) = Float64
+issym(nbt::Nonbacktracking) = false
+
+function *{G, T<:Number}(nbt::Nonbacktracking{G}, x::Vector{T})
+    length(x) == nbt.m || error("dimension mismatch")
+    y = zeros(T, length(x))
+    for (e,u) in nbt.edgeidmap
+        i, j = src(e), dst(e)
+        for k in in_neighbors(nbt.g,i)
+            k == j && continue
+            v = nbt.edgeidmap[Edge(k, i)]
+            y[v] += x[u]
+        end
+    end
+    return y
+end
+
+function coo_sparse(nbt::Nonbacktracking)
+    m = nbt.m
+    #= I,J = zeros(Int, m), zeros(Int, m) =#
+    I,J = zeros(Int, 0), zeros(Int, 0)
+    for (e,u) in nbt.edgeidmap
+        i, j = src(e), dst(e)
+        for k in in_neighbors(nbt.g,i)
+            k == j && continue
+            v = nbt.edgeidmap[Edge(k, i)]
+            #= J[u] = v =#
+            #= I[u] = u =#
+            push!(I, v)
+            push!(J, u)
+        end
+    end
+    return I,J,1.0
+end
+
+sparse(nbt::Nonbacktracking) = sparse(coo_sparse(nbt)..., nbt.m,nbt.m)
+
+function *{G, T<:Number}(nbt::Nonbacktracking{G}, x::AbstractMatrix{T})
+    y = zeros(x)
+    for i in 1:nbt.m
+        y[:,i] = nbt * x[:,i]
+    end
+    return y
+end
+
+"""contract!(vertexspace, nbt, edgespace) in place version of
+contract(nbt, edgespace). modifies first argument
+"""
+function contract!(vertexspace::Vector, nbt::Nonbacktracking, edgespace::Vector)
+    for i=1:nv(nbt.g)
+        for j in neighbors(nbt.g, i)
+            u = nbt.edgeidmap[Edge(j,i)]
+            vertexspace[i] += edgespace[u]
+        end
+    end
+end
+
+"""contract(nbt, edgespace)
+Integrates out the edges by summing over the edges incident to each vertex.
+"""
+function contract(nbt::Nonbacktracking, edgespace::Vector)
+    y = zeros(eltype(edgespace), nv(nbt.g))
+    contract!(y,nbt,edgespace)
+    return y
+end
+
diff --git a/test/community/detection.jl b/test/community/detection.jl
@@ -1,3 +1,55 @@
+""" Spectral embedding of the non-backtracking matrix of `g`
+(see [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short)).
+
+`g`: imput Graph
+`k`: number of dimensions in which to embed
+
+return : a matrix ϕ where ϕ[:,i] are the coordinates for vertex i.
+"""
+function nonbacktrack_embedding_dense(g::Graph, k::Int)
+    B, edgeid = non_backtracking_matrix(g)
+    λ,eigv,conv = eigs(B, nev=k+1, v0=ones(Float64, size(B,1)))
+    ϕ = zeros(Complex64, k-1, nv(g))
+    # TODO decide what to do with the stationary distribution ϕ[:,1]
+    # this code just throws it away in favor of eigv[:,2:k+1].
+    # we might also use the degree distribution to scale these vectors as is
+    # common with the laplacian/adjacency methods.
+    for n=1:k-1
+        v= eigv[:,n+1]
+        for i=1:nv(g)
+            for j in neighbors(g, i)
+                u = edgeid[Edge(j,i)]
+                ϕ[n,i] += v[u]
+            end
+        end
+    end
+    return ϕ
+end
+
+n = 10; k = 5
+pg = PathGraph(n)
+ϕ1 = nonbacktrack_embedding(pg, k)'
+
+nbt = Nonbacktracking(pg)
+B, emap = non_backtracking_matrix(pg)
+Bs = sparse(nbt)
+@test sparse(B) == Bs
+
+# check that matvec works
+x = ones(Float64, nbt.m)
+y = nbt * x
+z = B * x
+@test norm(y-z) < 1e-8
+
+#check that matmat works and full(nbt) == B
+@test norm(nbt*eye(nbt.m) - B) < 1e-8
+
+#check that we can use the implicit matvec in nonbacktrack_embedding
+@test size(y) == size(x)
+ϕ2 = nonbacktrack_embedding_dense(pg, k)'
+@test size(ϕ2) == size(ϕ1)
+
+#check that this recovers communities in the path of cliques
 n=10
 g10 = CompleteGraph(n)
 z = copy(g10)
@@ -13,3 +65,4 @@ for k=2:5
         end
     end
 end
+