Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ The project uses Docker Compose to orchestrate three main services:
- **Setup**: Clone your target repository here (e.g., `git clone https://github.com/bitcoin/bitcoin.git user_supplied_repo`)
- **Docker Mount**: Mounted to backend container at `/app/bitcoin`
- **Used By**: `backend/app/git_processor.py` reads from `config.CONTAINER_SIDE_REPOSITORY_PATH`
- **Git safety**: The backend Docker image automatically marks `/app/bitcoin` as a safe Git directory. You only need to run `git config --global --add safe.directory <host-path>` yourself when working with the repository outside of Docker.

2. **`data/neo4j/`** (Auto-created, but required for persistence)
- **Purpose**: Stores Neo4j graph database files
Expand Down Expand Up @@ -453,6 +454,9 @@ You can monitor the import progress by:
- Run queries to check node counts:
```cypher
MATCH (a:Actor) RETURN count(a) as actors
```

```cypher
MATCH (c:Commit) RETURN count(c) as commits
```

Expand Down
1 change: 1 addition & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN pip install --no-cache-dir \
GitPython \
git-fame

RUN git config --global --add safe.directory /app/bitcoin

# Copy app source
COPY app .
Expand Down
25 changes: 25 additions & 0 deletions backend/app/neo4j_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ def merge_import_status(self):

@staticmethod
def _merge_import_status_node(tx):

# debugging the merge function in cypher
# query = "MATCH (a:ImportStatus) RETURN a"

query = """
MERGE (a:ImportStatus)
ON CREATE SET a.git_import_complete = false, a.next_complete = false
Expand Down Expand Up @@ -342,3 +346,24 @@ def _create_and_return_import_status_node(tx):
"git_import_complete": record["a.git_import_complete"],
"next_complete": record["a.next_complete"]
}

def get_node_count(self, label: str) -> int:
    """Return the number of nodes carrying ``label``.

    The label is interpolated into the query text, so it is validated
    before the query is built.

    Args:
        label: Node label to count, e.g. ``"Actor"``.

    Returns:
        The value of ``count(n)``, or 0 if the query yields no record.

    Raises:
        ValueError: If ``label`` is not a string of letters, digits and
            underscores that starts with a letter or underscore.
    """
    # str.isidentifier() rejects the empty string, a leading digit, and any
    # character (backtick, space, colon, parenthesis, ...) that could escape
    # the label position. The earlier isalnum()-based check accepted
    # digit-leading labels, which would produce an invalid Cypher query.
    if not (isinstance(label, str) and label.isidentifier()):
        raise ValueError(
            f"Invalid label: {label}. Labels must contain only alphanumeric "
            "characters and underscores, and must not start with a digit."
        )
    query = f"MATCH (n:{label}) RETURN count(n) AS count"
    with self.driver.session() as session:
        record = session.run(query).single()  # type: ignore[arg-type]
        return record["count"] if record else 0

def get_import_status(self):
    """Fetch the ImportStatus node and return it converted to a plain dict.

    Returns ``None`` when the query produces no record.
    """
    with self.driver.session() as db_session:
        row = db_session.run("MATCH (i:ImportStatus) RETURN i").single()
        return dict(row["i"]) if row else None
36 changes: 36 additions & 0 deletions backend/app/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,42 @@ def resolve_fame(self, info, folder):
contributors=contributors
)

# Count queries for displaying node counts
actor_count = graphene.Int(description="Return the total count of Actor nodes")

def resolve_actor_count(self, info):
    """Resolve the ``actor_count`` field by counting ``Actor`` nodes.

    A new ``Neo4jDriver`` is opened for the call and always closed, even
    when the count query raises.
    """
    db = Neo4jDriver()
    try:
        return db.get_node_count("Actor")
    finally:
        # Release the driver on both the success and the error path.
        db.close()

commit_count = graphene.Int(description="Return the total count of Commit nodes")

def resolve_commit_count(self, info):
    """Resolve the ``commit_count`` field by counting ``Commit`` nodes.

    A new ``Neo4jDriver`` is opened for the call and always closed, even
    when the count query raises.
    """
    db = Neo4jDriver()
    try:
        return db.get_node_count("Commit")
    finally:
        # Release the driver on both the success and the error path.
        db.close()

file_detail_record_count = graphene.Int(description="Return the total count of FileDetailRecord nodes")

def resolve_file_detail_record_count(self, info):
    """Resolve ``file_detail_record_count`` by counting ``FileDetailRecord`` nodes.

    A new ``Neo4jDriver`` is opened for the call and always closed, even
    when the count query raises.
    """
    db = Neo4jDriver()
    try:
        return db.get_node_count("FileDetailRecord")
    finally:
        # Release the driver on both the success and the error path.
        db.close()

import_status = graphene.Field(
    graphene.JSONString,
    description="Return the ImportStatus node details"
)

def resolve_import_status(self, info):
    """Resolve the ``import_status`` field from ``Neo4jDriver.get_import_status``.

    A new ``Neo4jDriver`` is opened for the call and always closed, even
    when the status query raises.
    """
    db = Neo4jDriver()
    try:
        return db.get_import_status()
    finally:
        # Release the driver on both the success and the error path.
        db.close()


class CreateGithubOrganization(graphene.Mutation):
class Arguments:
Expand Down
29 changes: 29 additions & 0 deletions backend/tests/test_git_to_neo4j_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,32 @@ def test_folder_processing_with_preseeded_status(sample_git_repo, neo4j_driver):
"src/consensus"
]


def test_count_methods(sample_git_repo, neo4j_driver):
    """Check ``get_node_count`` and ``get_import_status`` after a full import."""
    # Base git import first; no folders are processed at this stage.
    process_git_data(
        repo_path=sample_git_repo.path,
        neo4j_driver=neo4j_driver,
        folder_paths=[],
    )

    # Importing one folder is what creates the FileDetailRecord node.
    from git_processor import import_bitcoin_path
    import_bitcoin_path(
        "src/consensus",
        repo_path=sample_git_repo.path,
        neo4j_driver=neo4j_driver,
    )

    # Fetch every count up front, in the same order, before asserting.
    counts = {
        label: neo4j_driver.get_node_count(label)
        for label in ("Actor", "Commit", "FileDetailRecord")
    }
    assert counts["Actor"] >= 3
    assert counts["Commit"] == len(sample_git_repo.commits)
    assert counts["FileDetailRecord"] == 1  # exactly one folder was imported

    # The ImportStatus node must exist and report a finished git import.
    import_status = neo4j_driver.get_import_status()
    assert import_status is not None
    assert "git_import_complete" in import_status
    assert import_status["git_import_complete"] is True

Loading