diff --git a/.gitignore b/.gitignore
index eeee3ce5..38d2af96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,105 @@ src/graph_notebook/widgets/package-lock.json
blazegraph.jnl
rules.log
*.env
+notebook/destination/dir/About-the-Neptune-Notebook.ipynb
+notebook/destination/dir/Overview.ipynb
+notebook/destination/dir/Untitled.ipynb
+notebook/destination/dir/Untitled1.ipynb
+notebook/destination/dir/.ipynb_checkpoints/Untitled-checkpoint.ipynb
+notebook/destination/dir/.ipynb_checkpoints/Untitled1-checkpoint.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/01-About-the-Neptune-Notebook.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/02-Using-Gremlin-to-Access-the-Graph.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/03-Using-RDF-and-SPARQL-to-Access-the-Graph.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/04-Social-Network-Recommendations-with-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/05-Dining-By-Friends-in-Amazon-Neptune.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-SPARQL.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Blog Workbench Visualization.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-SPARQL.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Grouping-and-Appearance-Customization-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/00-Sample-Applications-Overview.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/README.md
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/01-Fraud-Graphs/01-Building-a-Fraud-Graph-Application.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/01-Building-an-Identity-Graph-Application.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/02-Data-Modeling-for-Identity-Graphs.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/glue_utils.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-demographics.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-telemetry.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-transactions.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/05-Healthcare-and-Life-Sciences-Graphs/01-Modeling-Molecular-Structures-as-Graph-Data-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/01-Identifying-Fraud-Rings-Using-Social-Network-Analytics.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/02-Identifying-1st-Person-Synthetic-Identity-Fraud-Using-Graph-Similarity.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/03-Logistics-Analysis-using-a-Transportation-Network.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/07-Games-Industry-Graphs/01-Building-a-Social-Network-for-Games-Gremlin.ipynb
+notebook/destination/dir/02-Neptune-Analytics/01-Getting-Started/01-Getting-Started-With-Neptune-Analytics.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/00-Amazon-Neptune-Analytics-Algorithm-Support.pdf
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/01-Getting-Started-With-Graph-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/02-Path-Finding-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/03-Centrality-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/04-Community-Detection-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/05-Similarity-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/06-Vector-Similarity-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/.ipynb_checkpoints/03-Centrality-Algorithms-checkpoint.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/Overview.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/01-FinTech/01-Fraud-Ring-Identifcation.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/02-Investment-Analysis/01-EDGAR-Competitor-Analysis-using-Knowledge-Graph-Graph-Algorithms-and-Vector-Search.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/00-Intro-to-Software-Bill-Of-Materials.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/01-SBOM-Dependency-Analysis.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/02-SBOM-Vulnerability-Analysis.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/nodestream_template.yaml
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/aws-sdk-pandas_aws_de5d1610d6d4ea3be44a01ab3f09b64e291a4ab7.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/boto3_boto_6bbdf83ee00b749587f0fe54778fbec5411147b5.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-explorer_aws_39eed2c8bae4afc1b38fa7975c720461a7c7c3a6.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-notebook_aws_bb96dd8d0d9ef9d0e9060f8c5e26a042a3db40c4.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/aws-cli-2-0-6.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-console-3-7-1_cydx.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-server-3-7-1-cydx.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/sbom_code/sbom_helper.py
+notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb
+notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb
+notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/Air-Routes-Ontology-Diagram.png
+notebook/destination/dir/03-Neptune-ML/neptune_ml_utils.py
+notebook/destination/dir/03-Neptune-ML/neptune-ml-pretrained-model-config.json
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/01-Getting-Started-with-Neptune-ML-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/02-Introduction-to-Node-Classification-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/03-Introduction-to-Node-Regression-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/04-Introduction-to-Link-Prediction-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/05-Introduction-to-Edge-Classification-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/06-Introduction-to-Edge-Regression-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune_ml_sparql_utils.py
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-00-Getting-Started-with-Neptune-ML-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-01-Introduction-to-Object-Classification-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-02-Introduction-to-Object-Regression-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-03-Introduction-to-Link-Prediction-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune-ml-pretrained-rdf-model-config.json
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/02-Job-Recommendation-Text-Encoding.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/03-Real-Time-Fraud-Detection-Using-Inductive-Inference.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/WA_Fn-UseC_-HR-Employee-Attrition.csv
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1a-Use-case.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1b-Graph_init.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2a-GraphQueryGremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2b-GraphQueryLLM.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3a-TransductiveMode-CellPrediction.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3b-InductiveModeCell-Prediction.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/neptune_ml_utils.py
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/Transform2Neptune.py
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/01-Basic-Read-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/02-Loops-Repeats.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/03-Ordering-Functions-Grouping.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/04-Creating-Updating-Deleting-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/Gremlin-Exercises-Answer-Sheet.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/01-Basic-Read-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/02-Variable-Length-Paths.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/03-Ordering-Functions-Grouping.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/04-Creating-Updating-Delete-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/openCypher-Exercises-Answer-Key.ipynb
+notebook/destination/dir/04-Language-Tutorials/03-SPARQL/01-SPARQL-Basics.ipynb
diff --git a/ChangeLog.md b/ChangeLog.md
index 49e3435a..b5da410e 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -3,6 +3,7 @@
Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook.
## Upcoming
+- Added %degreeDistribution magic command ([Link to PR](https://github.com/aws/graph-notebook/pull/749))
## Release 5.0.1 (May 19, 2025)
- Locked numba dependency to 0.60.0 to avoid numpy conflict ([Link to PR](https://github.com/aws/graph-notebook/pull/735))
diff --git a/pyproject.toml b/pyproject.toml
index 2cab94eb..f678a191 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ dependencies = [
'networkx==2.4',
'numpy>=1.23.5,<1.24.0',
'pandas>=2.1.0,<=2.2.2',
+ 'plotly>=6.1.2',
# Graph databases and query languages
'gremlinpython>=3.5.1,<=3.7.2',
diff --git a/requirements.txt b/requirements.txt
index a80fd44e..b29e8f7d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,6 +18,7 @@ itables>=2.0.0,<=2.1.0
networkx==2.4
numpy>=1.23.5,<1.24.0
pandas>=2.1.0,<=2.2.2
+plotly>=6.1.2
# Graph databases and query languages
gremlinpython>=3.5.1,<=3.7.2
diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py
index acf1a0e3..b1e4e190 100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -15,6 +15,14 @@
import uuid
import ast
import re
+import sys
+import math
+
+import numpy as np
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from IPython.display import display
+
from ipyfilechooser import FileChooser
from enum import Enum
@@ -53,7 +61,7 @@
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
- normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \
+ normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, TRAVERSAL_DIRECTIONS, \
GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, GRAPHSONV4_UNTYPED, \
GREMLIN_SERIALIZERS_WS, get_gremlin_serializer_mime, normalize_protocol_name, generate_snapshot_name)
from graph_notebook.network import SPARQLNetwork
@@ -3541,9 +3550,10 @@ def neptune_ml(self, line, cell='', local_ns: dict = None):
with main_output:
print(message)
- def handle_opencypher_query(self, line, cell, local_ns):
+ def handle_opencypher_query(self, line, cell, local_ns, return_tabs=False):
"""
- This method in its own handler so that the magics %%opencypher and %%oc can both call it
+ This method is refactored into its own handler so that the magics %%opencypher and %%oc can both call it.
+ return_tabs: If True, return the titles and children lists instead of displaying the tab widget; the caller displays them later.
"""
parser = argparse.ArgumentParser()
parser.add_argument('-pc', '--plan-cache', type=str.lower, default='auto',
@@ -3766,7 +3776,8 @@ def handle_opencypher_query(self, line, cell, local_ns):
for i in range(len(titles)):
tab.set_title(i, titles[i])
- display(tab)
+ if not return_tabs:
+ display(tab)
with metadata_output:
display(HTML(oc_metadata.to_html()))
@@ -3844,6 +3855,9 @@ def handle_opencypher_query(self, line, cell, local_ns):
store_to_ns(args.store_to, stored_results, local_ns)
+ if return_tabs:
+ return {'titles': titles, 'children': children}
+
def handle_opencypher_status(self, line, local_ns):
"""
This is refactored into its own handler method so that we can invoke it from
@@ -3920,3 +3934,766 @@ def handle_opencypher_status(self, line, local_ns):
store_to_ns(args.store_to, js, local_ns)
if not args.silent:
print(json.dumps(js, indent=2))
+
+ # %degreeDistribution magic command.
+ # Renders the degree distribution of the graph as an interactive histogram in the notebook. The
+ # histogram shows the number of vertices with a given degree: degree on the x-axis, vertex count on
+ # the y-axis. It takes traversalDirection [both (default), inbound, outbound], vertexLabels [default
+ # is empty list], and edgeLabels [default is empty list] parameters, and shows the histogram for the
+ # specified degree (both/in/out) distribution of the vertices in the graph, filtered by the specified
+ # vertex and edge labels. Parameters can be supplied as command-line arguments and/or through the
+ # dropdown widgets.
+ # Example usages:
+ # > %degreeDistribution
+ # > %degreeDistribution --traversalDirection inbound
+ # > %degreeDistribution --traversalDirection inbound --vertexLabels airport country
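+ # > %degreeDistribution --vertexLabels airport --edgeLabels route    (e.g. with the air-routes sample dataset)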
+
+ @line_magic
+ @needs_local_scope
+ @display_exceptions
+ @neptune_graph_only
+ def degreeDistribution(self, line, local_ns: dict = None):
+ if not self.client.is_analytics_domain():
+ print("This command is only supported for Neptune Analytics domains.")
+ return
+
+ parser = argparse.ArgumentParser()
+
+ # Get the vertexLabels and edgeLabels from graph summary, to be shown in the widgets for selection.
+ try:
+ summary_res = self.client.statistics("propertygraph", True, "detailed", True)
+ summary_res.raise_for_status()
+ summary_res_json = summary_res.json()
+ available_vertex_labels = summary_res_json['graphSummary']['nodeLabels']
+ available_edge_labels = summary_res_json['graphSummary']['edgeLabels']
+ except Exception as e:
+ print(f"Error retrieving graph summary: {e}")
+ return
+
+ # traversalDirection: Type of the degree computed:
+ # - inbound: Counts only the incoming edges for each vertex
+ # - outbound: Counts only the outgoing edges for each vertex
+ # - both [default]: Counts both the incoming and outgoing edges for each vertex.
+ parser.add_argument('--traversalDirection', nargs='?', type=str.lower, default='both',
+ help=f'Type of the degree for which the distribution is shown. Valid inputs: {TRAVERSAL_DIRECTIONS}. '
+ f'Default: both.',
+ choices=TRAVERSAL_DIRECTIONS)
+
+ # vertexLabels: List of the vertex labels, space separated, for which the degrees are computed:
+ # - default value is empty list, which means the degrees are computed for any vertex label.
+ parser.add_argument('--vertexLabels', nargs='*', default=[],
+ help="The vertex labels for which the induced graph is considered and the degree distribution is shown. "
+ "If not supplied, we will default to using all the vertex labels.")
+
+ # edgeLabels: List of the edge labels, space separated, for which the degrees are computed:
+ # - default value is empty list, which means the degrees are computed for any edge label.
+ parser.add_argument('--edgeLabels', nargs='*', default=[],
+ help="The edge labels for which the degree distribution is shown. If not supplied, "
+ "we will default to using all the edge labels.")
+
+ args = parser.parse_args(line.split())
+
+ # If the traversalDirection parameter is specified on the command line, it is shown as the default
+ # in the dropdown menu. Otherwise, the default in the dropdown is 'both'.
+ td_val = args.traversalDirection
+ td_val = td_val.lower() if td_val else 'both'
+
+ td_dropdown = widgets.Dropdown(
+ options=TRAVERSAL_DIRECTIONS,
+ description='Degree type:',
+ disabled=False,
+ style=SEED_WIDGET_STYLE,
value=td_val
+ )
+
+ # Existing vertex labels in the graph are shown in the dropdown menu. If any vertex label is specified on
+ # the command line, they are shown to be selected in the dropdown menu. Otherwise, no label is selected
+ # in the dropdown menu, which means any label and all the labels are considered in the computation.
+ available_vertex_labels = sorted(available_vertex_labels)
+ selected_vlabels = args.vertexLabels if args.vertexLabels else []
+ vertex_labels_select = widgets.SelectMultiple(
+ options=available_vertex_labels,
+ description='Vertex labels:',
+ disabled=False,
+ style=SEED_WIDGET_STYLE,
value=selected_vlabels
+ )
+
+ # Existing edge labels in the graph are shown in the dropdown menu. If any edge label is specified on
+ # the command line, they are shown to be selected in the dropdown menu. Otherwise, no label is selected
+ # in the dropdown menu, which means any label and all the labels are considered in the computation.
+ available_edge_labels = sorted(available_edge_labels)
+ selected_elabels = args.edgeLabels if args.edgeLabels else []
+ edge_labels_select = widgets.SelectMultiple(
+ options=available_edge_labels,
+ description='Edge labels:',
+ disabled=False,
+ style=SEED_WIDGET_STYLE,
value=selected_elabels
+ )
+
+ submit_button = widgets.Button(description="Submit")
+ status_output = widgets.Output()
+ output = widgets.Output()
+
+ # Display widgets
+ display(td_dropdown, vertex_labels_select, edge_labels_select, submit_button, status_output, output)
+
+ def on_button_clicked(b):
+ # Get the selected parameters
+ td = td_dropdown.value
+ vlabels = list(vertex_labels_select.value)
+ elabels = list(edge_labels_select.value)
+
+ # Clear the output widget before displaying new content
+ output.clear_output(wait=True)
+ status_output.clear_output(wait=True)
+
+ # Call the function with the selected parameters
+ with status_output:
+ display_html(HTML(loading_wheel_html))
+
+ with output:
+ tabs_data = None
+ try:
+ tabs_data = self.execute_degree_distribution_query(td, vlabels, elabels, local_ns)
+ except KeyError as e:
+ print(f"Missing expected data in query results: {e}")
+ except IndexError as e:
+ print(f"Unexpected result format: {e}")
+ except Exception as e:
+ print(f"Error processing degree distribution: {e}")
+
+ # Clear the wheel display
+ status_output.clear_output()
+
+ # Stop here if the query failed; an error message has already been printed above
+ if tabs_data is None or 'js' not in local_ns:
+ return
+
+ res = local_ns['js']
+
+ # Retrieve the distribution
+ pairs = np.array(res['results'][0]['output']['distribution'])
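+ # Each row of the distribution is a [degree, count] pair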
+ keys = pairs[:,0]
+ values = pairs[:,1]
+
+ # Retrieve some statistics
+ max_deg = res['results'][0]['output']['statistics']['maxDeg']
+ median_deg = res['results'][0]['output']['statistics']['p50']
+ mean_deg = res['results'][0]['output']['statistics']['mean']
+
+ # Get the titles and children
+ titles = tabs_data['titles']
+ children = tabs_data['children']
+
+ # Plot is the first tab
+ plot_output = widgets.Output(layout=DEFAULT_LAYOUT)
+ titles.insert(0, 'Plot')
+ children.insert(0, plot_output)
+
+ # Create the interactive visualization
+ with plot_output:
+ self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg,
+ td, vlabels, elabels, len(available_vertex_labels), len(available_edge_labels))
+
+ # Define a larger layout
+ large_layout = widgets.Layout(
+ width='100%',
+ height='1200px', # Increase the height as needed
+ overflow='auto'
+ )
+
+ # Apply to the tab widget
+ tab = widgets.Tab(layout=large_layout)
+
+ # Apply to each output widget
+ for i in range(len(children)):
+ children[i].layout = large_layout
+
+ # Set up the tab widget
+ tab.children = children
+ for i, title in enumerate(titles):
+ tab.set_title(i, title)
+
+ display(tab)
+
+ submit_button.on_click(on_button_clicked)
+
+ def execute_degree_distribution_query(self, td, vlabels, elabels, local_ns):
+ try:
+ query_parts = [f'traversalDirection: "{td}"']
+
+ if vlabels:
+ vertex_str = ", ".join([f'"{v}"' for v in vlabels])
+ query_parts.append(f'vertexLabels: [{vertex_str}]')
+
+ if elabels:
+ edge_str = ", ".join([f'"{e}"' for e in elabels])
+ query_parts.append(f'edgeLabels: [{edge_str}]')
+
+ # Construct the query
+ line = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"
+
+ oc_rebuild_args = (f"{f'--store-to js'}")
+ tabs_data = self.handle_opencypher_query(oc_rebuild_args, line, local_ns, True)
+
+ if 'js' not in local_ns:
+ raise ValueError("Query execution failed to store results")
+
+ return tabs_data
+ except Exception as e:
+ print(f"Error executing degree distribution query: {e}")
+ return None
+
+ def suggest_degree_distribution_params(self, expected_nbins, n, min_nonzero_count, max_count, min_deg, max_deg):
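+ # Heuristic defaults: prefer log axes and logarithmic bins when the degree/count spread is
+ # large and the graph has more than 1000 nodes; graphs of up to 1000 nodes stay fully linear.
+ # e.g. n=1_000_000, min_deg=1, max_deg=10_000, min_nonzero_count=1, max_count=200_000
+ # -> x_scale='Log', y_scale='Log', bin_type='Logarithmic'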
+
+ degree_spread = max_deg / min_deg
+ count_spread = max_count / min_nonzero_count
+
+ # X-axis scale decision
+ if degree_spread >= 100 and n > 1000:
+ x_scale = 'Log'
+ bin_type = 'Logarithmic'
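+ # Width in log10 units, chosen so the later inversion n_bins = 10 ** (log_range / bin_width)
+ # yields roughly expected_nbins bins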
+ initial_bin_width = (np.log10(max_deg) - np.log10(min_deg)) / np.log10(expected_nbins)
+ else:
+ x_scale = 'Linear'
+ bin_type = 'Linear'
+ initial_bin_width = (max_deg - min_deg) / expected_nbins if max_deg - min_deg > 0 else 1
+
+ # Y-axis scale decision
+ if count_spread >= 50 and n > 1000:
+ y_scale = 'Log'
+ else:
+ y_scale = 'Linear'
+
+ # Small graph override
+ if n <= 1000:
+ x_scale = 'Linear'
+ y_scale = 'Linear'
+ bin_type = 'Linear'
+ initial_bin_width = 1
+
+ return {
+ 'x_scale': x_scale,
+ 'y_scale': y_scale,
+ 'bin_type': bin_type,
+ 'initial_bin_width': initial_bin_width
+ }
+
+ def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg,
+ td, vlabels, elabels, num_vlabels, num_elabels):
+ expected_nbins = 20
+ min_deg = 0
+ try:
+ if unique_degrees is None or counts is None or len(unique_degrees) == 0:
+ print("No data available to visualize")
+ return
+
+ max_count = max(counts)
+ min_deg = min(unique_degrees)
+
+ # Scale widget, four options
+ scale_widget = widgets.Dropdown(
+ options=['Linear-Linear', 'Log-Log', 'Log(x)-Linear(y)', 'Linear(x)-Log(y)'],
+ value='Linear-Linear',
+ description='Scale:'
+ )
+
+ # Binning widget, three options
+ bin_widget = widgets.Dropdown(
+ options=['Raw', 'Linear', 'Logarithmic'],
+ value='Linear',
+ description='Binning:'
+ )
+
+ # Bin width widget, integer/float options for linear/log scale
+ bin_width_widget = widgets.FloatSlider(
+ min=1,
+ max=max(1, (max_deg - min_deg) // 5),
+ step=1,
+ value=1,
readout_format='d',
+ description='Bin width:',
+ tooltip=('For linear binning: actual width\n'
+ 'For log binning: multiplicative factor')
+ )
+
+ # Y-axis maximum control: slider for quick selection, text for custom values
+ y_max_widget = widgets.IntSlider(
+ value=int(max_count * 1.1),
+ min=1,
+ max=max(10, int(max_count * 1.1)),
+ step=1,
+ description='y-max:',
+ )
+
+ # Extend the y-max limit
+ y_max_text_widget = widgets.BoundedIntText(
+ value=int(max_count * 1.1),
+ min=1,
+ max=sys.maxsize,
+ step=1,
+ description='Extend y-max:',
+ style={
+ 'description_width': '50%',
+ 'readout_width': '30%'
+ },
+ )
+
+ # Sync slider and text input
+ def handle_text_change(change):
+ new_max = int(change.new)
+ if new_max > y_max_widget.max:
+ y_max_widget.max = new_max
+ y_max_widget.value = new_max
+
+ y_max_text_widget.observe(handle_text_change, names='value')
+
+ # Range slider for x-axis, enables zooming
+ x_range_widget = widgets.FloatRangeSlider(
+ min=0,
+ max=max_deg * 1.1 + 5,
+ value=[0, max_deg * 1.1 + 5],
+ step=1,
+ description='x-axis range:',
+ disabled=False,
+ continuous_update=True,
+ readout=True,
+ readout_format='.0f',
+ )
+
+ # Add CSS to style the slider readout with a border ('widget-readout' is the CSS class
+ # ipywidgets applies to slider readouts)
+ display(HTML("""
+ <style>
+ .widget-readout { border: 1px solid gray; }
+ </style>
+ """))
+
+ # Toggle switches for min/max degree lines
+ show_mindeg_widget = widgets.Checkbox(
+ value=True,
+ description='Show Min Degree',
+ disabled=False,
+ )
+
+ show_maxdeg_widget = widgets.Checkbox(
+ value=True,
+ description='Show Max Degree',
+ disabled=False,
+ )
+
+ max_deg = max(unique_degrees)
+ if max_deg <= 1:
+ scale_widget.value = 'Linear-Linear'
+ bin_widget.value = 'Linear'
+ bin_width_widget.value = 1
+ else:
+ min_nonzero_count = min(counts[counts > 0])
+ min_nonzero_deg = min(unique_degrees[unique_degrees > 0])
+ params = self.suggest_degree_distribution_params(expected_nbins, sum(counts), min_nonzero_count, max(counts), min_nonzero_deg, max_deg)
+
+ # Set scale_widget value based on params
+ if params['x_scale'] == 'Log' and params['y_scale'] == 'Log':
+ scale_widget.value = 'Log-Log'
+ elif params['x_scale'] == 'Log' and params['y_scale'] == 'Linear':
+ scale_widget.value = 'Log(x)-Linear(y)'
+ elif params['x_scale'] == 'Linear' and params['y_scale'] == 'Log':
+ scale_widget.value = 'Linear(x)-Log(y)'
+ else: # Both linear
+ scale_widget.value = 'Linear-Linear'
+
+ # Set bin_widget and bin_width_widget based on params
+ bin_widget.value = params['bin_type']
+ bin_width_widget.value = params['initial_bin_width']
+ if params['bin_type'] == 'Logarithmic':
+ bin_width_widget.step = 0.01
+ bin_width_widget.readout_format = '.2f'
+ bin_width_widget.max = max(1, (np.log10(max_deg) - np.log10(min_nonzero_deg)) / np.log10(5))
+
+ # Adjust the initial y-max limit
+ total_nodes = sum(counts)
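+ # Wider bins aggregate more nodes per bin, so the initial y-limit grows with bin width,
+ # capped at the total number of nodes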
+ factor = 1
+ if bin_widget.value == 'Raw': # For raw data, use the original max count
+ factor = max_count
+ elif bin_widget.value == 'Linear':
+ factor = min(total_nodes, max_count * (bin_width_widget.value ** 0.85))
+ elif bin_widget.value == 'Logarithmic':
+ factor = min(total_nodes, max_count * (10 ** (bin_width_widget.value ** 0.25)))
+ y_max_text_widget.value = y_max_widget.max = y_max_widget.value = max(1.1 * factor, y_max_text_widget.value)
+
+ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
+ alpha = 1
+ fig = go.Figure()
+
+ # Get zero degree count
+ zero_idx = np.where(unique_degrees == 0)[0]
+ zero_degree_count = counts[zero_idx[0]] if len(zero_idx) > 0 else 0
+ isolateds_exist = zero_degree_count > 0
+
+ # Get non-zero degrees and counts
+ mask = unique_degrees > 0
+ filtered_degrees = unique_degrees[mask]
+ filtered_counts = counts[mask]
+
+ # Obtain the minimum non-zero degree, unless it's all zero degrees
+ if len(filtered_degrees) == 0:
+ min_deg = 0
+ else:
+ min_deg = np.min(filtered_degrees)
+
+ n_bins = 1
+
+ # Create histogram only if there is at least one non-zero degree node
+ if len(filtered_degrees) > 0:
+ if bin_type != 'Raw':
+ # Arrange the bins for a given bin_width
+ if bin_type == 'Linear':
+ n_bins = max(1, int((max_deg - min_deg) / bin_width)) # at least one bin even when bin_width exceeds the range
+ bins = np.linspace(min_deg, max_deg, n_bins+1)
+ else: # Logarithmic
+ min_deg_log = np.log10(min_deg) if min_deg > 0 else 0
+ max_deg_log = np.log10(max_deg) if max_deg > 0 else 1
+ n_bins = max(1, int(10 ** ((max_deg_log - min_deg_log) / bin_width)))
+ bins = np.logspace(min_deg_log, max_deg_log, n_bins+1)
+
+ # Efficient binning using digitize instead of histogram
+ bin_indices = np.digitize(filtered_degrees, bins) - 1
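+ # np.digitize returns 1-based bin indices, so shift to 0-based for hist_counts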
+
+ # Accumulate the per-degree counts into their histogram bins
+ hist_counts = np.zeros(len(bins))
+ for i, count in zip(bin_indices, filtered_counts):
+ if 0 <= i < len(hist_counts): # Ensure index is valid
+ hist_counts[i] += count
+
+ # Create a step plot that looks like bars, duplicate x values to create vertical lines
+ x_steps = np.zeros(2 * len(bins))
+ x_steps[0::2] = bins
+ if bin_type == 'Linear': # For linear bins, use constant width
+ x_steps[1::2] = bins + bin_width
+ else: # For logarithmic bins, use actual bin edges
+ bins = np.concatenate((bins, np.array([bins[-1]+bin_width])))
+ x_steps[1::2] = bins[1:]
+
+ # Duplicate y values to create horizontal lines
+ y_steps = np.zeros(2 * len(hist_counts))
+ y_steps[0::2] = hist_counts
+ y_steps[1::2] = hist_counts
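+ # x_steps/y_steps now trace the outline of adjacent histogram bars as a single polyline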
+
+ fig = go.Figure(data=go.Scatter(
+ x=x_steps,
+ y=y_steps,
+ mode='lines',
+ line=dict(color='#000080', width=1.5, shape='hv'),
+ opacity=alpha,
+ hovertemplate='',
+ showlegend=False))
+
+ # Fill the area below the step plot
+ fig.add_trace(
+ go.Scatter(
+ x=x_steps,
+ y=y_steps,
+ mode='lines',
+ line=dict(shape='hv', width=0),
+ fill='tozeroy',
+ fillcolor='rgba(0, 0, 128, ' + str(alpha*0.5) + ')',
+ opacity=alpha,
+ # no text on hover because the line chart (which mimics the bars) has
+ # multiple y values for an x when it goes up/down, even when bin width = 1
+ hovertemplate='',
+ showlegend=False))
+ else: # For raw data, create bars at each unique degree
+ fig.add_trace(
+ go.Bar(
+ x=filtered_degrees,
+ y=filtered_counts,
+ name='Raw',
+ marker_color='#000000',
+ opacity=alpha,
+ # text on hover only for raw data
+ hovertemplate='%{y} nodes of degree %{x}',
+ width=0.9,
+ ))
+
+ # Plot zero degree node count separately
+ if isolateds_exist:
+ # Use a special x position for zero degree nodes in log scale
+ zero_x_pos = 0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 0
+
+ # Adjust width based on binning type
+ if bin_type == 'Linear':
+ bar_width = 1 # bin width
+ elif bin_type == 'Logarithmic':
+ bar_width = 0.05 # Fixed small width for log scale
+ else: # Raw
+ bar_width = 0.9 # Standard width for raw data
+
+ fig.add_trace(
+ go.Bar(
+ x=[zero_x_pos],
+ y=[zero_degree_count],
+ name='Isolated',
+ marker_color='red',
+ opacity=alpha,
+ hovertemplate='%{y} nodes of degree zero',
+ width=bar_width,
+ ))
+
+ # Build title components
+ degree_type_map = {
+ 'both': 'Total',
+ 'inbound': 'In',
+ 'outbound': 'Out'
+ }
+
+ degree_display = degree_type_map.get(td, td.title())
+
+ # Build vertex labels part
+ if not vlabels or len(vlabels) == num_vlabels: # if no label is selected, then all labels considered
+ vertex_part = "for all vertex labels"
+ else:
+ bold_vlabels = [f"{label}" for label in vlabels]
+ vertex_part = f"for {', '.join(bold_vlabels)} vertices"
+
+ # Build edge labels part
+ if not elabels or len(elabels) == num_elabels: # if no label is selected, then all labels considered
+ edge_part = "via all edge labels"
+ else:
+ bold_elabels = [f"{label}" for label in elabels]
+ edge_part = f"via {', '.join(bold_elabels)} edges"
+
+ title_text = f"{degree_display} degree distribution
{vertex_part}
{edge_part}"
+
+ fig.update_layout(
+ hoverlabel=dict(
+ bgcolor='rgba(0, 0, 128, ' + str(alpha*0.5) + ')',
+ font_color="white"
+ ),
+ # Size
+ width=1200,
+ height=600,
+ # Margins
+ margin=dict(
+ l=80, # left margin
+ r=50, # right margin
+ b=80, # bottom margin
+ t=100, # top margin
+ pad=4 # padding
+ ),
+ xaxis=dict(
+ showgrid=True,
+ gridwidth=1,
+ gridcolor='rgba(0,0,0,0.2)',
+ minor=dict(
+ tickmode='auto',
+ tickcolor='gray',
+ showgrid=True,
+ gridwidth=1,
+ gridcolor='lightgray',
+ ticks=""
+ )
+ ),
+ yaxis=dict(
+ showgrid=True,
+ gridwidth=1,
+ gridcolor='rgba(0,0,0,0.2)',
+ ),
+ yaxis_range=[0.05, y_max],
+ xaxis_title='Degree',
+ yaxis_title='Number of nodes',
+ title={
+ 'text': title_text,
+ 'x': 0.5,
+ 'xanchor': 'center',
+ 'yanchor': 'top'
+ },
+ legend=dict(
+ orientation="v", # Horizontal orientation
+ yanchor="auto", # Anchor legend to its bottom edge
+ xanchor="auto", # Anchor legend to its right edge
+ x=1 # Position to the right of the plot
+ )
+ )
+
+ x_min = x_range[0]
+ x_max = x_range[1]
+
+ # Set scales based on selection
+ if scale_type == 'Log-Log':
+ fig.update_layout(
+ xaxis_type="log", # Set x-axis to log scale
+ yaxis_type="log", # Set y-axis to log scale
+ yaxis_range=[0.05, np.log10(y_max)],
+ yaxis=dict(
+ exponentformat='power', # Use scientific notation
+ showexponent='all',
+ dtick=1 # Show only powers of 10
+ ),
+ xaxis=dict(
+ exponentformat='power', # Use scientific notation
+ showexponent='all',
+ dtick=1 # Show only powers of 10
+ )
+ )
+
+ x_min = max(x_min, 0.05)
+ x_min = np.log10(x_min)
+ x_max = np.log10(x_max)
+ elif scale_type == 'Log(x)-Linear(y)':
+ fig.update_layout(
+ xaxis_type="log", # Set x-axis to log scale
+ xaxis=dict(
+ exponentformat='power', # Use scientific notation
+ showexponent='all',
+ dtick=1 # Show only powers of 10
+ )
+ )
+ x_min = max(x_min, 0.05)
+ x_min = np.log10(x_min)
+ x_max = np.log10(x_max)
+ elif scale_type == 'Linear(x)-Log(y)':
+ fig.update_layout(
+ yaxis_type="log", # Set y-axis to log scale
+ yaxis_range=[0.05, np.log10(y_max)],
+ yaxis=dict(
+ exponentformat='power', # Use scientific notation
+ showexponent='all',
+ dtick=1 # Show only powers of 10
+ )
+ )
+
+ fig.update_layout(
+ xaxis_range=[x_min, x_max]
+ )
+
+ # Add vertical dashed lines for min and max degree, if enabled
+ if show_mindeg and min_deg > 0:
+ fig.add_trace(
+ go.Scatter(
+ x=[min_deg, min_deg],
+ y=[0, y_max],
+ mode='lines',
+ line=dict(color="darkgreen", width=2, dash="dash"),
+ hovertemplate='Min non-zero degree is %{x}',
+ name=f'Min non-zero degree: {min_deg}'
+ )
+ )
+
+ if show_maxdeg:
+ fig.add_trace(
+ go.Scatter(
+ x=[max_deg, max_deg],
+ y=[0, y_max],
+ mode='lines',
+ line=dict(color="darkred", width=2, dash="dash"),
+ hovertemplate='Max degree is %{x}',
+ name=f'Max degree: {max_deg}'
+ )
+ )
+
+ fig.show()
+
+ # Update statistics
+ with stats_output:
+ stats_output.clear_output(wait=True)
+ total_nodes = sum(counts)
+ # 'both' counts each edge from both endpoints, so halve the degree sum; in/out degrees count each edge once
+ total_degree = sum(d * c for d, c in zip(unique_degrees, counts))
+ total_edges = total_degree // 2 if td == 'both' else total_degree
+
+ stats_data = {
+ 'Metric': ['Nodes', 'Edges', 'Isolated nodes', 'Average degree', 'Median degree', 'Max degree'],
+ 'Value': [f"{total_nodes:,}", f"{total_edges:,}", f"{zero_degree_count:,}", f"{mean_deg:.2f}", f"{median_deg:.2f}", f"{max_deg:,}"]
+ }
+
+ if min_deg > 0:
+ stats_data['Metric'].append('Min non-zero degree')
+ stats_data['Value'].append(f"{min_deg:,}")
+
+ if bin_type != 'Raw':
+ stats_data['Metric'].append('Number of bins')
+ stats_data['Value'].append(f"{n_bins:,}")
+
+ df = pd.DataFrame(stats_data)
+ display(df.style.hide(axis="index").set_table_styles([
+ {'selector': 'th', 'props': [('background-color', '#f8f9fa'), ('font-weight', 'bold'), ('text-align', 'left')]},
+ {'selector': 'td', 'props': [('padding', '8px'), ('text-align', 'left')]}
+ ]))
+
+ max_count = np.max(counts)
+ total_nodes = sum(counts)
+
+ # Define a function to update bin_width_widget based on bin_type
+ def update_bin_width_widget(change):
+ if change['new'] == 'Logarithmic':
+ bin_width_widget.value = 1.00
+ bin_width_widget.min = 1.00
+ bin_width_widget.max = max(1, (np.log10(max_deg) - np.log10(max(min_deg, 1))) / np.log10(5)) # guard against log10(0) when isolated nodes exist
+ bin_width_widget.step = 0.01
+ bin_width_widget.readout_format = '.2f'
+ bin_width_widget.disabled = False
+ elif change['new'] == 'Raw':
+ bin_width_widget.value = 1
+ bin_width_widget.readout_format = 'd'
+ bin_width_widget.disabled = True
+ elif change['new'] == 'Linear':
+ bin_width_widget.value = 1
+ bin_width_widget.min = 1
+ bin_width_widget.max = max(1, (max_deg - min_deg) // 5)
+ bin_width_widget.step = 1
+ bin_width_widget.readout_format = 'd'
+ bin_width_widget.disabled = False
+
+ def update_y_max_widget(change):
+ factor = 1
+ if bin_widget.value == 'Raw': # For raw data, use the original max count
+ factor = max_count
+ elif bin_widget.value == 'Linear':
+ factor = min(total_nodes, max_count * (bin_width_widget.value ** 0.85))
+ elif bin_widget.value == 'Logarithmic':
+ factor = min(total_nodes, max_count * (10 ** (bin_width_widget.value ** 0.25)))
+ y_max_text_widget.value = y_max_widget.max = y_max_widget.value = max(1.1 * factor, y_max_text_widget.value)
+
+ # Observe changes: bin width affects the y-max; bin type affects both the y-max and the width slider
+ bin_width_widget.observe(update_y_max_widget, names='value')
+ bin_widget.observe(update_y_max_widget, names='value')
+
+ bin_widget.observe(update_bin_width_widget, names='value')
+
+ # Output widget for statistics
+ stats_output = widgets.Output()
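+ # Referenced from update_plot via closure, so it must be defined before the first interactive render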
+
+ # Create y_max container with both widgets
+ y_max_container = widgets.VBox([y_max_widget, y_max_text_widget])
+
+ # Interactive plot
+ interactive_plot = widgets.interactive(
+ update_plot,
+ scale_type=scale_widget,
+ bin_type=bin_widget,
+ bin_width=bin_width_widget,
+ y_max=y_max_widget,
+ x_range=x_range_widget,
+ show_mindeg=show_mindeg_widget,
+ show_maxdeg=show_maxdeg_widget
+ )
+
+ # Take the control widgets from the interactive plot (everything except the final output)
+ controls = list(interactive_plot.children[:-1])
+
+ # Replace y_max_widget with y_max_container
+ y_max_index = controls.index(y_max_widget)
+ controls[y_max_index] = y_max_container
+
+ # Create a new VBox with the modified controls and the output
+ modified_interactive = widgets.VBox(controls + [interactive_plot.children[-1]])
+
+ # Vertical box layout with stats
+ vbox = widgets.VBox([modified_interactive, stats_output])
+
+ # Display the interactive plot and stats
+ display(vbox)
+
+ except Exception as e:
+ print(f"Error creating visualization: {e}")
diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py
index 2a683452..b5b94d83 100644
--- a/src/graph_notebook/neptune/client.py
+++ b/src/graph_notebook/neptune/client.py
@@ -174,6 +174,7 @@
GRAPH_PG_INFO_METRICS = {'numVertices', 'numEdges', 'numVertexProperties', 'numEdgeProperties'}
+TRAVERSAL_DIRECTIONS = ['both', 'inbound', 'outbound']
def is_allowed_neptune_host(hostname: str, host_allowlist: list):
for host_snippet in host_allowlist: