From d85ea95a15851c8f825b8109c969a0f84ee71cc8 Mon Sep 17 00:00:00 2001 From: Nicola Vitucci Date: Thu, 3 Oct 2024 14:35:34 +0100 Subject: [PATCH 1/2] Fix errors in doc --- modules/ROOT/pages/gds.adoc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/ROOT/pages/gds.adoc b/modules/ROOT/pages/gds.adoc index 8bb183c..1cc6372 100644 --- a/modules/ROOT/pages/gds.adoc +++ b/modules/ROOT/pages/gds.adoc @@ -71,7 +71,7 @@ spark.read.format("org.neo4j.spark.DataSource") which will show a result like this: -```bash +``` +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+-----------------+-------------+ |nodeProjection |relationshipProjection |graphName|nodeCount|relationshipCount|projectMillis| +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+-----------------+-------------+ @@ -103,7 +103,7 @@ spark.read.format("org.neo4j.spark.DataSource") ---- ( spark.read.format("org.neo4j.spark.DataSource") - .option("gds", "gds.pageRank.stream") + .option("gds", "gds.pageRank.stream.estimate") .option("gds.graphName", "myGraph") .option("gds.configuration.concurrency", "2") .load() @@ -187,12 +187,11 @@ As you can see, we have now only the two columns `nodeId` and `score`, let's see # we'll assume that `spark` variable is already present # we create the `nodes_df` nodes_df = spark.read.format("org.neo4j.spark.DataSource") \ - .option("url", "neo4j://localhost:7687") \ .option("labels", "Page") \ .load() # we join `nodes_df` with `pr_df` created in the step before - new_df = nodes_df.join(pr_df, nodes_df.col("").equalTo(pr_df.col("nodeId"))) + new_df = 
nodes_df.join(pr_df, nodes_df["<id>"] == pr_df["nodeId"]) new_df.show(truncate=False) ---- From c091ca3b3c0eaa4b6635fac1d1e0c890dfefa8cb Mon Sep 17 00:00:00 2001 From: Nicola Vitucci Date: Wed, 27 Nov 2024 10:49:12 +0000 Subject: [PATCH 2/2] Fix link --- modules/ROOT/pages/faq.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ROOT/pages/faq.adoc b/modules/ROOT/pages/faq.adoc index 057a4fd..406545c 100644 --- a/modules/ROOT/pages/faq.adoc +++ b/modules/ROOT/pages/faq.adoc @@ -101,7 +101,7 @@ Refer to xref:overview.adoc#_spark_and_scala_compatibility[this page] to know wh This might happen when creating a new graph using the GDS library. The issue here is that the query is run the first time to extract the DataFrame schema and then is run again to get the data. -To avoid this issue you can use the xref:quickstart.adoc#user-defined-schema[user defined schema] approach. +To avoid this issue you can use the xref:read/define-schema.adoc#custom-schema[user defined schema] approach. == Databricks setup