diff --git a/Notebooks/PySpark/05 Using Azure Open Datasets in Synapse.ipynb b/Notebooks/PySpark/05 Using Azure Open Datasets in Synapse.ipynb
index 4c356da..b3fda51 100644
--- a/Notebooks/PySpark/05 Using Azure Open Datasets in Synapse.ipynb
+++ b/Notebooks/PySpark/05 Using Azure Open Datasets in Synapse.ipynb
@@ -270,7 +270,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "## Enrich with weather data¶\n",
+ "## Enrich with weather data\n",
 "\n",
 "Now we append NOAA surface weather data to the taxi and holiday data. Use a similar approach to fetch the [NOAA weather history data](https://azure.microsoft.com/en-us/services/open-datasets/catalog/noaa-integrated-surface-data/) from Azure Open Datasets. "
 ],
diff --git a/Notebooks/PySpark/07 Data Exploration and ML Modeling - NYC taxi predict using Spark MLlib.ipynb b/Notebooks/PySpark/07 Data Exploration and ML Modeling - NYC taxi predict using Spark MLlib.ipynb
index 24bca0c..80b99f9 100644
--- a/Notebooks/PySpark/07 Data Exploration and ML Modeling - NYC taxi predict using Spark MLlib.ipynb
+++ b/Notebooks/PySpark/07 Data Exploration and ML Modeling - NYC taxi predict using Spark MLlib.ipynb
@@ -48,7 +48,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "## Ingest Data¶ \n",
+ "## Ingest Data\n",
 "\n",
 "Get a sample data of nyc yellow taxi to make it faster/easier to evaluate different approaches to prep for the modelling phase later in the notebook."
 ],
diff --git a/Notebooks/metadata.json b/Notebooks/metadata.json
new file mode 100644
index 0000000..e37dc5c
--- /dev/null
+++ b/Notebooks/metadata.json
@@ -0,0 +1,68 @@
+{
+ "data": [
+ {
+ "title": "Using Azure Open Datasets in Synapse",
+ "description": "This notebook provides examples of how to enrich NYC Green Taxi data with holiday and weather data.",
+ "path": "PySpark\\05 Using Azure Open Datasets in Synapse.ipynb",
+ "tags": ["Column projection", "Filtering", "Join", "Spark Table"],
+ "types": ["PySpark", "Sample"],
+ "languages": ["PySpark"]
+ },
+ {
+ "title": "Charting in Synapse Notebook",
+ "description": "This notebook provides examples of how to visualize data in a Synapse notebook using matplotlib, bokeh, or seaborn.",
+ "path": "PySpark\\06 Charting in Synapse Notebook.ipynb",
+ "tags": ["Matplotlib", "Bokeh", "Seaborn", "Visualization", "Charting"],
+ "types": ["PySpark", "Sample"],
+ "languages": ["PySpark"]
+ },
+ {
+ "title": "Data Exploration and ML Modeling - NYC taxi predict using Spark MLlib",
+ "description": "The notebook ingests, visualizes, prepares and then trains a model based on an Open Dataset that tracks NYC Yellow Taxi trips and various attributes around them. The goal is to predict for a given trip whether there will be a tip or not.",
+ "path": "PySpark\\07 Data Exploration and ML Modeling - NYC taxi predict using Spark MLlib.ipynb",
+ "tags": ["Spark MLlib", "Matplotlib", "Logistic Regression", "Machine Learning"],
+ "types": ["PySpark", "Sample"],
+ "languages": ["PySpark"]
+ },
+ {
+ "title": "Getting Started with Hyperspace Indexing",
+ "description": "This notebook gives you a tour of Hyperspace indexing and shows how you can accelerate your Apache Spark workloads.",
+ "path": "PySpark\\Hitchhikers Guide to Hyperspace - Python.ipynb",
+ "tags": ["Spark", "Hyperspace Indexing", "Python", "PySpark", "Incremental", "Hybrid-Scan", "Accelerate", "Join", "Filter", "Acceleration", "Fast", "Parquet", "JSON", "CSV", "Workload"],
+ "types": ["PySpark", "Sample"],
+ "languages": ["PySpark"]
+ },
+ {
+ "title": "Getting Started with Hyperspace Indexing",
+ "description": "This notebook gives you a tour of Hyperspace indexing and shows how you can accelerate your Apache Spark workloads.",
+ "path": "Scala\\Hitchhikers Guide to Hyperspace - Scala.ipynb",
+ "tags": ["Spark", "Hyperspace Indexing", "Scala", "Incremental", "Hybrid-Scan", "Accelerate", "Join", "Filter", "Acceleration", "Fast", "Parquet", "JSON", "CSV", "Workload"],
+ "types": ["Scala", "Sample"],
+ "languages": ["Scala"]
+ },
+ {
+ "title": "Experimenting with dotNET for Spark Using CreateDataFrame",
+ "description": "A Spark DataFrame is a distributed collection of data organized into named columns that provides operations to filter, group, or compute aggregates, and can be used with Spark SQL. It is conceptually equivalent to a table in a relational database or a data frame in R/Python, but with richer optimizations under the hood.",
+ "path": "Spark.NET C#\\Experimenting with DotNet for Spark Using CreateDataFrame.ipynb",
+ "tags": ["C#", "Sample", "Spark.NET C#", "CreateDataFrame"],
+ "types": ["Spark.NET C#", "Sample"],
+ "languages": ["Spark.NET C#"]
+ },
+ {
+ "title": "Getting Started with Hyperspace Indexing",
+ "description": "This notebook gives you a tour of Hyperspace indexing and shows how you can accelerate your Apache Spark workloads.",
+ "path": "Spark.NET C#\\Hitchhikers Guide to Hyperspace - CSharp.ipynb",
+ "tags": ["Spark", "Hyperspace Indexing", ".NET", "dotnet", "dotnet-core", "Incremental", "Hybrid-Scan", "Accelerate", "Join", "Filter", "Acceleration", "Fast", "Parquet", "JSON", "CSV", "Workload"],
+ "types": ["Spark.NET C#", "Sample"],
+ "languages": ["Spark.NET C#"]
+ },
+ {
+ "title": "User-Defined Functions with Complex Types in dotNET for Spark",
+ "description": "This notebook explains how to construct UDFs in C# and includes example functions, such as how to use UDFs with complex Row objects.",
+ "path": "Spark.NET C#\\User-Defined Functions with Complex Types in DotNet for Spark.ipynb",
+ "tags": ["C#", "Sample", "Spark.NET C#", "User-Defined Functions"],
+ "types": ["Spark.NET C#", "Sample"],
+ "languages": ["Spark.NET C#"]
+ }
+ ]
+}
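The "Enrich with weather data" cell touched in notebook 05 above refers to fetching the NOAA weather history data from Azure Open Datasets. Below is a minimal PySpark sketch of one way to do that with the `azureml-opendatasets` package; the diff does not show the notebook's own fetch code, so the NYC bounding box, the daily-average aggregation, and the `taxi_holiday_df`/`pickup_date` names are illustrative assumptions rather than the notebook's actual variables.

```python
from datetime import datetime

from azureml.opendatasets import NoaaIsdWeather  # Azure Open Datasets client (assumed installed on the Spark pool)
from pyspark.sql import functions as F

# Pull one month of NOAA ISD surface weather as a Spark DataFrame.
start, end = datetime(2018, 5, 1), datetime(2018, 5, 31)
weather_df = NoaaIsdWeather(start, end).to_spark_dataframe()

# Keep stations roughly around NYC (illustrative bounding box) and reduce
# the hourly readings to one average temperature per calendar day.
nyc_daily_weather = (
    weather_df
    .filter(F.col("latitude").between(40.5, 41.0) & F.col("longitude").between(-74.25, -73.5))
    .withColumn("date", F.to_date("datetime"))
    .groupBy("date")
    .agg(F.avg("temperature").alias("avg_temperature"))
)

# Join onto the combined taxi + holiday DataFrame by pickup date.
# `taxi_holiday_df` and `pickup_date` are hypothetical names standing in
# for whatever the earlier enrichment steps produced.
# enriched_df = taxi_holiday_df.join(
#     nyc_daily_weather,
#     taxi_holiday_df["pickup_date"] == nyc_daily_weather["date"],
#     "left",
# )
```

Aggregating to one row per day keeps the weather join one-to-one with trip dates, matching the per-date granularity of the holiday enrichment described in the same notebook.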