diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..15b0a96 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(git branch:*)" + ], + "deny": [], + "ask": [] + } +} \ No newline at end of file diff --git a/_quarto.yml b/_quarto.yml index e1976ef..80f0fd9 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -9,8 +9,6 @@ website: search: true logo: assets/isampleslogopetal.png tools: - - icon: table - href: https://hyde.cyverse.org/isamples_central/ui/ - icon: github href: https://github.com/isamplesorg - icon: slack diff --git a/about.qmd b/about.qmd index 076d4c3..309fbed 100644 --- a/about.qmd +++ b/about.qmd @@ -4,10 +4,18 @@ title: "About iSamples" # Project Objectives -1. Design and develop iSamples infrastructure (iSamples in a Box and iSamples Central); +1. Design and develop iSamples infrastructure (iSamples in a Box and distributed data systems); 2. Build four initial implementations of iSamples for adoption and use case testing (Open Context, GEOME, SESAR, and Smithsonian Institution); 3. Conduct outreach and community engagement to developers, individual researchers, and international organizations concerned with material samples. +## Current Data Access + +**Note**: iSamples Central is currently unavailable. 
The project has transitioned to a **geoparquet-based approach** for data access and analysis: + +- **Primary Data Source**: Comprehensive geoparquet files containing millions of sample records +- **Analysis Platform**: Browser-based tools using DuckDB-WASM and Observable +- **Coverage**: Complete datasets from SESAR, OpenContext, GEOME, and Smithsonian collections + ![iSamples diagram](assets/iSamplesArchitecture.png) diff --git a/design/requirements.md b/design/requirements.md index 2e19067..0547b5d 100644 --- a/design/requirements.md +++ b/design/requirements.md @@ -337,7 +337,7 @@ Components ## 15 All content sources should be assumed to be dynamic and attached components should facilitate efficient synchronization of subscribed content. -iSamples central will need to continually update the catalog and promote dissemination of the content to subscribers (e.g. iSB instances). +With the transition to geoparquet-based data access, content synchronization now occurs through periodic updates of parquet files rather than real-time API synchronization. This approach provides better performance and reliability for analytical workloads. Derived from: diff --git a/index.qmd b/index.qmd index 744030e..f0e2789 100644 --- a/index.qmd +++ b/index.qmd @@ -6,6 +6,15 @@ subtitle: "Toward an Interdisciplinary Cyberinfrastructure for Material Samples The Internet of Samples (iSamples) is a multi-disciplinary and multi-institutional project funded by the National Science Foundation to design, develop, and promote service infrastructure to uniquely, consistently, and conveniently identify material samples, record metadata about them, and persistently link them to other samples and derived digital content, including images, data, and publications. +## Current Data Access: Geoparquet-Based Approach + +**Note**: iSamples Central is currently unavailable. 
The project now uses **geoparquet files** for efficient, browser-based data access and analysis: + +- 📊 **[Interactive Tutorials](/tutorials/)** - Modern browser-based analysis with DuckDB-WASM +- 🗺️ **Comprehensive Coverage** - Complete datasets from SESAR, OpenContext, GEOME, and Smithsonian +- 🚀 **High Performance** - Fast, efficient data access with minimal memory usage +- 🌐 **Universal Access** - Works in any modern browser without software installation + **Resources** * [Recording of project presentation at the 2020 SPNHC & ICOM NATHIST Conference](https://youtu.be/eRUw5NMksFo?t=105) diff --git a/tutorials/index.qmd b/tutorials/index.qmd index 241abf2..e022a3b 100644 --- a/tutorials/index.qmd +++ b/tutorials/index.qmd @@ -2,66 +2,39 @@ title: "Tutorials: Overview" --- -Here's where we park our various tutorials! +Welcome to the iSamples tutorials! These tutorials demonstrate how to work with sample data using modern browser-based tools and geoparquet files. -Get the OpenAPI spec. +## Available Data Sources -```{ojs} -//| echo: true +With iSamples Central currently unavailable, all tutorials now use **geoparquet files** as the primary data source: -// Get the OpenAPI specification and display detailed endpoint information -viewof apiEndpointDetails = { - // Show loading indicator - const loadingElement = html`
Loading API endpoints...
`; - document.body.appendChild(loadingElement); +### Primary Data Sources +- **Zenodo Complete Dataset**: ~300MB, 6+ million records from all iSamples sources +- **OpenContext Parquet**: Curated archaeological sample data +- **Domain-specific Collections**: Specialized datasets for focused analysis - try { - const OPENAPI_URL = 'https://central.isample.xyz/isamples_central/openapi.json'; +### Tutorial Categories - // Fetch the OpenAPI spec - const response = await fetch(OPENAPI_URL); - if (!response.ok) throw new Error(`Failed to fetch API spec: ${response.status}`); +**🗺️ Geographic Analysis** +- Interactive mapping and spatial exploration +- Regional distribution analysis +- Cesium-based 3D visualizations - const apiSpec = await response.json(); +**📊 Data Analysis** +- Statistical analysis with DuckDB-WASM +- Material category distributions +- Cross-collection comparisons - // Extract detailed information about each endpoint - const endpointDetails = []; +**🚀 Performance Demonstrations** +- Browser-based big data analysis +- Efficient sampling and visualization techniques +- HTTP range request optimization - for (const [path, pathMethods] of Object.entries(apiSpec.paths)) { - for (const [method, details] of Object.entries(pathMethods)) { - endpointDetails.push({ - endpoint: path, - method: method.toUpperCase(), - summary: details.summary || '', - operationId: details.operationId || '', - tags: (details.tags || []).join(', '), - parameters: (details.parameters || []) - .map(p => `${p.name} (${p.required ? 'required' : 'optional'})`) - .join(', ') - }); - } - } +## Why Geoparquet? - // Create a table with the detailed endpoint information - return Inputs.table( - endpointDetails, - { - label: "iSamples API Endpoints Details", - width: { - endpoint: 150, - method: 80, - summary: 200, - operationId: 200, - tags: 100, - parameters: 300 - } - } - ); - } catch (error) { - return html`
Error fetching API endpoints: ${error.message}
`; - } finally { - // Remove loading indicator - loadingElement.remove(); - } -} -``` \ No newline at end of file +Our tutorials showcase how **geoparquet + DuckDB-WASM** enables: +- ✅ **Universal access**: No software installation required +- ✅ **Fast analysis**: 5-10x faster than traditional approaches (e.g., downloading full CSV datasets and analyzing them locally). [See benchmark](https://duckdb.org/2023/05/10/duckdb-wasm.html) +- ✅ **Memory efficient**: Analyze 300MB datasets using <100MB browser memory +- ✅ **Minimal data transfer**: Only download what you need +- ✅ **Interactive exploration**: Real-time parameter adjustment \ No newline at end of file