Using Cesium for display of remote parquet (iSamples Wide Format).

parquet
spatial
recipe
wide
isamples

This page renders points from the full iSamples wide-format parquet file (all sources: SESAR, OpenContext, GEOME, Smithsonian) on Cesium using point primitives, with zoom-adaptive H3 clustering for fast initial load.

Note: iSamples Full Dataset (Wide Format + H3 Spatial Index)

This page uses the iSamples combined dataset (Jan 2026) which includes:

  • 6.7M MaterialSampleRecords from all iSamples sources
  • Source breakdown: SESAR (4.6M), OpenContext (1M), GEOME (605K), Smithsonian (322K)
  • ~292 MB wide format with H3 indices (vs ~850 MB narrow) - 66% smaller
  • H3 spatial index: Pre-computed h3_res4, h3_res6, h3_res8 columns for zoom-adaptive clustering
  • Clustered view: At high altitude shows res4 clusters, medium shows res6, close-up shows res8
  • Toggle: Switch between clustered (fast) and all-points (detailed) views
  • Color-coded: Points/clusters colored by dominant data source

DuckDB-WASM running in the browser cannot access local files via file:// URLs due to browser security restrictions. However, you can use a local cached file when running quarto preview:

Local Development (recommended)

Download the file locally, then serve it:

# Download the wide parquet file (~242MB)
curl -O https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide.parquet

# Serve it locally
python3 -m http.server 8000

Then use: http://localhost:8000/isamples_202601_wide.parquet

Benefits of the wide-format file:

  • 67% smaller than narrow format (~280 MB vs ~850 MB)
  • Much faster initial load (less network transfer)
  • Simpler queries with direct column access
  • Works offline once cached

Limitation: Only works during local development, not on published GitHub Pages.

Warning: Heads up — first interaction may be slow

The first click or query can take a few seconds while the in‑browser database engine initializes and the remote Parquet file is fetched and indexed. Subsequent interactions are much faster because both the browser and DuckDB cache metadata and column chunks, so later queries reuse what was already loaded.

Code
db = {
  const instance = await DuckDBClient.of();
  await instance.query(`create view nodes as select * from read_parquet('${parquet_path}')`)
  return instance;
}


/**
 * Run a SQL query through `db` with a "latest request wins" guard and an
 * optional loading indicator.
 *
 * @param {string} query - SQL text passed to `db.query`.
 * @param {Array} [params=[]] - Positional parameters passed to `db.query`.
 * @param {?string} [waiting_id=null] - DOM id of a loading indicator element
 *   that is shown while the query runs; ignored when falsy or not found.
 * @param {string} [key="default"] - Guard key. Only the most recent call for a
 *   given key delivers its result; older calls resolve to `null`.
 * @returns {Promise<*>} The value resolved by `db.query`, or `null` when the
 *   query failed or was superseded by a newer call with the same key.
 */
async function loadData(query, params = [], waiting_id = null, key = "default") {
    // Latest-only guard per key: each call registers a unique token and later
    // checks whether it is still the registered one.
    loadData._latest = loadData._latest || new Map();
    const requestToken = Symbol();
    loadData._latest.set(key, requestToken);
    const isLatest = () => loadData._latest.get(key) === requestToken;

    // Show the loading indicator, if one was requested and exists.
    const waiter = waiting_id ? document.getElementById(waiting_id) : null;
    if (waiter) waiter.hidden = false;

    let failed = false;
    try {
        // Run the (slow) query
        const results = await db.query(query, params);
        // Ignore stale responses
        return isLatest() ? results : null;
    } catch (error) {
        failed = true;
        console.error('Query error:', error);
        if (waiter && isLatest()) {
            // The error text may contain SQL fragments with '<' or '&';
            // escape it so it is displayed verbatim instead of parsed as HTML.
            const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
            const escaped = String(error).replace(/[&<>]/g, (ch) => entities[ch]);
            waiter.innerHTML = `<pre>${escaped}</pre>`;
        }
        return null;
    } finally {
        // Hide the waiter only for the latest request, and only on success:
        // hiding it after a failure would make the error written above
        // invisible.
        if (waiter && !failed && isLatest()) {
            waiter.hidden = true;
        }
    }
}

/**
 * Aggregate MaterialSampleRecord rows of the `nodes` view by the H3 column
 * `h3_res<h3Res>`.
 *
 * Each result row carries the cell id (`hex_id`), the row count (`n`), the
 * average latitude/longitude (`lat`, `lon`) and the most frequent `source`
 * value (`dominant_source`).
 *
 * @param {number} h3Res - Resolution suffix used to build the column name.
 * @returns {Promise<*>} Whatever `loadData` resolves to for this query
 *   (waiter id "loading_1", guard key "clusters").
 */
async function queryH3Clusters(h3Res) {
    const h3Column = `h3_res${h3Res}`;
    const sql = `
        SELECT
            ${h3Column} as hex_id,
            COUNT(*) as n,
            AVG(latitude) as lat,
            AVG(longitude) as lon,
            MODE(source) as dominant_source
        FROM nodes
        WHERE otype = 'MaterialSampleRecord'
          AND ${h3Column} IS NOT NULL
          AND latitude IS NOT NULL
          AND longitude IS NOT NULL
        GROUP BY ${h3Column}
    `;
    const clusters = await loadData(sql, [], "loading_1", "clusters");
    return clusters;
}

/**
 * Replace the contents of `content.points` with one point per cluster row.
 *
 * Each point is coloured via `CLUSTER_COLORS[dominant_source]` (falling back
 * to `CLUSTER_COLORS.default`) and sized by the log of the cluster count.
 * The point id encodes `cluster_<hex_id>_n<count>_<source>`, which the hover
 * handler parses to build its label.
 *
 * @param {?Array<Object>} clusters - Rows with `hex_id`, `n`, `lat`, `lon`
 *   and `dominant_source`. A null/empty value just clears the points.
 * @returns {Promise<void>}
 */
async function renderClusters(clusters) {
    content.points.removeAll();
    if (!clusters || clusters.length === 0) return;

    const scalar = new Cesium.NearFarScalar(1.5e2, 2, 8.0e6, 0.5);

    // NOTE: the previous unused `Math.max(...clusters.map(c => c.n))` was
    // removed; it threw on BigInt counts and on very large cluster arrays
    // (argument-count limit), and its result was never read.
    for (const cluster of clusters) {
        const source = cluster.dominant_source || 'default';
        const colorHex = CLUSTER_COLORS[source] || CLUSTER_COLORS.default;
        const color = Cesium.Color.fromCssColorString(colorHex);
        // Size proportional to log of count (range: 4-20px).
        // Number() lets the count be either a number or a BigInt.
        const size = Math.max(4, Math.min(20, 4 + Math.log(Number(cluster.n)) * 2.5));

        content.points.add({
            id: `cluster_${cluster.hex_id}_n${cluster.n}_${source}`,
            position: Cesium.Cartesian3.fromDegrees(
                cluster.lon,
                cluster.lat,
                0
            ),
            pixelSize: size,
            color: color,
            scaleByDistance: scalar,
        });
    }
    // (Re)bind the hover handler now that points exist.
    content.enableTracking();
}

locations = {
    // Performance telemetry
    performance.mark('locations-start');

    const loadingDiv = document.getElementById('loading_1');
    if (loadingDiv) {
        loadingDiv.hidden = false;
        loadingDiv.innerHTML = 'Loading...';
    }

    // Remove any existing camera change listener to avoid leaks
    if (content._cameraChangedHandler) {
        content.viewer.camera.changed.removeEventListener(content._cameraChangedHandler);
        content._cameraChangedHandler = null;
    }
    if (content._cameraChangedDebounceTimer) {
        clearTimeout(content._cameraChangedDebounceTimer);
        content._cameraChangedDebounceTimer = null;
    }

    if (viewModeToggle === "clustered") {
        // Clustered mode: load H3 clusters based on initial zoom level
        if (loadingDiv) loadingDiv.innerHTML = 'Loading H3 clusters (res4)...';

        performance.mark('query-start');
        const clusters = await queryH3Clusters(4);
        performance.mark('query-end');
        performance.measure('locations-query', 'query-start', 'query-end');
        const queryTime = performance.getEntriesByName('locations-query')[0].duration;

        if (!clusters) {
            if (loadingDiv) {
                loadingDiv.innerHTML = 'Cluster query failed - check console';
                loadingDiv.hidden = false;
            }
            return [];
        }

        console.log(`H3 cluster query (res4) in ${queryTime.toFixed(0)}ms - ${clusters.length} clusters`);

        performance.mark('render-start');
        await renderClusters(clusters);
        performance.mark('render-end');
        performance.measure('locations-render', 'render-start', 'render-end');

        if (loadingDiv) loadingDiv.hidden = true;

        // Set up camera change listener for zoom-adaptive LOD
        let lastRes = 4;
        let debounceTimer = null;
        const cameraChangedHandler = () => {
            if (viewModeToggle !== "clustered") return;
            const height = content.viewer.camera.positionCartographic.height;
            const newRes = getH3ResForHeight(height);
            if (newRes !== lastRes) {
                lastRes = newRes;
                clearTimeout(debounceTimer);
                debounceTimer = setTimeout(async () => {
                    const ld = document.getElementById('loading_1');
                    if (ld) { ld.hidden = false; ld.innerHTML = `Loading H3 clusters (res${newRes})...`; }
                    const newClusters = await queryH3Clusters(newRes);
                    if (newClusters) {
                        await renderClusters(newClusters);
                        console.log(`Zoom-adaptive: switched to res${newRes}, ${newClusters.length} clusters`);
                    }
                    if (ld) ld.hidden = true;
                }, 300);
                content._cameraChangedDebounceTimer = debounceTimer;
            }
        };
        content._cameraChangedHandler = cameraChangedHandler;
        content.viewer.camera.changed.addEventListener(cameraChangedHandler);
        content.viewer.camera.percentageChanged = 0.1;

        performance.mark('locations-end');
        performance.measure('locations-total', 'locations-start', 'locations-end');
        return clusters;

    } else {
        // All points mode: load every geocode (original behavior)
        if (loadingDiv) loadingDiv.innerHTML = 'Loading all geocodes...';

        const query = `
            SELECT DISTINCT
                pid,
                latitude,
                longitude
            FROM nodes
            WHERE otype = 'GeospatialCoordLocation'
              AND latitude IS NOT NULL
              AND longitude IS NOT NULL
        `;

        performance.mark('query-start');
        const data = await loadData(query, [], "loading_1", "locations");
        performance.mark('query-end');
        performance.measure('locations-query', 'query-start', 'query-end');
        const queryTime = performance.getEntriesByName('locations-query')[0].duration;

        if (!data) {
            if (loadingDiv) {
                loadingDiv.innerHTML = 'Query failed - check console for errors';
                loadingDiv.hidden = false;
            }
            return [];
        }

        console.log(`Query executed in ${queryTime.toFixed(0)}ms - retrieved ${data.length} locations`);

        content.points.removeAll();
        const defaultColor = Cesium.Color.fromCssColorString('#2E86AB');
        const defaultSize = 4;
        const CHUNK_SIZE = 500;
        const scalar = new Cesium.NearFarScalar(1.5e2, 2, 8.0e6, 0.2);

        performance.mark('render-start');
        for (let i = 0; i < data.length; i += CHUNK_SIZE) {
            const chunk = data.slice(i, i + CHUNK_SIZE);
            const endIdx = Math.min(i + CHUNK_SIZE, data.length);

            if (loadingDiv) {
                const pct = Math.round((endIdx / data.length) * 100);
                loadingDiv.innerHTML = `Rendering geocodes... ${endIdx.toLocaleString()}/${data.length.toLocaleString()} (${pct}%)`;
            }

            for (const row of chunk) {
                content.points.add({
                    id: row.pid,
                    position: Cesium.Cartesian3.fromDegrees(row.longitude, row.latitude, 0),
                    pixelSize: defaultSize,
                    color: defaultColor,
                    scaleByDistance: scalar,
                });
            }

            if (i + CHUNK_SIZE < data.length) {
                await new Promise(resolve => setTimeout(resolve, 0));
            }
        }
        performance.mark('render-end');
        performance.measure('locations-render', 'render-start', 'render-end');
        const renderTime = performance.getEntriesByName('locations-render')[0].duration;

        if (loadingDiv) loadingDiv.hidden = true;

        performance.mark('locations-end');
        performance.measure('locations-total', 'locations-start', 'locations-end');
        const totalTime = performance.getEntriesByName('locations-total')[0].duration;
        console.log(`Rendering completed in ${renderTime.toFixed(0)}ms, total: ${totalTime.toFixed(0)}ms`);

        content.enableTracking();
        return data;
    }
}


/**
 * Build the MOUSE_MOVE handler that highlights the point under the cursor and
 * shows a label for it.
 *
 * @param {Object} viewer - Object exposing `viewer` (the Cesium viewer),
 *   `currentSelection` and `pointLabel`, as set up by `CView`.
 * @returns {function(Object): void} Handler taking a movement event with an
 *   `endPosition`.
 */
function createShowPrimitive(viewer) {
    return function(movement) {
        const selectPoint = viewer.viewer.scene.pick(movement.endPosition);
        const hasPick = Cesium.defined(selectPoint);
        const previous = viewer.currentSelection;
        // True while the cursor keeps hovering the already-highlighted point.
        const isSamePoint = hasPick && previous !== null &&
            (selectPoint === previous || selectPoint.primitive === previous.primitive);

        // Clear previous selection when a different object is picked
        if (previous !== null && hasPick && !isSamePoint) {
            previous.primitive.pixelSize = previous._origSize || 4;
            previous.primitive.outlineColor = Cesium.Color.TRANSPARENT;
            // Reset the outline on the primitive itself (not on the pick
            // object), mirroring where the highlight below sets it.
            previous.primitive.outlineWidth = 0;
            viewer.currentSelection = null;
        }

        if (hasPick && selectPoint.hasOwnProperty("primitive")) {
            viewer.pointLabel.position = selectPoint.primitive.position;
            viewer.pointLabel.label.show = true;

            // Parse cluster info from ID (format: cluster_<hexid>_n<count>_<source>)
            const id = String(selectPoint.id || '');
            if (id.startsWith('cluster_')) {
                const parts = id.split('_');
                const count = parts[2] ? parts[2].replace('n', '') : '?';
                // The source is the remainder, so it may itself contain '_'.
                const source = parts.slice(3).join('_') || '?';
                const countNum = Number(count);
                const countLabel = Number.isFinite(countNum) ? countNum.toLocaleString() : count;
                viewer.pointLabel.label.text = `Cluster: ${countLabel} samples\nSource: ${source}\nCell: ${parts[1]}`;
            } else {
                viewer.pointLabel.label.text = `${selectPoint.id}`;
            }

            // Remember the un-highlighted size only when the highlight is
            // first applied; on repeated moves over the same point the
            // primitive is already at 20px and must not become the "original".
            selectPoint._origSize = isSamePoint ? previous._origSize : selectPoint.primitive.pixelSize;
            selectPoint.primitive.pixelSize = 20;
            selectPoint.primitive.outlineColor = Cesium.Color.YELLOW;
            selectPoint.primitive.outlineWidth = 3;
            viewer.currentSelection = selectPoint;
        } else {
            viewer.pointLabel.label.show = false;
        }
    }
}

class CView {
    constructor(target) {
        this.viewer = new Cesium.Viewer(
            target, {
                timeline: false,
                animation: false,
                baseLayerPicker: false,
                fullscreenElement: target,
                terrain: Cesium.Terrain.fromWorldTerrain()
            });
        this.currentSelection = null;
        this.point_size = 1;
        this.n_points = 0;
        // https://cesium.com/learn/cesiumjs/ref-doc/PointPrimitiveCollection.html
        this.points = new Cesium.PointPrimitiveCollection();
        this.viewer.scene.primitives.add(this.points);

        this.pointLabel = this.viewer.entities.add({
            label: {
            show: false,
            showBackground: true,
            font: "14px monospace",
            horizontalOrigin: Cesium.HorizontalOrigin.LEFT,
            verticalOrigin: Cesium.VerticalOrigin.BOTTOM,
            pixelOffset: new Cesium.Cartesian2(15, 0),
            // this attribute will prevent this entity clipped by the terrain
            disableDepthTestDistance: Number.POSITIVE_INFINITY,
            text:"",
            },
        });

        this.pickHandler = new Cesium.ScreenSpaceEventHandler(this.viewer.scene.canvas);
        // Can also do this rather than wait for the points to be generated
        //this.pickHandler.setInputAction(createShowPrimitive(this), Cesium.ScreenSpaceEventType.MOUSE_MOVE);

        this.selectHandler = new Cesium.ScreenSpaceEventHandler(this.viewer.scene.canvas);
        this.selectHandler.setInputAction((e) => {
            const selectPoint = this.viewer.scene.pick(e.position);
            if (Cesium.defined(selectPoint) && selectPoint.hasOwnProperty("primitive")) {
                mutable clickedPointId = selectPoint.id;
            }
        },Cesium.ScreenSpaceEventType.LEFT_CLICK);

    }

    enableTracking() {
        this.pickHandler.setInputAction(createShowPrimitive(this), Cesium.ScreenSpaceEventType.MOUSE_MOVE);
    }
}

// The page's single CView instance, constructed with the target
// "cesiumContainer". Other cells reach the viewer and point collection
// through `content.viewer` and `content.points`.
content = new CView("cesiumContainer");

/**
 * Look up the GeospatialCoordLocation row for a pid.
 *
 * @param {?string} pid - Location pid; `null`, `""` and `"unset"` mean
 *   "nothing selected".
 * @returns {Promise<Object|string|null>} The string "unset" when nothing is
 *   selected, the first matching row, or `null` when the query returned no
 *   rows (or `loadData` resolved to a falsy value).
 */
async function getGeoRecord(pid) {
    const nothingSelected = pid === null || pid === "" || pid == "unset";
    if (nothingSelected) {
        return "unset";
    }
    // Zenodo wide parquet has latitude/longitude as direct columns
    const q = `SELECT row_id, pid, otype, latitude, longitude
               FROM nodes WHERE otype='GeospatialCoordLocation' AND pid=?`;
    const rows = await loadData(q, [pid], "loading_geo", "geo");
    if (!rows || !rows.length) {
        return null;
    }
    return rows[0];
}

/**
 * WIDE FORMAT: Direct Location (p__sample_location / EVENT_SAMPLE_LOCATION).
 *
 * Finds up to 100 MaterialSampleRecords whose SamplingEvent lists the given
 * GeospatialCoordLocation in its `p__sample_location` column, using the
 * `p__*` list columns instead of edge-row JOINs.
 *
 * @param {?string} pid - pid of the GeospatialCoordLocation; `null`,
 *   `undefined`, `""` and `"unset"` mean "nothing selected".
 * @returns {Promise<Array<Object>>} Matching rows, or `[]` when nothing is
 *   selected or `loadData` resolved to `null`/`undefined`.
 */
async function get_samples_1(pid) {
    if (pid == null || pid === "" || pid === "unset") {
        return [];
    }
    // Direct Location (EVENT_SAMPLE_LOCATION) - WIDE FORMAT version
    // Uses p__* columns instead of edge rows
    // Zenodo wide parquet has latitude/longitude as direct columns
    const q = `
        SELECT
            geo.latitude,
            geo.longitude,
            site.label AS sample_site_label,
            site.pid AS sample_site_pid,
            samp.pid AS sample_pid,
            samp.label AS sample_label,
            samp.description AS sample_description,
            'direct_event_location' as location_path
        FROM nodes AS geo
        -- Wide format: SamplingEvent has p__sample_location column with geo row_ids
        JOIN nodes AS se ON (
            se.otype = 'SamplingEvent'
            AND list_contains(se.p__sample_location, geo.row_id)
        )
        -- Wide format: SamplingEvent has p__sampling_site column with site row_ids
        JOIN nodes AS site ON (
            site.otype = 'SamplingSite'
            AND list_contains(se.p__sampling_site, site.row_id)
        )
        -- Wide format: MaterialSampleRecord has p__produced_by column with event row_ids
        JOIN nodes AS samp ON (
            samp.otype = 'MaterialSampleRecord'
            AND list_contains(samp.p__produced_by, se.row_id)
        )
        WHERE geo.pid = ?
          AND geo.otype = 'GeospatialCoordLocation'
        LIMIT 100
    `;
    performance.mark('samples1-start');
    const result = await loadData(q, [pid], "loading_s1", "samples_1");
    performance.mark('samples1-end');
    performance.measure('samples1-query', 'samples1-start', 'samples1-end');
    // Measures accumulate across calls (oldest first); read the newest one so
    // the log reflects this call rather than the very first.
    const measures = performance.getEntriesByName('samples1-query');
    const queryTime = measures[measures.length - 1].duration;
    console.log(`Direct location query (wide) executed in ${queryTime.toFixed(0)}ms - retrieved ${result?.length || 0} samples`);
    return result ?? [];
}

/**
 * WIDE FORMAT: Site-Mediated Location (p__sampling_site → p__site_location).
 *
 * Finds up to 100 MaterialSampleRecords whose SamplingEvent references a
 * SamplingSite that lists the given GeospatialCoordLocation in its
 * `p__site_location` column.
 *
 * @param {?string} pid - pid of the GeospatialCoordLocation; `null`,
 *   `undefined`, `""` and `"unset"` mean "nothing selected".
 * @returns {Promise<Array<Object>>} Matching rows, or `[]` when nothing is
 *   selected or `loadData` resolved to `null`/`undefined`.
 */
async function get_samples_2(pid) {
    if (pid == null || pid === "" || pid === "unset") {
        return [];
    }
    // Site-Mediated Location (SITE_LOCATION) - WIDE FORMAT version
    // Zenodo wide parquet has latitude/longitude as direct columns
    const q = `
        SELECT
            geo.latitude,
            geo.longitude,
            site.label AS sample_site_label,
            site.pid AS sample_site_pid,
            samp.pid AS sample_pid,
            samp.label AS sample_label,
            samp.description AS sample_description,
            'via_site_location' as location_path
        FROM nodes AS geo
        -- Wide format: SamplingSite has p__site_location column with geo row_ids
        JOIN nodes AS site ON (
            site.otype = 'SamplingSite'
            AND list_contains(site.p__site_location, geo.row_id)
        )
        -- Wide format: SamplingEvent has p__sampling_site column with site row_ids
        JOIN nodes AS se ON (
            se.otype = 'SamplingEvent'
            AND list_contains(se.p__sampling_site, site.row_id)
        )
        -- Wide format: MaterialSampleRecord has p__produced_by column with event row_ids
        JOIN nodes AS samp ON (
            samp.otype = 'MaterialSampleRecord'
            AND list_contains(samp.p__produced_by, se.row_id)
        )
        WHERE geo.pid = ?
          AND geo.otype = 'GeospatialCoordLocation'
        LIMIT 100
    `;
    performance.mark('samples2-start');
    const result = await loadData(q, [pid], "loading_s2", "samples_2");
    performance.mark('samples2-end');
    performance.measure('samples2-query', 'samples2-start', 'samples2-end');
    // Measures accumulate across calls (oldest first); read the newest one so
    // the log reflects this call rather than the very first.
    const measures = performance.getEntriesByName('samples2-query');
    const queryTime = measures[measures.length - 1].duration;
    console.log(`Site-mediated location query (wide) executed in ${queryTime.toFixed(0)}ms - retrieved ${result?.length || 0} samples`);
    return result ?? [];
}

/**
 * WIDE FORMAT: Combined query (p__sample_location + p__sampling_site).
 *
 * Finds up to 100 MaterialSampleRecords whose SamplingEvent lists the given
 * GeospatialCoordLocation in `p__sample_location`, joined to the event's
 * SamplingSite via `p__sampling_site`. The joins match those of
 * `get_samples_1`; this variant does not emit a `location_path` column.
 *
 * @param {?string} pid - pid of the GeospatialCoordLocation; `null`,
 *   `undefined`, `""` and `"unset"` mean "nothing selected".
 * @returns {Promise<Array<Object>>} Matching rows, or `[]` when nothing is
 *   selected or `loadData` resolved to `null`/`undefined`.
 */
async function get_samples_at_geo_cord_location_via_sample_event(pid) {
    if (pid == null || pid === "" || pid === "unset") {
        return [];
    }
    // Combined query - WIDE FORMAT version
    // Zenodo wide parquet has latitude/longitude as direct columns
    const q = `
        SELECT
            geo.latitude,
            geo.longitude,
            site.label AS sample_site_label,
            site.pid AS sample_site_pid,
            samp.pid AS sample_pid,
            samp.label AS sample_label,
            samp.description AS sample_description
        FROM nodes AS geo
        -- Wide format: SamplingEvent.p__sample_location contains geo row_ids
        JOIN nodes AS se ON (
            se.otype = 'SamplingEvent'
            AND list_contains(se.p__sample_location, geo.row_id)
        )
        -- Wide format: SamplingEvent.p__sampling_site contains site row_ids
        JOIN nodes AS site ON (
            site.otype = 'SamplingSite'
            AND list_contains(se.p__sampling_site, site.row_id)
        )
        -- Wide format: MaterialSampleRecord.p__produced_by contains event row_ids
        JOIN nodes AS samp ON (
            samp.otype = 'MaterialSampleRecord'
            AND list_contains(samp.p__produced_by, se.row_id)
        )
        WHERE geo.pid = ?
          AND geo.otype = 'GeospatialCoordLocation'
        LIMIT 100
    `;
    performance.mark('eric-query-start');
    const result = await loadData(q, [pid], "loading_combined", "samples_combined");
    performance.mark('eric-query-end');
    performance.measure('eric-query', 'eric-query-start', 'eric-query-end');
    // Measures accumulate across calls (oldest first); read the newest one so
    // the log reflects this call rather than the very first.
    const measures = performance.getEntriesByName('eric-query');
    const queryTime = measures[measures.length - 1].duration;
    console.log(`Combined query (wide) executed in ${queryTime.toFixed(0)}ms - retrieved ${result?.length || 0} samples`);
    return result ?? [];
}

/**
 * List the rows that reference a location row id through the
 * `p__sample_location` or `p__site_location` list columns.
 *
 * @param {*} rowid - Row id to search for; `undefined`/`null` short-circuits.
 * @returns {Promise<*>} `[]` when no row id was given, otherwise the result
 *   of `db.query` (rows with `pid` and `otype`).
 */
async function locationUsedBy(rowid){
    const hasRowId = rowid !== undefined && rowid !== null;
    if (!hasRowId) {
        return [];
    }
    // Wide format: Check which entities reference this location via p__* columns
    const q = `
        SELECT pid, otype FROM nodes
        WHERE list_contains(p__sample_location, ?)
           OR list_contains(p__site_location, ?)
    `;
    // The same row id is bound to both placeholders.
    const bindings = [rowid, rowid];
    return db.query(q, bindings);
}

// Id of the most recently clicked point. Starts as the sentinel "unset",
// which the lookup functions treat as "nothing selected"; CView's LEFT_CLICK
// handler overwrites it with the picked point's id.
mutable clickedPointId = "unset";
// Loading flags to control UI clearing while fetching.
// Each flag is set to true by the matching selected* cell before its query
// and reset to false in that cell's `finally`.
mutable geoLoading = false;
mutable s1Loading = false;
mutable s2Loading = false;
mutable combinedLoading = false;

// Precompute selection-driven data with loading flags
// Geo record for the current `clickedPointId`, fetched via getGeoRecord().
// `geoLoading` is true for the duration of the lookup.
selectedGeoRecord = {
    mutable geoLoading = true;
    try {
        return await getGeoRecord(clickedPointId);
    } finally {
        // Reset the flag whether the lookup returned or threw.
        mutable geoLoading = false;
    }
}

// Samples for the current `clickedPointId` via get_samples_1()
// (direct event location). `s1Loading` is true while the query runs.
selectedSamples1 = {
    mutable s1Loading = true;
    try {
        return await get_samples_1(clickedPointId);
    } finally {
        // Reset the flag whether the query returned or threw.
        mutable s1Loading = false;
    }
}

// Samples for the current `clickedPointId` via get_samples_2()
// (site-mediated location). `s2Loading` is true while the query runs.
selectedSamples2 = {
    mutable s2Loading = true;
    try {
        return await get_samples_2(clickedPointId);
    } finally {
        // Reset the flag whether the query returned or threw.
        mutable s2Loading = false;
    }
}

// Samples for the current `clickedPointId` via
// get_samples_at_geo_cord_location_via_sample_event().
// `combinedLoading` is true while the query runs.
selectedSamplesCombined = {
    mutable combinedLoading = true;
    try {
        return await get_samples_at_geo_cord_location_via_sample_event(clickedPointId);
    } finally {
        // Reset the flag whether the query returned or threw.
        mutable combinedLoading = false;
    }
}

// Summary line: number of rows in `pointdata`, labelled "clusters" or
// "locations" according to `viewModeToggle`, plus the parquet path. In
// clustered mode an extra italic hint about size/colour/zoom is appended.
md`Retrieved ${pointdata.length} ${viewModeToggle === "clustered" ? "clusters" : "locations"} from ${parquet_path}.
${viewModeToggle === "clustered" ? "\n*Clustered view: point size reflects sample count, color reflects dominant source. Zoom in/out to change H3 resolution.*" : ""}`;
Loading…
Code
viewof pointdata = {
    const data_table = Inputs.table(locations, {
        header: {
            pid: "PID",
            latitude: "Latitude",
            longitude: "Longitude",
            location_type: "Location Type"
        },
    });
    return data_table;
}

The clicked point ID is “”.

1 getGeoRecord (selected)

Code
// Alias of the currently clicked point id.
pid = clickedPointId
// Alias of the geo record resolved for the clicked point.
testrecord = selectedGeoRecord;

2 Samples at Location (Wide Format)

This query finds MaterialSampleRecords whose SamplingEvent references this GeospatialCoordLocation via the p__sample_location column.

5 See Also