H3 Quickstart (Databricks SQL)
The H3 geospatial functions quickstart on this page illustrates the following:
How to load geolocation dataset(s) into the Unity Catalog.
How to convert latitude and longitude columns to H3 cell columns.
How to convert zip code polygon or multipolygon WKT columns to H3 cell columns.
How to query for pickup and dropoff analysis from the LaGuardia Airport to Manhattan’s Financial District.
How to render H3 aggregate counts on a map.
Example notebooks and queries
Prepare Unity Catalog Data
In this notebook we:
Set up the public taxi dataset from Databricks Filesystem.
Set up the NYC Zip Code dataset.
Databricks SQL Queries with Databricks Runtime 11.3 LTS and above
Query 1: Verify base data has been setup. See Notebook.
use catalog geospatial_docs;
use database nyc_taxi;
show tables;
-- Verify initial data is setup (see instructions in setup notebook)
-- select format_number(count(*),0) as count from yellow_trip;
-- select * from nyc_zipcode;
Query 2: H3 NYC Zip Code - Apply h3_polyfillash3 at resolution 12
.
use catalog geospatial_docs;
use database nyc_taxi;
-- drop table if exists nyc_zipcode_h3_12;
create table if not exists nyc_zipcode_h3_12 as (
select
explode(h3_polyfillash3(geom_wkt, 12)) as cell,
zipcode,
po_name,
county
from
nyc_zipcode
);
-- optional: zorder by `cell`
optimize nyc_zipcode_h3_12 zorder by (cell);
select
*
from
nyc_zipcode_h3_12;
Query 3: H3 Taxi Trips - Apply h3_longlatash3 at resolution 12
.
use catalog geospatial_docs;
use database nyc_taxi;
-- drop table if exists yellow_trip_h3_12;
create table if not exists yellow_trip_h3_12 as (
select
h3_longlatash3(pickup_longitude, pickup_latitude, 12) as pickup_cell,
h3_longlatash3(dropoff_longitude, dropoff_latitude, 12) as dropoff_cell,
*
except
(
rate_code_id,
store_and_fwd_flag
)
from
yellow_trip
);
-- optional: zorder by `pickup_cell`
-- optimize yellow_trip_h3_12 zorder by (pickup_cell);
select
*
from
yellow_trip_h3_12
where pickup_cell is not null;
Query 4: H3 LGA Pickups - 25M pickups from LaGuardia (LGA)
use catalog geospatial_docs;
use database nyc_taxi;
create
or replace view lga_pickup_h3_12 as (
select
t.*
except(cell),
s.*
from
yellow_trip_h3_12 as s
inner join nyc_zipcode_h3_12 as t on s.pickup_cell = t.cell
where
t.zipcode = '11371'
);
select
format_number(count(*), 0) as count
from
lga_pickup_h3_12;
-- select
-- *
-- from
-- lga_pickup_h3_12;
Query 5: H3 Financial District Dropoffs - 34M total drop offs in Financial District
use catalog geospatial_docs;
use database nyc_taxi;
create
or replace view fd_dropoff_h3_12 as (
select
t.*
except(cell),
s.*
from
yellow_trip_h3_12 as s
inner join nyc_zipcode_h3_12 as t on s.dropoff_cell = t.cell
where
t.zipcode in ('10004', '10005', '10006', '10007', '10038')
);
select
format_number(count(*), 0) as count
from
fd_dropoff_h3_12;
-- select * from fd_dropoff_h3_12;
Query 6: H3 LGA-FD - 827K drop offs in FD with pickup from LGA
use catalog geospatial_docs;
use database nyc_taxi;
create
or replace view lga_fd_dropoff_h3_12 as (
select
*
from
fd_dropoff_h3_12
where
pickup_cell in (
select
distinct pickup_cell
from
lga_pickup_h3_12
)
);
select
format_number(count(*), 0) as count
from
lga_fd_dropoff_h3_12;
-- select * from lga_fd_dropoff_h3_12;
Query 7: LGA-FD by zip code - Count FD drop offs by zip code + bar chart
use catalog geospatial_docs;
use database nyc_taxi;
select
zipcode,
count(*) as count
from
lga_fd_dropoff_h3_12
group by
zipcode
order by
zipcode;
Query 8: LGA-FD by H3 - Count FD drop offs by H3 cell + map marker visualization
use catalog geospatial_docs;
use database nyc_taxi;
select
zipcode,
dropoff_cell,
h3_centerasgeojson(dropoff_cell) :coordinates [0] as dropoff_centroid_x,
h3_centerasgeojson(dropoff_cell) :coordinates [1] as dropoff_centroid_y,
format_number(count(*), 0) as count_disp,
count(*) as `count`
from
lga_fd_dropoff_h3_12
group by
zipcode,
dropoff_cell
order by
zipcode,
`count` DESC;
Notebooks for Databricks Runtime 11.3 LTS and above
Same quickstart structure as in Databricks SQL, using Spark Python bindings within Notebooks + kepler.gl.
Same quickstart structure as in Databricks SQL, using Spark Scala bindings within Notebooks + kepler.gl via Python cells.
Same quickstart structure as in Databricks SQL, using Spark SQL bindings within Notebooks + kepler.gl via Python cells.