Example scripts

A list of examples of scripts to use in your Dataform projects.

Basic examples

Create a view

definitions/new_view.sqlx
1config { type: "view" }
2
3select * from source_data

Create a table

definitions/new_table.sqlx
1config { type: "table" }
2
3select * from source_data

Use the ref function

definitions/new_table_with_ref.sqlx
1config { type: "table" }
2
3select * from ${ref("source_data")}

Run several SQL operations

definitions/operations.sqlx
1
2config { type: "operations" }
3
4delete from datatable where country = 'GB'
5---
6delete from datatable where country = 'FR'

Add documentation to a table, view, or declaration

definitions/documented_table.sqlx
1
2config { type: "table",
3         description: "This table is an example",
4         columns:{
5             user_name: "Name of the user",
6             user_id: "ID of the user"
7         } }
8
9select user_name, user_id from ${ref("source_data")}

Add assertions to a table, view, or declaration

definitions/tested_table.sqlx
1
2config {
3  type: "table",
4  assertions: {
5    uniqueKey: ["user_id"],
6    nonNull: ["user_id", "customer_id"],
7    rowConditions: [
8      'signup_date is null or signup_date > "2019-01-01"',
9      'email like "%@%.%"'
10    ]
11  }
12}
13select ...

Add a custom assertion

definitions/custom_assertion.sqlx
1
2config { type: "assertion" }
3
4select
5  *
6from
7  ${ref("source_data")}
8where
9  a is null
10  or b is null
11  or c is null

Run custom SQL before or after creating a table

definitions/table_with_preops_and_postops.sqlx
1
2config {type: "table"}
3
4select * from ...
5
6pre_operations {
7  insert into table ...
8}
9
10post_operations {
11  grant select on ${self()} to group "allusers@dataform.co"
12  ---
13  grant select on ${self()} to group "allotherusers@dataform.co"
14}

Incremental tables examples

Add new rows dates for new dates in source data

definitions/incremental_table.sqlx
1
2config { type: "incremental" }
3
4select date(timestamp) as date, action
5from weblogs.user_actions
6
7${ when(incremental(), `where timestamp > (select max(date) from ${self()})`)

Take a snapshot of a table periodically

definitions/snapshots_table.sqlx
1
2config { type: "incremental" }
3
4SELECT current_date() AS snapshot_date, customer_id, name, account_settings FROM productiondb.customers
5
6${ when(incremental(), `where snapshot_date > (select max(snapshot_date) from ${self()})`) }

Examples of includes

Use global variables

includes/contants.js
1const project_id = "project_id";
2const first_date = "'1970-01-01'";
3module.exports = {
4  project_id,
5  first_date
6};

definitions/new_table.sqlx
1config {type: "table"}
2
3select * from source_table where date > ${contants.first_date}

Create a country mapping

includes/mapping.js
1function country_group(country){
2  return `
3  case
4    when ${country} in ('US', 'CA') then 'NA'
5    when ${country} in ('GB', 'FR', 'DE', 'IT', 'PL', 'SE') then 'EU'
6    when ${country} in ('AU') then ${country}
7    else 'Other'
8  end`;

definitions/new_table.sqlx
1config { type: "table"}
2
3select
4  country as country,
5  ${mapping.country_group("country")} as country_group,
6  device_type as device_type,
7  sum(revenue) as revenue,
8  sum(pageviews) as pageviews,
9  sum(sessions) as sessions
10
11from ${ref("source_table")}
12
13group by 1, 2, 3

compiled.sql
1select
2  country as country,
3  case
4    when country in ('US', 'CA') then 'NA'
5    when country in ('GB', 'FR', 'DE', 'IT', 'PL', 'SE') then 'EU'
6    when country in ('AU') then country
7    else 'Other'
8  end as country_group,
9  device_type as device_type,
10  sum(revenue) as revenue,
11  sum(pageviews) as pageviews,
12  sum(sessions) as sessions
13
14from "dataform"."source_table"
15
16group by 1, 2, 3

Generate a SQL script with a custom function

includes/script_builder.js
1function render_script(table, dimensions, metrics) {
2  return `
3      select
4      ${dimensions.map(field => `${field} as ${field}`).join(",")},
5      ${metrics.map(field => `sum(${field}) as ${field}`).join(",\n")}
6      from ${table}
7      group by ${dimensions.map((field, i) => `${i + 1}`).join(", ")}
8    `;
9}
10
11module.exports = { render_script };

definitions/new_table.sqlx
1config {
2    type: "table",
3    tags: ["advanced", "hourly"],
4    disabled: true
5}
6
7${script_builder.render_script(ref("source_table"),
8                               ["country", "device_type"],
9                               ["revenue", "pageviews", "sessions"]
10                               )}

compiled.sql
1select
2  country as country,
3  device_type as device_type,
4  sum(revenue) as revenue,
5  sum(pageviews) as pageviews,
6  sum(sessions) as sessions
7
8from "dataform"."source_table"
9
10group by 1, 2

Extracting browser and platform from Segment's context_user_agent field

includes/segment.js
1
2// source: https://discourse.looker.com/t/parsing-user-agent-into-device-type-manufacturer-browser/1206/2
3
4function platformStart(user_agent) {
5  return `STRPOS(${user_agent}, '(') + 1`;
6}
7
8function platformRaw(user_agent) {
9  return `SUBSTR(${user_agent}, ${platformStart(user_agent)}, 100)`;
10}
11
12function platformEndInitial(user_agent) {
13  return `
14    CASE
15      WHEN STRPOS(${platformRaw(user_agent)}, ';') = 0
16      THEN STRPOS(${platformRaw(user_agent)}, ')')
17      ELSE STRPOS(${platformRaw(user_agent)}, ';')
18    END`;
19}
20
21function platformEnd(user_agent) {
22  return `
23CASE WHEN ${platformEndInitial(user_agent)} = 0 THEN 0 ELSE ${platformEndInitial(user_agent)} - 1 END`;
24}
25
26function platform(user_agent) {
27  return `SUBSTR(${user_agent}, ${platformStart(user_agent)}, ${platformEnd(user_agent)})`;
28}
29
30function browser(user_agent) {
31  return `
32    CASE
33      WHEN ${user_agent} LIKE '%Firefox/%' THEN 'Firefox'
34      WHEN ${user_agent} LIKE '%Chrome/%' OR ${user_agent} LIKE '%CriOS%' THEN 'Chrome'
35      WHEN ${user_agent} LIKE '%MSIE %' THEN 'IE'
36      WHEN ${user_agent} LIKE '%MSIE+%' THEN 'IE'
37      WHEN ${user_agent} LIKE '%Trident%' THEN 'IE'
38      WHEN ${user_agent} LIKE '%iPhone%' THEN 'iPhone Safari'
39      WHEN ${user_agent} LIKE '%iPad%' THEN 'iPad Safari'
40      WHEN ${user_agent} LIKE '%Opera%' THEN 'Opera'
41      WHEN ${user_agent} LIKE '%BlackBerry%' AND ${user_agent} LIKE '%Version/%' THEN 'BlackBerry WebKit'
42      WHEN ${user_agent} LIKE '%BlackBerry%' THEN 'BlackBerry'
43      WHEN ${user_agent} LIKE '%Android%' THEN 'Android'
44      WHEN ${user_agent} LIKE '%Safari%' THEN 'Safari'
45      WHEN ${user_agent} LIKE '%bot%' THEN 'Bot'
46      WHEN ${user_agent} LIKE '%http://%' THEN 'Bot'
47      WHEN ${user_agent} LIKE '%www.%' THEN 'Bot'
48      WHEN ${user_agent} LIKE '%Wget%' THEN 'Bot'
49      WHEN ${user_agent} LIKE '%curl%' THEN 'Bot'
50      WHEN ${user_agent} LIKE '%urllib%' THEN 'Bot'
51      ELSE 'Unknown'
52    END`;
53}
54
55module.exports = {
56  platform,
57  browser
58}

1// definitions/top_browser_platforms
2
3config{
4  type: "view"
5}
6
7select
8  ${segment.browser("context_user_agent")} as browser,
9  ${segment.platform("context_user_agent")} as platform,
10  count(distinct user_id) as users
11from
12  javascript.pages
13group by
14  1,2
15order by
16  users desc
17limit 20;

Examples using the JS API

Generating one table per country

definitions/one_table_per_country.js
1
2const countries = ["GB", "US", "FR", "TH", "NG"];
3
4countries.forEach(country => {
5  publish("reporting_" + country)
6    .dependencies(["source_table"])
7    .query(
8      ctx => `
9      select '${country}' as country
10      `
11    );
12});

Declaring multiple sources within one file

definitions/external_dependencies.js
1
2declare({
3  schema: "stripe",
4  name: "charges"
5});
6
7declare({
8  schema: "shopify",
9  name: "orders"
10});
11
12declare({
13  schema: "salesforce",
14  name: "accounts"
15});

Declaring multiple sources within one file using forEach

definitions/external_dependencies.js
1
2["charges", "subscriptions", "line_items", "invoices"].
3  forEach(name => declare({
4    schema: "stripe",
5    name})
6);

Deleting sensitive information in all tables containing PII

definitions/delete_pii.js
1
2const pii_tables = ["users", "customers", "leads"];
3pii_tables.forEach(table =>
4  operate(`gdpr_cleanup: ${table}`,
5    ctx => `
6      delete from raw_data.${table}
7      where user_id in (select * from users_who_requested_deletion)`)
8      .tags(["gdpr_deletion"]))
9);

Adding preOps and postOps using the JS API

definitions/pre_and_post_ops_example.js
1
2publish("example")
3  .query(ctx => `SELECT * FROM ${ctx.ref("other_table")}`)
4  .preOps(ctx => `delete ${ctx.self()}`)
5  .postOps(ctx => `grant select on ${ctx.self()} to role`)

Creating incremental tables using the JS API

definitions/incremental_example.js
1
2publish("incremental_example", {
3  type: "incremental"
4}).query(ctx => `
5  SELECT * FROM ${ctx.ref("other_table")}
6  ${ctx.when(ctx.incremental(),`WHERE timestamp > (SELECT MAX(date) FROM ${ctx.self()}`)}
7`)

Misc

Use inline variables and functions

definitions/script_with_variables.sqlx
1
2js {
3 const foo = 1;
4 function bar(number){
5     return number+1;
6 }
7}
8
9select
10 ${foo} as one,
11 ${bar(foo)} as two

Perfom a unit test on a SQL query

definitions/query_to_be_tested.sqlx
1select
2  floor(age / 5) * 5 as age_group,
3  count(1) as user_count
4from ${ref("source_table")}
5group by age_group
6order by age_group

definitions/unit_test_on_query.sqlx
1config {
2  type: "test",
3  dataset: "source_table"
4}
5
6input "ages" {
7  select 15 as age union all
8  select 21 as age union all
9  select 24 as age union all
10  select 34 as age
11}
12
13select 15 as age_group, '1' as user_count union all
14select 20 as age_group, '2' as user_count union all
15select 30 as age_group, '1' as user_count

Backfill a daily table

definitions/backfill_daily_data.js
1
2var getDateArray = function(start, end) {
3  var startDate = new Date(start); //YYYY-MM-DD
4  var endDate = new Date(end); //YYYY-MM-DD
5
6  var arr = new Array();
7  var dt = new Date(startDate);
8  while (dt <= endDate) {
9    arr.push(new Date(dt).toISOString().split("T")[0]);
10    dt.setDate(dt.getDate() + 1);
11  }
12  return arr;
13};
14
15var dateArr = getDateArray("2020-03-01", "2020-04-01");
16
17// step 1: create table
18operate(`create table`, 'create table if not exists backfill_table (`fields`) `);
19// step 2: insert into the table
20
21dateArr.forEach((day, i) =>
22  operate(`backfill ${day}`
23   `insert into backfill_table select fields where day = '${day}'`)
24);

Build a rolling 30-days table that update incrementally

definitions/incremental_example.sql
1config {type: "incremental"}
2
3postOperations {
4  delete from ${self()} where date < (date_add(Day, -30, CURRENT_DATE))
5}
6
7select
8 date(timestamp) as date,
9 order_id,
10from source_table
11  ${ when(incremental(), `where timestamp > (select max(date) from ${self()})`) }

What's next

Example projects

A list of example projects for Snowflake, BigQuery, and Redshift.

Sitemap