Tables
The Table class provides a tabular data structure for structured data processing. Tables are the primary data type returned by read() when loading CSV, JSON arrays, and other tabular formats.
For reading and writing table data, see the I/O guide. For data pipelines and the data {} block, see Tables & Data.
Constructor
Table
Table(rows, columns?) -> Table
Creates a table from an array of row objects. Optionally specify column names.
t = Table([
{ name: "Alice", age: 30, city: "NYC" },
{ name: "Bob", age: 25, city: "LA" },
{ name: "Carol", age: 35, city: "NYC" }
])
Properties
rows
t.rows -> [Object]
The underlying array of row objects.
columns
t.columns -> [String]
The column names in order.
shape
t.shape -> (Int, Int)
Returns (row_count, column_count).
t.shape // (3, 3)
Query
where
t.where(predicate) -> Table
Filters rows matching a predicate. Use the .column shorthand for column access.
adults = t.where(.age >= 18)
nyc = t.where(.city == "NYC")
select
t.select(...columns) -> Table
Returns a table with only the specified columns.
names = t.select("name", "age")
derive
t.derive(column = expr) -> Table
Adds or replaces a column with a computed value.
t.derive(.name_upper = .name |> upper())
t.derive(.age_group = if .age >= 30 { "senior" } else { "junior" })
sort_by
t.sort_by(column, desc?) -> Table
Sorts the table by a column, optionally in descending order.
t.sort_by(.age)
t.sort_by(.age, desc: true)
limit
t.limit(n) -> Table
Returns the first n rows.
top_5 = t.sort_by(.age, desc: true).limit(5)
Grouping and Aggregation
group_by
t.group_by(column) -> GroupedTable
Groups rows by a column value. Must be followed by agg().
t.group_by(.city)
agg
grouped.agg(name: agg_fn, ...) -> Table
Aggregates grouped data. Available aggregation functions:
| Function | Description |
|---|---|
| agg_sum(column) | Sum of column values |
| agg_count() | Number of rows in each group |
| agg_mean(column) | Mean (average) of column values |
| agg_median(column) | Median of column values |
| agg_min(column) | Minimum column value |
| agg_max(column) | Maximum column value |
summary = t
|> group_by(.city)
|> agg(
count: agg_count(),
avg_age: agg_mean(.age),
oldest: agg_max(.age)
)
Joins
join
t.join(other, opts) -> Table
Joins two tables on matching columns.
Options:
- on -- the column to join on (same name in both tables)
- left / right -- column names if they differ
- how -- join type: "inner" (default), "left", "outer"
users = Table([
{ id: 1, name: "Alice" },
{ id: 2, name: "Bob" }
])
orders = Table([
{ user_id: 1, amount: 50 },
{ user_id: 1, amount: 30 },
{ user_id: 2, amount: 70 }
])
result = users.join(orders, left: "id", right: "user_id")
// Inner join by default
left_result = users.join(orders, left: "id", right: "user_id", how: "left")
Reshaping
pivot
t.pivot(index, columns, values) -> Table
Pivots rows into columns.
sales = Table([
{ region: "East", quarter: "Q1", revenue: 100 },
{ region: "East", quarter: "Q2", revenue: 150 },
{ region: "West", quarter: "Q1", revenue: 200 }
])
sales.pivot("region", "quarter", "revenue")
// { region: "East", Q1: 100, Q2: 150 }
// { region: "West", Q1: 200, Q2: nil }
unpivot
t.unpivot(columns, names) -> Table
Converts columns into rows (the inverse of pivot).
explode
t.explode(column) -> Table
Expands array values in a column into separate rows.
t = Table([
{ name: "Alice", tags: ["admin", "user"] },
{ name: "Bob", tags: ["user"] }
])
t.explode("tags")
// { name: "Alice", tags: "admin" }
// { name: "Alice", tags: "user" }
// { name: "Bob", tags: "user" }
Cleaning
drop_duplicates
t.drop_duplicates(columns?) -> Table
Removes duplicate rows. Optionally specify columns to check for uniqueness.
t.drop_duplicates()
t.drop_duplicates(["name", "email"])
rename
t.rename(old, new) -> Table
Renames a column.
t.rename("name", "full_name")
cast
t.cast(column, type) -> Table
Converts a column to a different type.
t.cast("age", "Int")
t.cast("price", "Float")
drop_nil
t.drop_nil(column?) -> Table
Removes rows where the specified column is nil. If no column is specified, removes rows where any column is nil.
t.drop_nil(.email)
t.drop_nil()
fill_nil
t.fill_nil(column, value) -> Table
Replaces nil values in a column with a default value.
t.fill_nil(.score, 0)
t.fill_nil(.status, "unknown")
Inspection
peek
t.peek(n?, title?) -> Table
Prints a preview of the table and returns it unchanged, so it can be used as a passthrough in pipelines. Shows the first n rows (default 10).
data |> peek()
data |> peek(5, title: "Sample data")
describe
t.describe() -> Table
Returns summary statistics for numeric columns (count, mean, std, min, max, quartiles).
t.describe() |> peek()
schema_of
schema_of(table) -> Object
Returns the inferred schema (column names and types) of a table.
schema_of(users)
// { name: "String", age: "Int", email: "String" }
Combination
union
union(a, b) -> Table
Combines two tables with the same columns, appending all rows.
all_users = union(active_users, inactive_users)
Pipeline Example
result = read("sales.csv")
|> drop_nil(.amount)
|> fill_nil(.region, "Unknown")
|> where(.amount > 0)
|> derive(.quarter = date_format(.date, "QQ YYYY"))
|> group_by(.region)
|> agg(
total: agg_sum(.amount),
count: agg_count(),
avg: agg_mean(.amount)
)
|> sort_by(.total, desc: true)
|> peek(title: "Revenue by Region")
result |> write("revenue_summary.csv")