import ibis
import ibis.selectors as s
ibis.__version__
'8.0.0'
Ibis team
August 2, 2023
Ibis 6.1.0 is a minor release that includes new features, backend improvements, bug fixes, documentation improvements, and refactors. We are excited to see further adoption of the dataframe interchange protocol enabling visualization and other libraries to be used more easily with Ibis.
You can view the full changelog in the release notes.
If you’re new to Ibis, see how to install and the getting started tutorial.
To follow along with this blog, ensure you’re on 'ibis-framework>=6.1,<7'. First, we'll set up Ibis and fetch some sample data to use.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┘
With the introduction of __dataframe__
support in v6.0.0 and efficiency improvements in this release, Ibis now works with Altair, Plotly, plotnine, and any other visualization library that implements the protocol. This enables passing Ibis tables directly to visualization libraries without a .to_pandas()
or .to_pyarrow()
call for any of the 15+ backends supported, with data efficiently transferred through Apache Arrow.
# Count the rows for each species ...
grouped = t.group_by("species").aggregate(count=ibis._.count())
# ... then sort so the species with the highest count comes first.
grouped = grouped.order_by(ibis.desc("count"))
grouped
┏━━━━━━━━━━━┳━━━━━━━┓ ┃ species ┃ count ┃ ┡━━━━━━━━━━━╇━━━━━━━┩ │ string │ int64 │ ├───────────┼───────┤ │ Adelie │ 152 │ │ Gentoo │ 124 │ │ Chinstrap │ 68 │ └───────────┴───────┘
A more modular, composable, and scalable way of working with data is taking shape with __dataframe__
and __array__
support in Ibis and increasingly the Python data ecosystem. Let's combine the above with PCA after some preprocessing in Ibis to visualize all numeric columns in 2D.
import ibis.selectors as s
def transform(t):
    """Add z-score columns for the numeric columns of ``t`` and drop null rows.

    For every column matched by ``s.numeric()`` a new column named
    ``<column>_zscore`` is added, holding ``(x - mean(x)) / std(x)``.
    ``dropna()`` is then applied to the result.

    Parameters
    ----------
    t
        The Ibis table expression to preprocess.

    Returns
    -------
    The table expression with the extra ``*_zscore`` columns, after
    ``dropna()``.
    """
    # Standardize each numeric column: subtract its mean, divide by its
    # standard deviation.
    zscores = s.across(
        s.numeric(), {"zscore": lambda x: (x - x.mean()) / x.std()}
    )
    return t.mutate(zscores).dropna()
# Preprocess the sample table `t`; the bare `f` below displays the result
# when run as a notebook cell.
f = transform(t)
f
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ bill_length_mm_zscore ┃ bill_depth_mm_zscore ┃ flipper_length_mm_zscore ┃ body_mass_g_zscore ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ float64 │ float64 │ float64 │ float64 │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────────────┼──────────────────────┼──────────────────────────┼────────────────────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ -0.883205 │ 0.784300 │ -1.416272 │ -0.563317 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │ -0.809939 │ 0.126003 │ -1.060696 │ -0.500969 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │ -0.663408 │ 0.429833 │ -0.420660 │ -1.186793 │ │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ 3450 │ female │ 2007 │ -1.322799 │ 1.088129 │ -0.562890 │ -0.937403 │ │ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ 3650 │ male │ 2007 │ -0.846572 │ 1.746426 │ -0.776236 │ -0.688012 │ │ Adelie │ Torgersen │ 38.9 │ 17.8 │ 181 │ 3625 │ female │ 2007 │ -0.919837 │ 0.328556 │ -1.416272 │ -0.719186 │ │ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ 4675 │ male │ 2007 │ -0.864888 │ 1.240044 │ -0.420660 │ 0.590115 │ │ Adelie │ Torgersen │ 41.1 │ 17.6 │ 182 │ 3200 │ female │ 2007 │ -0.516876 │ 0.227280 │ -1.345156 │ -1.249141 │ │ Adelie │ Torgersen │ 38.6 │ 21.2 │ 191 │ 3800 │ male │ 2007 │ -0.974787 │ 2.050255 │ -0.705121 │ -0.500969 │ │ Adelie │ Torgersen │ 34.6 │ 21.1 │ 198 │ 4400 │ male │ 2007 │ 
-1.707443 │ 1.999617 │ -0.207315 │ 0.247203 │ │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────────────┴──────────────────────┴──────────────────────────┴────────────────────┘
import plotly.express as px
from sklearn.decomposition import PCA
# Fit a 3-component PCA on the columns whose names contain "zscore".
n_components = 3
X = f.select(s.contains("zscore"))
pca = PCA(n_components=n_components).fit(X)

# Wrap the projected array in an in-memory table and rename its columns
# to pc1..pc3.
t_pca = ibis.memtable(pca.transform(X))
t_pca = t_pca.relabel({"col0": "pc1", "col1": "pc2", "col2": "pc3"})

# Attach a row number to both tables and join on it so that each row of `f`
# gets its principal components.
# NOTE(review): this presumably relies on both tables keeping the same row
# order -- confirm for the backend in use.
f = f.mutate(row_number=ibis.row_number().over())
f = f.join(t_pca.mutate(row_number=ibis.row_number().over()), "row_number")

# 3D scatter plot of the three components, colored by species.
px.scatter_3d(f.to_pandas(), x="pc1", y="pc2", z="pc3", color="species")
Numerous backends received improvements. See the release notes for more details.
The DataFusion backend (and a few others) received several improvements from community member @mesejo with memtables and many new operations now supported. Some highlights include:
Some remaining gaps in CREATE TABLE
DDL options for BigQuery have been filled in, including the ability to pass in overwrite=True
for table creation.
The PySpark backend now supports reading/writing Delta Lake tables. Your PySpark session must be configured to use the Delta Lake package and you must have the delta
package installed in your environment.
The .sql
API is now supported in Trino, enabling you to chain Ibis and SQL together.
Scalar Python UDFs are now supported in SQLite.
Additionally, URL parsing has been added:
Various new features were added.
.nunique() supported on tables
You can now call .nunique() on tables to get the number of unique rows.
to_sql returns a str type
The ibis.expr.sql.SQLString type resulting from to_sql is now a proper str subclass, enabling use without casting to str first.
ibis.array
Note that arrays must still be of a single type.
┏━━━━━━━━━━━━━━━━━━━━━┓ ┃ Array() ┃ ┡━━━━━━━━━━━━━━━━━━━━━┩ │ array<string> │ ├─────────────────────┤ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ ['Adelie', 'hello'] │ │ … │ └─────────────────────┘
concat and repeat methods
You can still use + or * in typical Python fashion, with new and more explicit concat and repeat methods added in this release.
This allows for joins with boolean predicates.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ species_right ┃ island_right ┃ bill_length_mm_right ┃ bill_depth_mm_right ┃ flipper_length_mm_right ┃ body_mass_g_right ┃ sex_right ┃ year_right ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────┼──────────────┼──────────────────────┼─────────────────────┼─────────────────────────┼───────────────────┼───────────┼────────────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ NULL │ NULL │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ 3450 │ female │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ 3650 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ 
Torgersen │ 38.9 │ 17.8 │ 181 │ 3625 │ female │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ 4675 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ 3475 │ NULL │ 2007 │ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ 4250 │ NULL │ 2007 │ │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ species_right ┃ island_right ┃ bill_length_mm_right ┃ bill_depth_mm_right ┃ flipper_length_mm_right ┃ body_mass_g_right ┃ sex_right ┃ year_right ┃ ┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ └─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ species_right ┃ island_right ┃ bill_length_mm_right ┃ bill_depth_mm_right ┃ flipper_length_mm_right ┃ body_mass_g_right ┃ sex_right ┃ year_right ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────┼──────────────┼──────────────────────┼─────────────────────┼─────────────────────────┼───────────────────┼───────────┼────────────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ NULL │ NULL │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ 3450 │ female │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ 3650 │ male │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ 38.9 │ 17.8 │ 181 │ 3625 │ female │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ 
NULL │ NULL │ │ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ 4675 │ male │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ 3475 │ NULL │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ 4250 │ NULL │ 2007 │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ NULL │ │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘
Several internal refactors that shouldn't affect normal usage were made. See the release notes for more details.
Ibis v6.1.0 brings exciting enhancements to the library that enable broader ecosystem adoption of Python standards.
As always, try Ibis by installing and getting started.
If you run into any issues or find support is lacking for your backend, open an issue or discussion and let us know!