Source code for eazieda.histograms
import pandas as pd
import altair as alt
import numpy as np
def histograms(data, features, plot_width=100, plot_height=100, num_cols=2):
    """
    Generate histograms for numeric features and bar plots for
    categorical features.

    Each requested feature is routed by the dtype of its column: numeric
    columns are drawn as binned histograms, while ``category`` and
    ``object`` columns are drawn as bar charts of value counts. A requested
    feature whose column has any other dtype passes validation but is not
    drawn in either chart.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        A Pandas Dataframe
    features : list
        A list of strings that represents feature names. Repeated names
        are allowed and are plotted once.
    plot_width : int
        The width of each features sub plot. Default = 100
    plot_height : int
        The height of each features sub plot. Default = 100
    num_cols : int
        The number of columns in the final grid of plots. Default = 2

    Returns
    -------
    `altair plot`
        A vertically concatenated altair chart: the faceted histograms of
        the numeric features on top of the faceted bar plots of the
        categorical features. Both sub-charts are always present, even
        when one of the two feature groups is empty.

    Raises
    ------
    ValueError
        If `data` is not a Pandas DataFrame, if any name in `features` is
        not a column of `data`, or if `plot_width` / `plot_height` is not
        an integer.

    Examples
    --------
    >>> from eazieda.histograms import histograms
    >>> from vega_datasets import data
    >>> df = data.iris()
    >>> histograms(df, ['petalLength', 'petalWidth', 'sepalLength'],
    ...            num_cols=2)
    """
    if not isinstance(data, pd.DataFrame):
        raise ValueError("Please pass in a Pandas DataFrame for `data`")
    # Subset test instead of comparing lengths: a feature name that is
    # listed twice is still present and must not be reported as missing.
    if not set(features).issubset(data.columns):
        raise ValueError("All features must be present in dataframe")
    if not isinstance(plot_width, int) or not isinstance(plot_height, int):
        raise ValueError("plot_width and plot_height must be integer")

    # De-duplicate while keeping the caller's order, so the generated chart
    # specification is the same on every run (plain set iteration order
    # depends on string hashing).
    requested = list(dict.fromkeys(features))

    # Use data types to determine which columns to plot on which chart
    numeric_names = set(data.select_dtypes(include=np.number).columns)
    cat_names = set(
        data.select_dtypes(include=["category", "object"]).columns
    )
    numeric_cols = [name for name in requested if name in numeric_names]
    cat_cols = [name for name in requested if name in cat_names]

    numeric_chart = (
        alt.Chart(data)
        .transform_fold(numeric_cols, as_=["Numeric Features", "value"])
        .mark_bar()
        .encode(alt.X("value:Q", title="value", bin=True), y="count()")
        .properties(width=plot_width, height=plot_height)
        .facet(facet="Numeric Features:N", columns=num_cols)
        # Features can live on very different ranges, so every facet gets
        # its own x axis.
        .resolve_scale(x="independent")
    )

    # data.sample done here due to Altair referencing same data frame
    # otherwise and causing errors in concatenation later. Sampling every
    # row with a fixed seed yields a distinct, reproducible frame.
    categorical_chart = (
        alt.Chart(data.sample(data.shape[0], random_state=42))
        .transform_fold(cat_cols, as_=["Categorical Features", "value"])
        .mark_bar()
        .encode(alt.X("value:N"), y="count()")
        .properties(width=plot_width, height=plot_height)
        .facet(facet="Categorical Features:N", columns=num_cols)
    )

    # `&` stacks the two faceted charts vertically.
    final_chart = numeric_chart & categorical_chart
    final_chart.title = "Histograms for Numeric and Categorical Features"
    return final_chart