DataFrame Sparklines

An approach to sparklines within pandas dataframes using matplotlib.

In [1]:
import base64
import requests
import numpy as np
import pandas as pd
from time import sleep
from itertools import chain
from cStringIO import StringIO
from datetime import timedelta, date
from IPython.display import display, HTML

%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [2]:
# Turn off the max column width so the HTML
# image tags don't get truncated.
# pandas >= 1.0 spells "no limit" as None (-1 is deprecated there and
# rejected by pandas 2.x); older releases only accept an integer and use -1.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# Turning off the max column width would display all the data in
# our arrays, so limit the number of elements to display
pd.set_option('display.max_seq_items', 2)
In [3]:
def sparkline(data, figsize=(4, 0.25), **kwargs):
    """
    Render ``data`` as a sparkline and return it as an HTML image tag.

    The plot is a bare line chart (spines and ticks hidden) with a red dot on
    the final value and a light fill between the line and the series minimum.
    It is saved as a PNG and embedded in the tag as a base64 ``data:`` URI.

    Parameters
    ----------
    data : iterable of numbers
        Values to plot, in order. Copied into a list first, so any iterable
        can be passed. An empty iterable yields a blank image.
    figsize : tuple of float, optional
        ``(width, height)`` passed to ``plt.subplots``.
    **kwargs
        Forwarded unchanged to ``plt.subplots``.

    Returns
    -------
    str
        ``<img src="data:image/png;base64,..."/>``
    """
    data = list(data)

    fig, ax = plt.subplots(1, 1, figsize=figsize, **kwargs)
    try:
        ax.plot(data)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xticks([])
        ax.set_yticks([])

        # An empty series has neither a last point nor a minimum, so only
        # decorate when there is something to draw.
        if data:
            last = len(data) - 1
            # Draw on this figure's axes rather than pyplot's "current" axes.
            ax.plot(last, data[last], 'r.')
            ax.fill_between(range(len(data)), data, min(data), alpha=0.1)

        img = StringIO()
        # Force PNG so the bytes always match the MIME type in the tag,
        # whatever rcParams['savefig.format'] is set to.
        fig.savefig(img, format='png')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    # b64encode returns bytes on Python 3; decoding keeps "b'...'" out of the tag.
    encoded = base64.b64encode(img.getvalue()).decode('ascii')
    return '<img src="data:image/png;base64,{}"/>'.format(encoded)

Random Data Example

In [4]:
n = 100

# Fixed seed so the sample data (and therefore the sparklines) are
# identical on every Restart & Run All
np.random.seed(0)

# One row per distribution: (name of the numpy.random function, n samples)
data = [
    ('rand',        np.random.rand(n)),
    ('randn',       np.random.randn(n)),
    ('beta',        np.random.beta(2, 1, size=n)),
    # The number of trials must be an integer; a float is silently truncated
    ('binomial',    np.random.binomial(3, 0.22, size=n)),
    ('exponential', np.random.exponential(size=n)),
    ('geometric',   np.random.geometric(0.5, size=n)),
    ('laplace',     np.random.laplace(size=n))
]
spark = pd.DataFrame(data, columns=['func', 'data'])
spark
Out[4]:
func data
0 rand [0.642376949857, 0.621500294112, ...]
1 randn [-0.0912258299196, -0.589687946578, ...]
2 beta [0.764326188995, 0.863402838683, ...]
3 binomial [0, 2, ...]
4 exponential [0.315013416556, 1.81946417402, ...]
5 geometric [2, 3, ...]
6 laplace [-0.177821026071, 1.72779222503, ...]
In [5]:
# Turn each row's array into an inline <img> tag and keep the
# tags in a new 'sparklines' column
spark['sparklines'] = spark['data'].map(sparkline)

# The default notebook repr escapes HTML, which would show the tags as
# text; build the table with escaping off and render it explicitly
HTML(spark.to_html(escape=False))
Out[5]:
func data sparklines
0 rand [0.642376949857, 0.621500294112, ...]
1 randn [-0.0912258299196, -0.589687946578, ...]
2 beta [0.764326188995, 0.863402838683, ...]
3 binomial [0, 2, ...]
4 exponential [0.315013416556, 1.81946417402, ...]
5 geometric [2, 3, ...]
6 laplace [-0.177821026071, 1.72779222503, ...]

Aggregation Example

Query fixer.io for daily currency exchange rates against the euro, then aggregate by currency (the `country` column holds currency codes such as USD or GBP) and display one sparkline per currency.

In [6]:
def get_fixer_rates(date):
    """
    Query fixer.io for the currency exchange data of a single day.

    Parameters
    ----------
    date : str
        Day to look up; interpolated into the URL path as-is (the notebook
        passes ``YYYY-MM-DD`` strings).

    Returns
    -------
    list of tuple
        One ``(date, base, currency, rate)`` tuple per entry in the
        response's ``rates`` mapping; ``base`` is the response's ``base``
        field.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the API does not respond within the timeout.
    """
    # A timeout keeps one stalled request from hanging the whole download loop.
    res = requests.get("http://api.fixer.io/%s" % date, timeout=10)
    sleep(0.5)  # be kind to the API!
    # Surface HTTP errors here instead of as a confusing JSON/KeyError below.
    res.raise_for_status()
    payload = res.json()
    base = payload['base']
    # .items() (not .iteritems()) works on both Python 2 and Python 3.
    return [(date, base, code, rate) for code, rate in payload['rates'].items()]
In [7]:
def daterange(start_date, end_date, fmt="%Y-%m-%d"):
    """
    Yield each day from ``start_date`` up to, but not including,
    ``end_date`` as a string formatted with ``fmt``.

    Nothing is yielded when ``end_date`` is not after ``start_date``.
    """
    span = end_date - start_date
    total_days = int(span.days)
    for offset in range(total_days):
        current = start_date + timedelta(days=offset)
        yield current.strftime(fmt)

# Every day from 2014-01-01 up to (but not including) today
dates = daterange(date(2014, 1, 1), date.today())

# One request per day, flattened into a single list of rows.
# The loop variable is deliberately not called `date`: on Python 2 a list
# comprehension leaks its variable into the notebook namespace, which would
# replace the imported `date` class with a string and break re-running this cell.
rates = list(chain.from_iterable(get_fixer_rates(day) for day in dates))
In [8]:
# One row per (day, currency) pair returned by the API
currency = pd.DataFrame(rates, columns=['date', 'base', 'country', 'rates'])

# Drop LVL and LTL
# NOTE(review): presumably excluded because their series stop part-way
# through the period - confirm
dropped = currency['country'].isin(['LVL', 'LTL'])
currency = currency.loc[~dropped]
currency.head()
Out[8]:
date base country rates
0 2014-01-01 EUR USD 1.3791
1 2014-01-01 EUR IDR 16764.7800
2 2014-01-01 EUR BGN 1.9558
3 2014-01-01 EUR ILS 4.7880
4 2014-01-01 EUR GBP 0.8337
In [9]:
# One sparkline per currency code: each group's 'rates' values are handed
# to sparkline(). The result gets its own name so the `rates` list fetched
# earlier (which `currency` is built from) is not overwritten, keeping the
# earlier cells re-runnable.
rate_sparklines = currency.groupby('country').aggregate({'rates': sparkline})
HTML(rate_sparklines.to_html(escape=False))
Out[9]:
rates
country
AUD
BGN
BRL
CAD
CHF
CNY
CZK
DKK
GBP
HKD
HRK
HUF
IDR
ILS
INR
JPY
KRW
MXN
MYR
NOK
NZD
PHP
PLN
RON
RUB
SEK
SGD
THB
TRY
USD
ZAR
In [ ]: