DataFrame Sparklines

An approach to sparklines within pandas dataframes using matplotlib.

import base64
import requests
import numpy as np
import pandas as pd
from time import sleep
from itertools import chain
from cStringIO import StringIO
from datetime import timedelta, date
from IPython.display import display, HTML

%pylab inline

Populating the interactive namespace from numpy and matplotlib

# Turn off the max column width so the HTML
# image tags don't get truncated.
# Newer pandas releases spell "no limit" as None and reject -1, while
# old releases only accept integers and use -1 for "no limit", so try
# the modern value first and fall back to the legacy one.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# Turning off the max column width will display all the data in
# our arrays, so limit the number of elements to display
pd.set_option('display.max_seq_items', 2)

def sparkline(data, figsize=(4, 0.25), **kwags):
    """
    Returns a HTML image tag containing a base64 encoded sparkline style plot

    The plot is a bare line (no spines, no ticks) with a red dot on the
    last value and a light fill between the line and the minimum value.

    Parameters
    ----------
    data : iterable of numbers
        Values to plot, in order. It is materialised into a list first, so
        generators are accepted. An empty iterable yields a blank plot.
    figsize : tuple
        Forwarded to ``plt.subplots`` as ``figsize``.
    **kwags
        Any further keyword arguments are forwarded to ``plt.subplots``.

    Returns
    -------
    str
        An ``<img>`` tag whose ``src`` is a base64 PNG data URI.
    """
    # Imported locally so the function does not depend on the Python 2 only
    # ``cStringIO`` module; PNG output is binary, so a bytes buffer is needed.
    from io import BytesIO

    data = list(data)

    fig, ax = plt.subplots(1, 1, figsize=figsize, **kwags)
    try:
        ax.plot(data)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xticks([])
        ax.set_yticks([])

        # The marker and the fill both need at least one value
        # (data[-1] and min(data) fail on an empty list).
        if data:
            last = len(data) - 1
            ax.plot(last, data[last], 'r.')
            ax.fill_between(range(len(data)), data,
                            len(data) * [min(data)], alpha=0.1)

        img = BytesIO()
        # Pin the format: the data URI below claims image/png.
        fig.savefig(img, format='png')
    finally:
        # Always release this figure, even if plotting raised, so mapping
        # the function over many rows does not accumulate open figures.
        plt.close(fig)

    # b64encode returns bytes; decode so the tag does not embed "b'...'"
    # on Python 3.
    encoded = base64.b64encode(img.getvalue()).decode('ascii')
    return '<img src="data:image/png;base64,{}"/>'.format(encoded)

Random Data Example

# Number of samples drawn from each distribution.
n = 100

# One (name, samples) pair per distribution; each pair becomes a row of
# the data frame below.
data = [
    ('rand',        np.random.rand(n)),
    ('randn',       np.random.randn(n)),
    ('beta',        np.random.beta(2, 1, size=n)),
    # The number of trials must be an integer; a float such as 3.4 is
    # merely truncated, so pass the effective value explicitly.
    ('binomial',    np.random.binomial(3, 0.22, size=n)),
    ('exponential', np.random.exponential(size=n)),
    ('geometric',   np.random.geometric(0.5, size=n)),
    ('laplace',     np.random.laplace(size=n)),
]
spark = pd.DataFrame(data, columns=['func', 'data'])
spark

# Turn every array in the data column into a sparkline <img> tag
# and keep the tags in a new 'sparklines' column
tags = spark['data'].map(sparkline)
spark['sparklines'] = tags

# The DataFrame's own HTML repr escapes markup, so build the table
# with escaping disabled and hand it to HTML for display
table_markup = spark.to_html(escape=False)
HTML(table_markup)

Aggregation Example

Query fixer.io for currency exchange rates against the Euro, aggregate them by currency code (stored in the `country` column), and display one sparkline per currency.

def get_fixer_rates(date):
    """
    Query fixer.io for currency exchange data

    Parameters
    ----------
    date
        Value interpolated into the request URL with ``%s``.

    Returns
    -------
    list of tuple
        One ``(date, base, key, value)`` tuple per entry of the response's
        ``rates`` mapping, where ``base`` is the response's ``base`` field.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    res = requests.get("http://api.fixer.io/%s" % date)
    sleep(0.5)  # be kind to the API!
    # Fail with a clear HTTP error instead of a KeyError or JSON decoding
    # error further down when the request was rejected.
    res.raise_for_status()
    payload = res.json()
    base = payload['base']
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    return [(date, base, code, rate) for code, rate in payload['rates'].items()]

def daterange(start_date, end_date, fmt="%Y-%m-%d"):
    """
    Yield each day from start_date up to, but excluding, end_date as a
    string formatted with fmt. Yields nothing when end_date is not after
    start_date.
    """
    span_days = int((end_date - start_date).days)
    offset = 0
    while offset < span_days:
        current = start_date + timedelta(offset)
        yield current.strftime(fmt)
        offset += 1

# Date strings starting at 2014-01-01, bounded by today's date.
dates = daterange(date(2014, 1, 1), date.today())

# One request per date; flatten the per-date lists into one list of rows.
# The loop variable is deliberately not called ``date``: under Python 2 a
# list-comprehension variable leaks into the enclosing scope and would
# replace the imported ``date`` class with a string, breaking a re-run.
rates = list(chain.from_iterable(get_fixer_rates(day) for day in dates))

currency = pd.DataFrame(rates, columns=['date', 'base', 'country', 'rates'])
# Drop the LVL and LTL rows.
# NOTE(review): presumably because these series are incomplete - confirm.
currency = currency[~currency['country'].isin(['LVL', 'LTL'])]
currency.head()

# Collapse each group's 'rates' values into a single sparkline <img> tag,
# then render the table with escaping disabled so the tags show as images
by_country = currency.groupby(['country'])
rates = by_country.agg({'rates': sparkline})
rates_markup = rates.to_html(escape=False)
HTML(rates_markup)

	date	base	country	rates
0	2014-01-01	EUR	USD	1.3791
1	2014-01-01	EUR	IDR	16764.7800
2	2014-01-01	EUR	BGN	1.9558
3	2014-01-01	EUR	ILS	4.7880
4	2014-01-01	EUR	GBP	0.8337

	rates
country
AUD
BGN
BRL
CAD
CHF
CNY
CZK
DKK
GBP
HKD
HRK
HUF
IDR
ILS
INR
JPY
KRW
MXN
MYR
NOK
NZD
PHP
PLN
RON
RUB
SEK
SGD
THB
TRY
USD
ZAR

	func	data
0	rand	[0.642376949857, 0.621500294112, ...]
1	randn	[-0.0912258299196, -0.589687946578, ...]
2	beta	[0.764326188995, 0.863402838683, ...]
3	binomial	[0, 2, ...]
4	exponential	[0.315013416556, 1.81946417402, ...]
5	geometric	[2, 3, ...]
6	laplace	[-0.177821026071, 1.72779222503, ...]