Creating long dataframes from puffy tables¶

This tutorial will show the basic features of using the puffbird.puffy_to_long method

[1]:

import numpy as np
import pandas as pd
import puffbird as pb

A weirdly complex table¶

First, we will create a puffy dataframe as an example:

[2]:

df = pd.DataFrame({
    # a and c have the same data repeated three times
    # b is just a bunch of numpy arrays of the same shapes
    # d is also just a bunch of numpy arrays of different shapes
    # e contains various pandas DataFrames with the same column structures
    #   and the same index format.
    # f contains various pandas DataFrames with different structures
    # g contains mixed data types
    # missing data is also included
    'a': [
        'aa', 'bb', 'cc', 'dd',
        'aa', 'bb', 'cc', 'dd',
        'aa', 'bb', 'cc', 'dd'
    ],
    'b': [
        np.random.random((10, 5)),
        np.nan,
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5)),
        np.random.random((10, 5))
    ],
    'c': [
        {'dicta':[1,2,3], 'dictb':3, 'dictc':{'key1':1, 'key2':2}},
        {'dicta':[52,3], 'dictb':[3,4], 'dictc':{'key4':1, 'key2':2}},
        {'dicta':[12,67], 'dictb':(4,5), 'dictc':{'key3':1, 'key2':77}},
        {'dicta':[1,23], 'dictb':3, 'dictc':{'key1':55, 'key2':33}},
        {'dicta':123, 'dictb':'words', 'dictc':{'key1':4, 'key2':2}},
        {'dicta':[1,2,3], 'dictb':3, 'dictc':{'key1':1, 'key2':2}},
        {'dicta':[52,3], 'dictb':[3,4], 'dictc':{'key4':1, 'key2':2}},
        {'dicta':[12,67], 'dictb':(4,5), 'dictc':{'key3':1, 'key2':77}},
        {'dicta':[1,23], 'dictb':3, 'dictc':{'key1':55, 'key2':33}},
        {'dicta':123, 'dictb':'words', 'dictc':{'key1':4, 'key2':2}},
        {'dicta':[1,2,3], 'dictb':3, 'dictc':{'key1':1, 'key2':2}},
        {'dicta':[52,3], 'dictb':[3,4], 'dictc':{'key4':1, 'key2':2}},
    ],
    'd': [
        np.random.random((16, 5)),
        np.random.random((18, 5)),
        np.random.random((19, 5)),
        np.random.random((11, 5)),
        np.random.random((12, 5)),
        np.random.random((14, 5)),
        np.random.random((17, 5)),
        np.random.random((110, 5)),
        None,
        np.random.random((2, 5)),
        np.random.random((4, 5)),
        np.random.random((7, 5))
    ],
    'e': [
        pd.DataFrame(
            {'c1':[1,2,3], 'c2':[1,2,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'c1':[1,2,3,4], 'c2':[1,2,3,4]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'c1':[3,4,3], 'c2':[3,5,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        np.nan,
        pd.DataFrame(
            {'c1':[1,2,3], 'c2':[1,2,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'c1':[1,2,3,4], 'c2':[1,2,3,4]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'c1':[3,4,3], 'c2':[3,5,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        np.nan,
        pd.DataFrame(
            {'c1':[1,2,3], 'c2':[1,2,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'c1':[1,2,3,4], 'c2':[1,2,3,4], 'c3':[1,2,3,4]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'c1':[3,4,3], 'c2':[3,5,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        np.nan,
    ],
    'f': [
        pd.DataFrame(
            {'f1':[1,2,3], 'hh2':[1,2,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c'], ['f', 'f', 'f']],
                names=['f', 'b', 'e']
            )
        ),
        pd.DataFrame(
            {'hh1':[1,2,3,4], 'qq2':[1,2,3,4]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'q1':[3,4,3], 'qq2':[3,5,3], 'c3':[1,2,3], 'c4':[1,2,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c'], ['t', 't', 't']],
                names=['y', 'll', 'tt']
            )
        ),
        np.nan,
        pd.DataFrame(
            {'qq1':[1,2,3], 'rr2':[1,2,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            [[1,2,3,4], [1,2,3,4]],
            columns=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']],
                names=['rr', 'b']
            ),
            index=pd.MultiIndex.from_arrays(
                [[(1,2), (2,3)], ['a', 'b']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'cpp1':[3,4,3], 'c2':[3,5,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'rr']
            )
        ),
        np.nan,
        pd.DataFrame(
            {'sr1':[1,2,3,4], 'c2':[1,2,3,4]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'cpp1':[3,4,3], 'c2':[3,5,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['a', 'b']
            )
        ),
        pd.DataFrame(
            {'c1':[3,4,3], 'c2':[3,5,3]},
            index=pd.MultiIndex.from_arrays(
                [['a', 'b', 'c'], ['a', 'b', 'c']],
                names=['mm', 'b']
            )
        ),
        np.nan,
    ],
    'g': [
        'a', 'b', {'ff':'gg'}, {'a', 'b', 'c'},
        ('r',), pd.Series({'a':'b'}), 'a', 'b',
        1, 2, 3, 4
    ]
})

df

[2]:

	a	b	c	d	e	f	g
0	aa	[[0.9657556404566287, 0.6105982811179597, 0.71...	{'dicta': [1, 2, 3], 'dictb': 3, 'dictc': {'ke...	[[0.8332176695108217, 0.789958044060405, 0.294...	c1 c2 a b a a 1 1 b b 2 ...	f1 hh2 f b e a a f 1 1 b...	a
1	bb	NaN	{'dicta': [52, 3], 'dictb': [3, 4], 'dictc': {...	[[0.3132246893884286, 0.1335576065684959, 0.42...	c1 c2 a b a a 1 1 b b 2 ...	hh1 qq2 a b a a 1 1 b b ...	b
2	cc	[[0.2978295126291556, 0.2876008891935764, 0.85...	{'dicta': [12, 67], 'dictb': (4, 5), 'dictc': ...	[[0.06184828264219733, 0.18545094706008847, 0....	c1 c2 a b a a 3 3 b b 4 ...	q1 qq2 c3 c4 y ll tt ...	{'ff': 'gg'}
3	dd	[[0.4963928696663038, 0.7239167468580807, 0.98...	{'dicta': [1, 23], 'dictb': 3, 'dictc': {'key1...	[[0.8993257213551965, 0.2031570590614975, 0.66...	NaN	NaN	{a, c, b}
4	aa	[[0.3031081932962224, 0.6450296792517578, 0.32...	{'dicta': 123, 'dictb': 'words', 'dictc': {'ke...	[[0.5945125943356216, 0.15692780064527623, 0.0...	c1 c2 a b a a 1 1 b b 2 ...	qq1 rr2 a b a a 1 1 b b ...	(r,)
5	bb	[[0.5167332905196971, 0.7313676911394652, 0.58...	{'dicta': [1, 2, 3], 'dictb': 3, 'dictc': {'ke...	[[0.6826944216406796, 0.06614703871524041, 0.4...	c1 c2 a b a a 1 1 b b 2 ...	rr a b c d b a b c d a ...	a b dtype: object
6	cc	[[0.708744333480539, 0.8321196509452965, 0.132...	{'dicta': [52, 3], 'dictb': [3, 4], 'dictc': {...	[[0.6154355882964945, 0.5022137513822291, 0.64...	c1 c2 a b a a 3 3 b b 4 ...	cpp1 c2 a rr a a 3 3 b...	a
7	dd	[[0.38385169069805636, 0.4351602576907261, 0.2...	{'dicta': [12, 67], 'dictb': (4, 5), 'dictc': ...	[[0.252611789308698, 0.04665515687154631, 0.04...	NaN	NaN	b
8	aa	[[0.8448759939899335, 0.3957149482929728, 0.70...	{'dicta': [1, 23], 'dictb': 3, 'dictc': {'key1...	None	c1 c2 a b a a 1 1 b b 2 ...	sr1 c2 a b a a 1 1 b b ...	1
9	bb	[[0.19421217606406949, 0.7404434981173305, 0.5...	{'dicta': 123, 'dictb': 'words', 'dictc': {'ke...	[[0.7887921954495609, 0.07707935123200094, 0.5...	c1 c2 c3 a b a a 1 1 ...	cpp1 c2 a b a a 3 3 b b ...	2
10	cc	[[0.5360890315477351, 0.11323478357643058, 0.7...	{'dicta': [1, 2, 3], 'dictb': 3, 'dictc': {'ke...	[[0.09228121163345293, 0.02214142758664739, 0....	c1 c2 a b a a 3 3 b b 4 ...	c1 c2 mm b a a 3 3 b b ...	3
11	dd	[[0.22093473502397243, 0.024389277765600403, 0...	{'dicta': [52, 3], 'dictb': [3, 4], 'dictc': {...	[[0.5195772094908402, 0.2494521739025667, 0.39...	NaN	NaN	4

So this dataframe is quite daunting, I like to call it a puffy table.

Exploding the data out that are in puffy tables¶

Now with puffy_to_long you can easily unravel this dataframe. Since this dataframe is weirdly constructed puffy_to_long may take a while:

[3]:

long_df = pb.puffy_to_long(df)
long_df.head()

[3]:

	a	b	c_level0	c	d	e_level0_a	e_level0_b	e_level0_2	e	f_level0_1	f	g_level0	g
0	aa	0.965756	dicta	1	0.833218	a	a	c1	1.0	b	a	NaN	a
1	aa	0.965756	dicta	1	0.833218	a	a	c1	1.0	e	f	NaN	a
2	aa	0.965756	dicta	1	0.833218	a	a	c1	1.0	f	a	NaN	a
3	aa	0.965756	dicta	1	0.833218	a	a	c1	1.0	f1	1	NaN	a
4	aa	0.965756	dicta	1	0.833218	a	a	c1	1.0	hh2	1	NaN	a

Now we have a dataframe with only hashable elements. puffy_to_long iteratively exploded all cells and treated each column individually. For example, if a cell contains a numpy array that is two-dimensional then the column will be exploded twice and two new columns will be added that are called *[COLUMN_NAME]_level0* and *[COLUMN_NAME]_level1*. For our column b, we get two new columns called b_level0 and b_level1. These levels will contain the index corresponding to the data point in the long-format of column b. Let’s try puffy_to_long again but just on column b.

Exploding numpy.array-containing columns¶

[4]:

long_df = pb.puffy_to_long(df, 'b')
long_df.head()

[4]:

	b_level1	b
0	0	0.965756
1	1	0.610598
2	2	0.710187
3	3	0.851220
4	4	0.264982

index_level0 is the previous index from our dataframe. If this were a pandas.MultiIndex, we would have multiple columns instead of just one that corresponds to the old index of the dataframe.

Now, we could take this normal dataframe objects and perform various operations that we would normally want to perform, e.g.:

[5]:

long_df.groupby('b_level0')['b'].mean()

[5]:

b_level0
0    0.489353
1    0.538500
2    0.502751
3    0.490095
4    0.505218
5    0.501370
6    0.474048
7    0.573405
8    0.491318
9    0.516571
Name: b, dtype: float64

Let’s say we want to explode both column b and column d that both contain numpy arrays. Imagine that we want to align the axis 1 of the data in b and d, and call this axes aligned_axis, we can do this with puffy_to_long keyword arguments:

[6]:

long_df = pb.puffy_to_long(df, 'b', 'd', aligned_axis={'b':1, 'd':1})
long_df.head()

[6]:

	b	d_level0	d
0	0.965756	0.0	0.833218
1	0.965756	1.0	0.060445
2	0.965756	2.0	0.620360
3	0.965756	3.0	0.610501
4	0.965756	4.0	0.671548

So now, axis 1 of b and d column data are aligned and those indices are defined in the aligned_axis column. Since both columns contain missing cells, some index_level0 values have missing b and b_level0 columns or missing d_level0 and d columns. You can view these easily using standard pandas functionality:

[7]:

long_df.loc[long_df['b_level0'].isnull()].head()

[7]:

	index_level0	b_level0	b	d_level0	d
10650	1	NaN	NaN	0.0	0.313225
10651	1	NaN	NaN	1.0	0.630630
10652	1	NaN	NaN	2.0	0.074653
10653	1	NaN	NaN	3.0	0.290296
10654	1	NaN	NaN	4.0	0.563310

[8]:

long_df.loc[long_df['d_level0'].isnull()].head()

[8]:

	index_level0	b_level0	b	d_level0	d
9950	8	0.0	0.844876	NaN	NaN
9951	8	1.0	0.905452	NaN	NaN
9952	8	2.0	0.567938	NaN	NaN
9953	8	3.0	0.043224	NaN	NaN
9954	8	4.0	0.736017	NaN	NaN

Exploding pandas.DataFrame-containing columns¶

Let’s take a look at how pandas.DataFrame objects are handled within puffy_to_long by taking a look at column e:

[9]:

long_df = pb.puffy_to_long(df, 'e')
long_df.head()

[9]:

	e_level0_a	e_level0_b	e_level0_2	e
0	a	a	c1	1.0
1	a	a	c2	1.0
2	b	b	c1	2.0
3	b	b	c2	2.0
4	c	c	c1	3.0

pandas.DataFrame objects are handled within one explosion iteration, unless the cell within that dataframe are non-hashable. This is why all new columns contain level0. The first two new columns e_level0_a and e_level0_b correspond to the pandas.MultiIndex index defined in all dataframes within this column. e_level0_2 corresponds to all the columns names of the dataframe. e only contains the data within each cell of each dataframe.

Let’s say we don’t want to unravel our columns in this way but instead just concatenate them all together. We can use the expand_cols argument for this, which expects a list of column names that contain only pandas.DataFrame or pandas.Series objects:

[10]:

long_df = pb.puffy_to_long(df, 'e', expand_cols=['e'])
long_df.head()

[10]:

	index_level0	a	b	e_c1	e_c2	e_c3
0	0	a	a	1	1	NaN
1	0	b	b	2	2	NaN
2	0	c	c	3	3	NaN
3	1	a	a	1	1	NaN
4	1	b	b	2	2	NaN

Here we preserved the columns of each dataframe in each cell and simply concatenated the dataframes together with the index_level0 information preserved. What if we use this method while also exploding column a, since this has the same column name as the column in the dataframes within column e:

[11]:

long_df = pb.puffy_to_long(df, 'a', 'e', expand_cols=['e'])
long_df.head()

[11]:

	index_level0	a	a_e	b	e_c1	e_c2	e_c3
0	0	aa	a	a	1.0	1.0	NaN
1	0	aa	b	b	2.0	2.0	NaN
2	0	aa	c	c	3.0	3.0	NaN
3	1	bb	a	a	1.0	1.0	NaN
4	1	bb	b	b	2.0	2.0	NaN

Since the column a already existed, the column a within each dataframe within column e was renamed to a_e.

Of course, similarly, this is handled with column b:

[12]:

long_df = pb.puffy_to_long(df, 'b', 'e', expand_cols=['e'])
long_df.head()

[12]:

	b_level1	b	a	b_e	e_c1	e_c2	e_c3
0	0.0	0.965756	a	a	1.0	1.0	NaN
1	0.0	0.965756	b	b	2.0	2.0	NaN
2	0.0	0.965756	c	c	3.0	3.0	NaN
3	1.0	0.610598	a	a	1.0	1.0	NaN
4	1.0	0.610598	b	b	2.0	2.0	NaN

Less structured dataframe-containing columns will result in more complex long-format dataframes:

[13]:

long_df = pb.puffy_to_long(df, 'f')
long_df.head()

[13]:

	f_level0_1	f
0	b	a
1	e	f
2	f	a
3	f1	1
4	hh2	1

[14]:

long_df = pb.puffy_to_long(df, 'f', expand_cols=['f'])
long_df.head()

[14]:

	index_level0	level_1	f_f	f_b	f_e	f_f1	f_hh2	f_a	f_hh1	f_qq2	...	f_('a', 'a')	f_('b', 'b')	f_('c', 'c')	f_('d', 'd')	f_rr	f_cpp1	f_c2	f_sr1	f_mm	f_c1
0	0	0	a	a	f	1.0	1.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	0	1	b	b	f	2.0	2.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	0	2	c	c	f	3.0	3.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	1	0	NaN	a	NaN	NaN	NaN	a	1.0	1.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	1	1	NaN	b	NaN	NaN	NaN	b	2.0	2.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

5 rows × 28 columns

Exploding dictionaries¶

The column c contains dictionaries with various data types that are in them. The puffy_to_long algorithm iteratively explodes all objects within the dictionaries:

[15]:

long_df = pb.puffy_to_long(df, 'c')
long_df.head()

[15]:

	c_level0	c_level1	c
0	dicta	0	1
1	dicta	1	2
2	dicta	2	3
3	dictb	NaN	3
4	dictc	key1	1

Since some values within dictionaries can be further exploded, while others cannot some levels/axes contain NaNs when the explosion iteration for that data type stopped (for a specific row).

Philosophy

API Reference

	index_level0	a	a_e	b	e_c1	e_c2	e_c3
0	0	aa	a	a	1.0	1.0	NaN
1	0	aa	b	b	2.0	2.0	NaN
2	0	aa	c	c	3.0	3.0	NaN
3	1	bb	a	a	1.0	1.0	NaN
4	1	bb	b	b	2.0	2.0	NaN

	index_level0	level_1	f_f	f_b	f_e	f_f1	f_hh2	f_a	f_hh1	f_qq2	...	f_('a', 'a')	f_('b', 'b')	f_('c', 'c')	f_('d', 'd')	f_rr	f_cpp1	f_c2	f_sr1	f_mm	f_c1
0	0	0	a	a	f	1.0	1.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	0	1	b	b	f	2.0	2.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	0	2	c	c	f	3.0	3.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	1	0	NaN	a	NaN	NaN	NaN	a	1.0	1.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	1	1	NaN	b	NaN	NaN	NaN	b	2.0	2.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	index_level0	a	a_e	b	e_c1	e_c2	e_c3
0	0	aa	a	a	1.0	1.0	NaN
1	0	aa	b	b	2.0	2.0	NaN
2	0	aa	c	c	3.0	3.0	NaN
3	1	bb	a	a	1.0	1.0	NaN
4	1	bb	b	b	2.0	2.0	NaN

	index_level0	level_1	f_f	f_b	f_e	f_f1	f_hh2	f_a	f_hh1	f_qq2	...	f_('a', 'a')	f_('b', 'b')	f_('c', 'c')	f_('d', 'd')	f_rr	f_cpp1	f_c2	f_sr1	f_mm	f_c1
0	0	0	a	a	f	1.0	1.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	0	1	b	b	f	2.0	2.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	0	2	c	c	f	3.0	3.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	1	0	NaN	a	NaN	NaN	NaN	a	1.0	1.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	1	1	NaN	b	NaN	NaN	NaN	b	2.0	2.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	index_level0	a	a_e	b	e_c1	e_c2	e_c3
0	0	aa	a	a	1.0	1.0	NaN
1	0	aa	b	b	2.0	2.0	NaN
2	0	aa	c	c	3.0	3.0	NaN
3	1	bb	a	a	1.0	1.0	NaN
4	1	bb	b	b	2.0	2.0	NaN

	index_level0	level_1	f_f	f_b	f_e	f_f1	f_hh2	f_a	f_hh1	f_qq2	...	f_('a', 'a')	f_('b', 'b')	f_('c', 'c')	f_('d', 'd')	f_rr	f_cpp1	f_c2	f_sr1	f_mm	f_c1
0	0	0	a	a	f	1.0	1.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	0	1	b	b	f	2.0	2.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	0	2	c	c	f	3.0	3.0	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	1	0	NaN	a	NaN	NaN	NaN	a	1.0	1.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	1	1	NaN	b	NaN	NaN	NaN	b	2.0	2.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN