A small script I wrote because I happened to need it. It generates a quick, more "easy-to-read" frequency table for the ranges specified in `bins`.
Simple Frequency Table Code
Takes a list (sample), separates its values into bins, and produces a frequency table with a histogram.
In [1]:
#Imports
import pandas as pd
import seaborn as sns
In [2]:
# Raw observations to tabulate (40 integer values)
sample = [
    10, 15, 12, 17, 22, 14, 23, 8, 15, 11,
    17, 12, 16, 26, 12, 11, 9, 16, 15, 24,
    12, 17, 16, 14, 19, 13, 10, 15, 19, 20,
    10, 25, 14, 15, 12, 22, 7, 28, 16, 9,
]
# Wrap the observations in a one-column DataFrame; the column is called 'nums'
df = pd.DataFrame({'nums': sample})
In [3]:
# Bin edges: start at 5 and step by 4 up to and including 29.
# Consecutive edges delimit one class each, so 7 edges give 6 classes.
bins = list(range(5, 30, 4))
In [4]:
#Put into dataframe
# Count how many observations fall into each bin, ordered by interval rather
# than by count (value_counts sorts by frequency by default).
bin_counts = pd.cut(df['nums'], bins=bins).value_counts().sort_index()
# Build the two columns explicitly instead of relying on reset_index():
# pandas >= 2.0 names the value_counts() result 'count' and carries the
# original column name over to the index, so the old
# reset_index() / newdf['index'] sequence raises KeyError there.
# The column names 'index' and 'nums' are what the following cells expect.
newdf = pd.DataFrame({
    'index': bin_counts.index.astype(str),  # interval labels as text, e.g. '(5, 9]'
    'nums': bin_counts.to_numpy(),          # number of observations per interval
})
In [5]:
# Turn interval labels such as '(5, 9]' into readable ranges such as '6 - 9'.
# Drop the surrounding bracket characters, then separate the two edges at the comma.
pieces = newdf['index'].str.strip('(]').str.split(',')
# The left edge is exclusive, so add 1 to show the first integer that
# actually belongs to the class.
left = pieces.str[0].astype('int32') + 1
# The right piece keeps its leading space (' 9'); together with the ' -'
# separator below it yields the ' - ' gap in the final label.
right = pieces.str[1]
fullname = left.astype(str).str.cat(right, sep=' -')
newdf['index'] = fullname
In [6]:
# Derive the cumulative / relative frequency columns and apply display names.
# The two source columns are read by position (0 = class label, 1 = count)
# rather than by name, so this cell can be re-run after the columns have
# already been renamed without hitting a KeyError on 'nums'.
class_labels = newdf.iloc[:, 0]
counts = newdf.iloc[:, 1]
#relative frequency: each class count as a fraction of all binned observations
relfreq = counts / counts.sum()
# Rebuild the table in one step; cumulative columns are running totals down
# the rows. 'Cumulative' is spelled consistently in both headers.
newdf = pd.DataFrame({
    'Class Interval': class_labels,
    'Frequency': counts,
    'Cumulative Frequency': counts.cumsum(),
    'Relative Frequency': relfreq,
    'Cumulative Relative Frequency': relfreq.cumsum(),
})
In [7]:
#Show frequency table
# A bare expression on the last line of a cell is rendered as the cell's output.
newdf
Out[7]:
Class...