Shonubi/Bootstrap exercise

In [1]:
import getpass
import os
from urllib.parse import urlencode

import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
# Keep the class-server password out of the notebook source: read it from the
# SHONUBI_PASSWORD environment variable, or prompt for it (input is not echoed
# and is not saved in the notebook) when the variable is unset or empty.
password = os.environ.get("SHONUBI_PASSWORD") or getpass.getpass("Class server password: ")
# urlencode escapes characters such as '&', '#' or spaces that would otherwise
# corrupt the query string when the password is spliced into the URL.
query = urlencode({"file": "shonubi.csv", "password": password})
df = pd.read_csv("https://gobbledygook.herokuapp.com/data?" + query)
In [2]:
# Preview the first five rows to check which columns were loaded.
df.head()
Out[2]:
balloons netwt purity customs
0 79 503.2 0.51 0
1 90 576.9 0.32 0
2 5 23.1 0.62 0
3 17 119.2 0.56 0
4 90 549.0 0.92 0
In [3]:
# Keep only the rows whose netwt lies strictly between 42 and 1225
# (both bounds excluded); the original frame `df` is left untouched.
trimmed = df.query("42 < netwt < 1225")
In [4]:
# Number of non-null values per column remaining after the netwt filter.
trimmed.count()
Out[4]:
balloons    135
netwt       135
purity      135
customs     135
dtype: int64
In [5]:
def get_sample(values=None, size=7):
    """Draw one bootstrap resample and return the sum of the drawn values.

    Parameters
    ----------
    values : 1-D array-like, optional
        Pool of observations to resample from. Defaults to ``trimmed.netwt``,
        looked up at call time so a re-run of the filtering cell is picked up.
    size : int, optional
        Number of values drawn (with replacement) per resample. Defaults to 7,
        the value that was previously hard-coded.

    Returns
    -------
    numpy floating scalar
        Sum of the ``size`` randomly drawn values.
    """
    if values is None:
        values = trimmed.netwt
    # Sampling with replacement is what makes this a bootstrap draw.
    draws = np.random.choice(values, size=size, replace=True)
    return np.sum(draws)

# Print the sum of one random resample; the value differs from call to call.
print(get_sample())
2920.4999999999995
In [6]:
# Another independent draw: a fresh resample gives a different sum.
print(get_sample())
3175.3
In [7]:
# A third independent draw, showing how much the sum varies between resamples.
print(get_sample())
2372.7999999999997
In [8]:
# Seed the global NumPy RNG so "Restart & Run All" reproduces the same
# bootstrap distribution (and therefore the same summary numbers below).
np.random.seed(0)

# Number of bootstrap resamples to draw; lower it for a quicker exploratory run.
N_BOOTSTRAP = 100000

# One summed resample per iteration, collected into a plain list.
samples = [get_sample() for _ in range(N_BOOTSTRAP)]
In [9]:
# Confirm that one value was collected per bootstrap iteration.
len(samples)
Out[9]:
100000
In [10]:
# distplot has been deprecated since seaborn 0.11 and is slated for removal;
# use histplot (histogram + KDE on a density scale, matching distplot's look)
# when it exists and fall back to distplot on older seaborn installs.
if hasattr(sns, "histplot"):
    ax = sns.histplot(samples, kde=True, stat="density")
else:
    ax = sns.distplot(samples)
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_title("Bootstrap distribution of resampled netwt sums")
ax.set_xlabel("Sum of resampled netwt values")
ax.set_ylabel("Density")
ax
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x10dd3ef98>
In [11]:
# Mean of the bootstrap sums.
np.mean(samples)
Out[11]:
2937.7987940000003
In [12]:
# Standard deviation of the bootstrap sums (NumPy default, ddof=0).
np.std(samples)
Out[12]:
482.2338438103921
In [13]:
# Value two standard deviations below the mean of the bootstrap sums.
np.mean(samples) - 2 * np.std(samples)
Out[13]:
1973.3311063792162
In [14]:
# 1st percentile: only about 1% of the bootstrap sums fall below this value.
np.percentile(samples, 1)
Out[14]:
1863.6999999999998
In [15]:
# 99th percentile: only about 1% of the bootstrap sums exceed this value.
np.percentile(samples, 99)
Out[15]:
4129.099999999999
In [16]:
from scipy.stats import percentileofscore
In [17]:
# Percentile rank (0-100) of a total of 2000 within the bootstrap sums.
percentileofscore(samples, 2000)
Out[17]:
2.168
In [18]:
# Percentile rank (0-100) of a total of 3000 within the bootstrap sums.
percentileofscore(samples, 3000)
Out[18]:
56.328

links