In [1]:
from urllib.parse import quote

import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
# Class-server password. Fill in the placeholder locally and avoid committing
# the real value together with the notebook.
password = "PUT THE SECRET CLASS SERVER PASSWORD HERE"

# Percent-encode the password before appending it, so characters such as
# spaces, "&", "#" or "+" cannot break or truncate the query string.
df = pd.read_csv(
    "https://gobbledygook.herokuapp.com/data?file=shonubi.csv&password="
    + quote(password, safe="")
)
In [2]:
# Show the first rows of the downloaded frame (pandas' default is 5 rows).
df.head()
Out[2]:
In [3]:
# Keep only the rows whose `netwt` is strictly greater than 42 and strictly
# less than 1225. NOTE(review): the reason for these two cut-offs is not
# recorded in the notebook - confirm and document it.
trimmed = df.loc[df["netwt"].gt(42) & df["netwt"].lt(1225)]
In [4]:
# Per-column count of non-missing values remaining after the trim.
trimmed.count()
Out[4]:
In [5]:
def get_sample(values=None, size=7, rng=None):
    """Return the sum of `size` values drawn at random, with replacement.

    Parameters
    ----------
    values : array-like, optional
        Pool to draw from. Defaults to ``trimmed.netwt``, which is looked up
        when the function is called (not when it is defined).
    size : int, optional
        Number of draws that make up one sample (default 7).
    rng : numpy.random.Generator, optional
        Source of randomness. Pass a seeded generator for reproducible
        results; when omitted, NumPy's global random state is used.

    Returns
    -------
    numeric scalar
        Sum of the drawn values.
    """
    if values is None:
        values = trimmed.netwt
    # Same call signature on the legacy global API and on a Generator.
    draw = np.random.choice if rng is None else rng.choice
    return np.sum(draw(values, size=size, replace=True))
# Print one simulated total. No random seed is set in the visible cells, so
# the number differs from run to run.
print(get_sample())
In [6]:
# Draw and print another simulated total; each call re-samples independently.
print(get_sample())
In [7]:
# One more independent draw, to get a feel for how much the total varies.
print(get_sample())
In [8]:
# Build the simulated distribution: call get_sample() once per simulation and
# keep every resulting total in a list.
N_SIMULATIONS = 100000
samples = [get_sample() for _ in range(N_SIMULATIONS)]
In [9]:
# Sanity check: one entry per loop iteration is expected.
len(samples)
Out[9]:
In [10]:
# `sns.distplot` is deprecated since seaborn 0.11 and scheduled for removal.
# Prefer `histplot` (histogram plus KDE on a density scale) when it exists and
# fall back to `distplot` on older seaborn installs.
if hasattr(sns, "histplot"):
    ax = sns.histplot(samples, kde=True, stat="density")
else:
    ax = sns.distplot(samples)
ax.set(
    xlabel="simulated total (sum of sampled netwt values)",
    ylabel="density",
    title="Distribution of simulated totals",
)
ax
Out[10]:
In [11]:
# Mean of the simulated totals.
np.mean(samples)
Out[11]:
In [12]:
# Standard deviation of the simulated totals (np.std default, i.e. ddof=0).
np.std(samples)
Out[12]:
In [13]:
# Value lying two standard deviations below the mean of the simulated totals.
np.mean(samples) - 2 * np.std(samples)
Out[13]:
In [14]:
# 1st percentile of the simulated totals.
np.percentile(samples, 1)
Out[14]:
In [15]:
# 99th percentile of the simulated totals.
np.percentile(samples, 99)
Out[15]:
In [16]:
from scipy.stats import percentileofscore
In [17]:
# Percentile rank (0-100 scale) of a total of 2000 within the simulated totals.
percentileofscore(samples, 2000)
Out[17]:
In [18]:
# Percentile rank (0-100 scale) of a total of 3000 within the simulated totals.
percentileofscore(samples, 3000)
Out[18]: