-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgusiyan.py
213 lines (149 loc) · 6.25 KB
/
gusiyan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import math
import matplotlib.pyplot as plt
class Gaussian:
    """Gaussian distribution class for calculating and
    visualizing a Gaussian distribution.

    Attributes:
        mean (float) representing the mean value of the distribution
        stdev (float) representing the standard deviation of the distribution
        data (list of numbers) the data set, filled by read_data_file or
            assigned directly by the caller
    """

    def __init__(self, mu=0, sigma=1):
        """Create a distribution with the given mean and standard deviation.

        Args:
            mu (float): mean of the distribution
            sigma (float): standard deviation of the distribution
        """
        self.mean = mu
        self.stdev = sigma
        self.data = []

    def calculate_mean(self):
        """Method to calculate the mean of the data set.

        The result is also stored in the mean attribute.

        Args:
            None

        Returns:
            float: mean of the data set

        Raises:
            ZeroDivisionError: if the data set is empty
        """
        # The 1.0 factor forces float division even for all-integer data.
        self.mean = 1.0 * sum(self.data) / len(self.data)
        return self.mean

    def calculate_stdev(self, sample=True):
        """Method to calculate the standard deviation of the data set.

        The mean is recomputed from the data first (updating the mean
        attribute), so the deviation is always taken about the mean of the
        current data rather than about a stale or hand-set value.
        The result is also stored in the stdev attribute.

        Args:
            sample (bool): whether the data represents a sample or population

        Returns:
            float: standard deviation of the data set

        Raises:
            ZeroDivisionError: if the data set is empty, or holds a single
                value while sample is True
        """
        # A sample uses n - 1 in the denominator, a population uses n.
        n = len(self.data) - 1 if sample else len(self.data)
        mean = self.calculate_mean()
        squared_error = sum((value - mean) ** 2 for value in self.data)
        self.stdev = math.sqrt(squared_error / n)
        return self.stdev

    @staticmethod
    def _parse_number(text):
        """Convert one line of text to an int when possible, else a float."""
        try:
            return int(text)
        except ValueError:
            return float(text)

    def read_data_file(self, file_name, sample=True):
        """Method to read in data from a txt file. The txt file should have
        one number per line. The numbers are stored in the data attribute.
        After reading in the file, the mean and standard deviation are
        calculated.

        Integer lines are stored as int and everything else as float.
        Blank lines are skipped.

        Args:
            file_name (string): name of a file to read from
            sample (bool): whether the data represents a sample or population

        Returns:
            None

        Raises:
            ValueError: if a non-blank line is not a number
            ZeroDivisionError: if the file holds too few numbers to compute
                the statistics
        """
        # The with statement closes the file; no explicit close is needed.
        with open(file_name) as file:
            data_list = [
                self._parse_number(line) for line in file if line.strip()
            ]

        self.data = data_list
        self.mean = self.calculate_mean()
        self.stdev = self.calculate_stdev(sample)

    def plot_histogram(self):
        """Method to output a histogram of the instance variable data using
        matplotlib pyplot library.

        Args:
            None

        Returns:
            None
        """
        plt.hist(self.data)
        plt.title("Histogram of Data")
        plt.xlabel('data')
        plt.ylabel('count')

    def pdf(self, x):
        """Probability density function calculator for the gaussian distribution.

        Args:
            x (float): point for calculating the probability density function

        Returns:
            float: probability density function output
        """
        z_score = (x - self.mean) / self.stdev
        scale = 1.0 / (self.stdev * math.sqrt(2 * math.pi))
        return scale * math.exp(-0.5 * z_score ** 2)

    def plot_histogram_pdf(self, n_spaces=50):
        """Method to plot the normalized histogram of the data and a plot of the
        probability density function along the same range.

        Args:
            n_spaces (int): number of data points

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot
        """
        min_range = min(self.data)
        max_range = max(self.data)

        # calculates the interval between x values
        interval = 1.0 * (max_range - min_range) / n_spaces

        # x values start at the minimum and stop one interval short of the
        # maximum; y values are the density at each x
        x = [min_range + interval * i for i in range(n_spaces)]
        y = [self.pdf(point) for point in x]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(self.data, density=True)
        axes[0].set_title('Normed Histogram of Data')
        axes[0].set_ylabel('Density')

        axes[1].plot(x, y)
        axes[1].set_title('Normal Distribution for \n Sample Mean and Sample Standard Deviation')
        # Label the lower plot; the upper one was labelled above.
        axes[1].set_ylabel('Density')
        plt.show()

        return x, y

    def __add__(self, other):
        """Magic method to add together two Gaussian distributions.

        The mean of the sum is the sum of the means, and the standard
        deviation is the square root of the sum of the squared standard
        deviations. The data attribute of the result is empty.

        Args:
            other (Gaussian): Gaussian instance

        Returns:
            Gaussian: Gaussian distribution
        """
        if not isinstance(other, Gaussian):
            return NotImplemented

        result = Gaussian()
        result.mean = self.mean + other.mean
        result.stdev = math.sqrt(self.stdev ** 2 + other.stdev ** 2)
        return result

    def __repr__(self):
        """Magic method to output the characteristics of the Gaussian instance.

        Args:
            None

        Returns:
            string: characteristics of the Gaussian, for example
                "mean 3.5, standard deviation 1.3"
        """
        return "mean {}, standard deviation {}".format(self.mean, self.stdev)