More Ex 3
This commit is contained in:
parent
5c72bc22b4
commit
3c1fb6c491
1 changed file with 86 additions and 54 deletions
|
|
@ -1,8 +1,10 @@
|
|||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from scipy import integrate
|
||||
from scipy import stats
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.model_selection import train_test_split
|
||||
# from tqdm import tqdm #Import all needed modules
|
||||
|
||||
# Names of the dataset columns. part1() walks this list and skips "Material".
# NOTE(review): presumably matches the header order of exercise3data.csv -
# confirm against getData.
columns = ["Material", "Density", "Radius", "Mass", "Temperature", "Pressure", "Height", "Time"]
|
||||
|
|
@ -49,74 +51,104 @@ df = getData('exercise3data.csv')
|
|||
|
||||
####Part 1
|
||||
|
||||
def part1():
    """Print per-column statistics, then plot fall time against drop height.

    Every entry of ``columns`` except "Material" is handed to ``columnStats``
    together with the entry of ``units`` at the same position. Afterwards one
    figure is shown per material, holding one scatter series per radius.

    NOTE(review): indentation was lost in the reviewed copy of this file, so
    the nesting of the plotting loop inside this function is reconstructed
    from context - confirm against the working tree.
    """
    for position, column in enumerate(columns):
        # "Material" is skipped; every other column is summarised.
        if column != "Material":
            columnStats(column, units[position])

    for material in materials:
        sameMaterial = df[df["Material"] == material]
        for radius in radii:
            sameRadius = sameMaterial[sameMaterial["Radius"] == radius]
            print(sameRadius)
            plt.scatter(
                sameRadius["Height"],
                sameRadius["Time"],
                label=f'Radius {radius}m',
            )

        # One labelled figure per material.
        plt.xlabel("Drop Height/m")
        plt.ylabel("Fall Time/s")
        plt.title(f'Material: {material}')
        plt.legend()
        plt.show()
|
||||
|
||||
####Part 2
|
||||
|
||||
def part2():
    """Print and plot the Pearson correlation matrix of ``df``.

    The "Material" column is dropped before the correlation is computed. The
    matrix is printed, then drawn as a heat map on a fixed -1..1 colour scale
    with every cell annotated by its coefficient rounded to two decimals.
    Tick labels come from ``columnsNoMaterial``.
    """
    numericDf = df.drop("Material", axis=1)
    corrMatrix = numericDf.corr(method='pearson')
    print(corrMatrix)

    fig, ax = plt.subplots()
    heatmap = ax.imshow(corrMatrix, cmap="gnuplot", vmin=-1, vmax=1)

    tickPositions = range(len(columnsNoMaterial))
    ax.set_xticks(tickPositions, labels=columnsNoMaterial)
    ax.set_yticks(tickPositions, labels=columnsNoMaterial)

    # Write each coefficient on top of its cell (x is the column, y the row).
    for row, rowName in enumerate(columnsNoMaterial):
        for col, colName in enumerate(columnsNoMaterial):
            cellValue = corrMatrix[rowName][colName]
            ax.text(col, row, round(cellValue, 2),
                    ha="center", va="center", color="w")

    fig.colorbar(heatmap)
    fig.tight_layout()
    plt.show()
|
||||
|
||||
####Part 3
|
||||
|
||||
def part3():
    """Fit a linear model for fall time and print one line per coefficient.

    The model is fitted on the six numeric feature columns of ``df`` with
    "Time" as the target. The rows whose material is "iron" are then selected
    into ``ironDf``.

    NOTE(review): indentation was lost in the reviewed copy of this file.
    The definitions that follow read ``linearReg``, ``linearFit`` and
    ``ironDf``, so they presumably live inside this function as well -
    confirm against the working tree.
    """
    features = df[["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]]
    targets = df["Time"]

    linearReg = LinearRegression()
    linearFit = linearReg.fit(features, targets)

    # units is indexed one position further along than the feature list
    # (presumably because units also has an entry for "Material" - confirm).
    fitted = zip(linearFit.feature_names_in_, linearFit.coef_)
    for position, (name, coefficient) in enumerate(fitted):
        print(f'The coefficient of {name} is {coefficient} {units[position+1]}')

    ironDf = df[df["Material"] == "iron"]
|
||||
|
||||
def fitByMeans(density, radius, mass, temp, pressure, height):
    """Evaluate the fitted linear model at a single point.

    Returns the intercept of ``linearFit`` plus each argument multiplied by
    the coefficient at the matching position (density -> coef_[0] through
    height -> coef_[5]).

    NOTE(review): ``linearFit`` is read from the enclosing scope - confirm
    where this helper is nested.
    """
    weights = linearFit.coef_
    terms = (
        density * weights[0],
        radius * weights[1],
        mass * weights[2],
        temp * weights[3],
        pressure * weights[4],
        height * weights[5],
    )
    # Starting the sum at the intercept adds the terms in the written order.
    return sum(terms, linearFit.intercept_)
|
||||
|
||||
def predict():
    """Plot measured and predicted fall times for iron, one figure per radius.

    Each figure holds three series: the measured times, the predictions of
    ``linearReg`` for the same rows, and a straight line joining the values
    of ``fitByMeans`` at the smallest and largest drop height, with every
    other feature set to its mean over those rows.

    NOTE(review): ``ironDf``, ``linearReg`` and ``fitByMeans`` are read from
    the enclosing scope - confirm where this function is nested.
    """
    featureNames = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]
    for radius in radii:
        subset = ironDf[ironDf["Radius"] == radius]
        heights = subset["Height"]

        plt.scatter(heights, subset["Time"], label="Experimental data")
        plt.scatter(heights, linearReg.predict(subset[featureNames]), label="Predicted data")

        # Only the height changes between the two end points of the line,
        # so the other five means are computed once.
        means = [subset[name].mean() for name in featureNames[:-1]]
        heightBounds = [heights.min(), heights.max()]
        linearByMeans = [fitByMeans(*means, bound) for bound in heightBounds]
        plt.plot(heightBounds, linearByMeans, label="Fit Using Means")

        plt.xlabel("Drop Height/m")
        plt.ylabel("Fall Time/s")
        plt.legend()
        plt.title(f'Iron data and predictions for radius of {radius}m')
        plt.show()
|
||||
|
||||
# Hold back a random 10% of the iron rows as test data and fit a second
# linear model on the remaining rows only.
trainData, testData = train_test_split(ironDf, test_size=0.1)

trainColumns = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]
features = trainData[trainColumns]
targets = trainData["Time"]

trainLinearReg = LinearRegression()
trainLinearFit = trainLinearReg.fit(features, targets)
|
||||
|
||||
def trueVpred():
    """Compare held-out measurements with model predictions for each radius.

    For every radius the matching rows of ``testData`` are plotted against
    drop height, once with the measured times and once with the times
    predicted by ``trainLinearReg``. The squared correlation coefficient of a
    straight-line fit against height is printed for both series. One
    labelled figure is shown per radius, in the same way as predict().

    A radius with fewer than two distinct heights in the test data is
    reported and skipped, because neither the prediction nor the regression
    can be computed for it.

    NOTE(review): ``testData`` and ``trainLinearReg`` are read from the
    enclosing scope - confirm where this function is nested.
    """
    featureNames = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]
    for radius in radii:
        radiusDf = testData[testData["Radius"] == radius]
        heights = radiusDf["Height"]

        # The random hold-out can leave a radius with no rows, a single row,
        # or rows that all share one height. predict() raises on an empty
        # frame and linregress raises on empty or constant x.
        if heights.nunique() < 2:
            print(f'For radius of {radius}m, there is not enough test data to calculate R^2 values')
            continue

        # Predict once and reuse the result for the plot and the regression.
        predictedTimes = trainLinearReg.predict(radiusDf[featureNames])

        plt.scatter(heights, radiusDf["Time"], label="Experimental data")
        plt.scatter(heights, predictedTimes, label="Predicted data")

        trueR2 = (stats.linregress(heights, radiusDf["Time"]).rvalue)**2
        predR2 = (stats.linregress(heights, predictedTimes).rvalue)**2
        print(f'For radius of {radius}m, the true R^2 value is {trueR2} and the predicted R^2 value is {predR2}')

        plt.xlabel("Drop Height/m")
        plt.ylabel("Fall Time/s")
        plt.legend()
        plt.title(f'Iron test data and predictions for radius of {radius}m')
        plt.show()
|
||||
|
||||
def calcResiduals():
    """Scatter the test-set residuals against radius.

    A residual is the measured "Time" of a row in ``testData`` minus the
    time ``trainLinearReg`` predicts for that row.

    NOTE(review): ``testData`` and ``trainLinearReg`` are read from the
    enclosing scope - confirm where this function is nested.
    """
    inputColumns = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]
    predictedTimes = trainLinearReg.predict(testData[inputColumns])
    errors = testData["Time"] - predictedTimes

    plt.scatter(testData["Radius"], errors)
    plt.show()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue