Finished Exercise 3
This commit is contained in:
parent
3c1fb6c491
commit
4c8a1d0cd0
1 changed file with 42 additions and 3 deletions
|
|
@ -5,7 +5,8 @@ from scipy import stats
|
|||
import pandas as pd
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.model_selection import train_test_split
|
||||
# from tqdm import tqdm #Import all needed modules
|
||||
from sklearn.linear_model import SGDRegressor
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
columns = ["Material", "Density", "Radius", "Mass", "Temperature", "Pressure", "Height", "Time"]
|
||||
columnsNoMaterial = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height", "Time"]
|
||||
|
|
@ -62,7 +63,6 @@ def part1():
|
|||
materialDf = df[df["Material"] == material]
|
||||
for radius in radii:
|
||||
radiusDf = materialDf[materialDf["Radius"] == radius]
|
||||
print(radiusDf)
|
||||
plt.scatter(radiusDf["Height"], radiusDf["Time"], label=f'Radius {radius}m')
|
||||
|
||||
plt.xlabel("Drop Height/m")
|
||||
|
|
@ -76,7 +76,6 @@ def part1():
|
|||
def part2():
|
||||
dfNoMaterial = df.drop("Material", axis=1)
|
||||
corrMatrix = dfNoMaterial.corr(method='pearson')
|
||||
print(corrMatrix)
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
im = ax.imshow(corrMatrix, cmap="gnuplot", vmin=-1, vmax=1)
|
||||
|
|
@ -127,6 +126,8 @@ def part3():
|
|||
plt.title(f'Iron data and predictions for radius of {radius}m')
|
||||
plt.show()
|
||||
|
||||
predict()
|
||||
|
||||
trainData, testData = train_test_split(ironDf, test_size=0.1)
|
||||
|
||||
features = trainData[["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]]
|
||||
|
|
@ -146,9 +147,47 @@ def part3():
|
|||
print(f'For radius of {radius}m, the true R^2 value is {trueR2} and the predicted R^2 value is {predR2}')
|
||||
plt.show()
|
||||
|
||||
trueVpred()
|
||||
|
||||
def calcResiduals():
    """Plot the test-set residuals of the linear model against radius.

    A residual is the true "Time" minus the time predicted by
    ``trainLinearReg`` for the same row.  Both ``testData`` and
    ``trainLinearReg`` are read from the enclosing scope rather than passed
    in.  NOTE(review): ``trainLinearReg`` is presumably the model fitted on
    the matching training split -- confirm against its definition.
    """
    # Must match the feature columns (and their order) the model was fitted on.
    featureNames = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]

    predictedTimes = trainLinearReg.predict(testData[featureNames])
    residuals = testData["Time"] - predictedTimes

    plt.scatter(testData["Radius"], residuals)
    # Label the figure like the other plots in this file so it can be read
    # on its own.
    plt.xlabel("Radius/m")
    plt.ylabel("Residual (true - predicted Time)")
    plt.title("Test-set residuals against radius")
    plt.show()
|
||||
|
||||
calcResiduals()
|
||||
|
||||
def _reportDeScaledCoefs(model, scaler, featureNames, qualifier=""):
    """Print a fitted model's coefficients converted back to unscaled units.

    ``model`` was fitted on features standardised by ``scaler``; dividing
    each coefficient by the scaler's per-feature ``scale_`` gives the
    coefficient with respect to the original, unscaled feature.
    ``qualifier`` is inserted after the feature name in the printed line.
    """
    deScaledCoefs = model.coef_ / scaler.scale_
    for name, coef in zip(featureNames, deScaledCoefs):
        # `units` is indexed in parallel with the module-level `columns` list.
        unit = units[columns.index(name)]
        print(f'The coefficient of {name}{qualifier} is {coef} {unit}')


def part4():
    """Compare SGD regression of "Time" on unscaled and standardised features.

    Fits three ``SGDRegressor`` models on the module-level ``df``:
    one on the raw features, one on standardised features, and one on
    standardised features with the Huber loss.  Prints the R^2 score of each
    (scored on the same data it was fitted on) and, for the two scaled
    models, the coefficients converted back to unscaled units.
    """
    featureNames = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]
    regSamples = df[featureNames]
    # A 1-D target: SGDRegressor expects y of shape (n_samples,), and a
    # single-column DataFrame triggers a DataConversionWarning on every fit.
    regTargets = df["Time"]

    reg = SGDRegressor()
    reg.fit(regSamples, regTargets)
    print(f'The unscaled R^2 value is: {reg.score(regSamples, regTargets)}')

    # StandardScaler ignores any target, so only the features are passed.
    scaler = StandardScaler()
    scaledSamples = scaler.fit_transform(regSamples)

    scaledReg = SGDRegressor()
    scaledReg.fit(scaledSamples, regTargets)
    print(f'The scaled R^2 value is: {scaledReg.score(scaledSamples, regTargets)}')
    _reportDeScaledCoefs(scaledReg, scaler, featureNames)

    huberReg = SGDRegressor(loss="huber")
    huberReg.fit(scaledSamples, regTargets)
    print(f'The scaled R^2 value using the huber loss function is: {huberReg.score(scaledSamples, regTargets)}')
    _reportDeScaledCoefs(huberReg, scaler, featureNames, " using the huber loss function")
|
||||
|
||||
part1()
|
||||
part2()
|
||||
part3()
|
||||
part4()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue