Finished Exercise 3

This commit is contained in:
Ceres 2026-02-17 11:50:12 +00:00
parent 3c1fb6c491
commit 4c8a1d0cd0
Signed by: ceres-sees-all
GPG key ID: 9814758436430045

View file

@ -5,7 +5,8 @@ from scipy import stats
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# from tqdm import tqdm #Import all needed modules
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
# Column names of the data set, in order; "Material" is the only non-numeric one.
columns = [
    "Material",
    "Density",
    "Radius",
    "Mass",
    "Temperature",
    "Pressure",
    "Height",
    "Time",
]
# Same names without the leading "Material" entry (a fresh list, not an alias).
columnsNoMaterial = columns[1:]
@ -62,7 +63,6 @@ def part1():
materialDf = df[df["Material"] == material]
for radius in radii:
radiusDf = materialDf[materialDf["Radius"] == radius]
print(radiusDf)
plt.scatter(radiusDf["Height"], radiusDf["Time"], label=f'Radius {radius}m')
plt.xlabel("Drop Height/m")
@ -76,7 +76,6 @@ def part1():
def part2():
dfNoMaterial = df.drop("Material", axis=1)
corrMatrix = dfNoMaterial.corr(method='pearson')
print(corrMatrix)
fig, ax = plt.subplots()
im = ax.imshow(corrMatrix, cmap="gnuplot", vmin=-1, vmax=1)
@ -127,6 +126,8 @@ def part3():
plt.title(f'Iron data and predictions for radius of {radius}m')
plt.show()
predict()
trainData, testData = train_test_split(ironDf, test_size=0.1)
features = trainData[["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]]
@ -146,9 +147,47 @@ def part3():
print(f'For radius of {radius}m, the true R^2 value is {trueR2} and the predicted R^2 value is {predR2}')
plt.show()
trueVpred()
def calcResiduals():
    """Scatter-plot the residuals of trainLinearReg on the test split against Radius.

    A residual here is the recorded "Time" of a test row minus the time that
    trainLinearReg predicts from that row's six feature columns.
    """
    featureNames = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]
    predictedTimes = trainLinearReg.predict(testData[featureNames])
    timeResiduals = testData["Time"] - predictedTimes
    plt.scatter(testData["Radius"], timeResiduals)
    plt.show()
calcResiduals()
def part4():
    """Fit SGD regressors for "Time" on the six numeric features and report them.

    Three models are trained on every row of ``df``:

    1. a default ``SGDRegressor`` on the raw features,
    2. a default ``SGDRegressor`` on standardised features,
    3. an ``SGDRegressor`` with the Huber loss on standardised features.

    For each model the R^2 score on the data it was fitted to is printed. For
    the two scaled models the coefficients are also printed after dividing by
    the scaler's per-feature scale, which converts them from "per standard
    deviation" back to "per unit of the original feature".

    Reads the module-level ``df`` and ``units``; returns nothing.
    """
    featureNames = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]
    regSamples = df[featureNames]
    # Select the target as a 1-D Series: SGDRegressor expects y of shape
    # (n_samples,), and a one-column DataFrame makes every fit() call emit a
    # DataConversionWarning before being flattened anyway.
    regTargets = df["Time"]

    # Baseline: fit directly on the unscaled features for comparison.
    reg = SGDRegressor()
    reg.fit(regSamples, regTargets)
    print(f'The unscaled R^2 value is: {reg.score(regSamples, regTargets)}')

    # StandardScaler ignores y, so only the features are passed.
    scaler = StandardScaler()
    scaledSamples = scaler.fit_transform(regSamples)

    scaledReg = SGDRegressor()
    scaledReg.fit(scaledSamples, regTargets)
    print(f'The scaled R^2 value is: {scaledReg.score(scaledSamples, regTargets)}')
    # Undo the scaling on the slopes so they refer to the original feature units.
    deScaledCoefs = scaledReg.coef_ / scaler.scale_
    # units is indexed like the full column list, where position 0 is
    # "Material", so feature k lives at index k + 1.
    for position, (name, coef) in enumerate(zip(featureNames, deScaledCoefs), start=1):
        print(f'The coefficient of {name} is {coef} {units[position]}')

    huberReg = SGDRegressor(loss="huber")
    huberReg.fit(scaledSamples, regTargets)
    print(f'The scaled R^2 value using the huber loss function is: {huberReg.score(scaledSamples, regTargets)}')
    deScaledCoefs = huberReg.coef_ / scaler.scale_
    for position, (name, coef) in enumerate(zip(featureNames, deScaledCoefs), start=1):
        print(f'The coefficient of {name} using the huber loss function is {coef} {units[position]}')
# Run the four parts of the exercise one after another, in order.
for runPart in (part1, part2, part3, part4):
    runPart()