From 4c8a1d0cd0cd52ebd5b169c796f9c1e1c2196e3a Mon Sep 17 00:00:00 2001 From: Ceres Date: Tue, 17 Feb 2026 11:50:12 +0000 Subject: [PATCH] Finished Exercise 3 --- Exercise 3/exercise3.py | 45 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/Exercise 3/exercise3.py b/Exercise 3/exercise3.py index 8137e4b..bedca73 100644 --- a/Exercise 3/exercise3.py +++ b/Exercise 3/exercise3.py @@ -5,7 +5,8 @@ from scipy import stats import pandas as pd from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split -# from tqdm import tqdm #Import all needed modules +from sklearn.linear_model import SGDRegressor +from sklearn.preprocessing import StandardScaler columns = ["Material", "Density", "Radius", "Mass", "Temperature", "Pressure", "Height", "Time"] columnsNoMaterial = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height", "Time"] @@ -62,7 +63,6 @@ def part1(): materialDf = df[df["Material"] == material] for radius in radii: radiusDf = materialDf[materialDf["Radius"] == radius] - print(radiusDf) plt.scatter(radiusDf["Height"], radiusDf["Time"], label=f'Radius {radius}m') plt.xlabel("Drop Height/m") @@ -76,7 +76,6 @@ def part1(): def part2(): dfNoMaterial = df.drop("Material", axis=1) corrMatrix = dfNoMaterial.corr(method='pearson') - print(corrMatrix) fig, ax = plt.subplots() im = ax.imshow(corrMatrix, cmap="gnuplot", vmin=-1, vmax=1) @@ -127,6 +126,8 @@ def part3(): plt.title(f'Iron data and predictions for radius of {radius}m') plt.show() + predict() + trainData, testData = train_test_split(ironDf, test_size=0.1) features = trainData[["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]] @@ -146,9 +147,47 @@ def part3(): print(f'For radius of {radius}m, the true R^2 value is {trueR2} and the predicted R^2 value is {predR2}') plt.show() + trueVpred() + def calcResiduals(): residualsFeatures = testData[["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]] residuals = testData["Time"] - trainLinearReg.predict(residualsFeatures) plt.scatter(testData["Radius"], residuals) plt.show() + + calcResiduals() + +def part4(): + reg = SGDRegressor() + + regSamples = df[["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]] + regTargets = df[["Time"]] + reg.fit(regSamples, regTargets) + print(f'The unscaled R^2 value is: {reg.score(regSamples, regTargets)}') + + scaler = StandardScaler() + scaledSamples = scaler.fit_transform(regSamples, regTargets) + + scaledReg = SGDRegressor() + + scaledReg.fit(scaledSamples, regTargets) + print(f'The scaled R^2 value is: {scaledReg.score(scaledSamples, regTargets)}') + deScaledCoefs = scaledReg.coef_ / scaler.scale_ + + for i in range(6): + print(f'The coefficient of {columns[i+1]} is {deScaledCoefs[i]} {units[i+1]}') + + huberReg = SGDRegressor(loss="huber") + + huberReg.fit(scaledSamples, regTargets) + print(f'The scaled R^2 value using the huber loss function is: {huberReg.score(scaledSamples, regTargets)}') + deScaledCoefs = huberReg.coef_ / scaler.scale_ + + for i in range(6): + print(f'The coefficient of {columns[i+1]} using the huber loss function is {deScaledCoefs[i]} {units[i+1]}') + +part1() +part2() +part3() +part4()