import matplotlib.pyplot as plt
import numpy as np
from scipy import integrate
from scipy import stats
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# from tqdm import tqdm
#Import all needed modules

columns = ["Material", "Density", "Radius", "Mass", "Temperature", "Pressure", "Height", "Time"]

# NOTE(review): the part of the script between `columns` and "Part 1" was not
# part of the reviewed change and is not repeated here. The functions below
# read these names from module scope: df (assigned from
# getData('exercise3data.csv')), units, materials, radii, columnsNoMaterial
# and columnStats -- presumably all defined in that part; confirm before running.


####Part 1
def part1():
    """Print per-column statistics, then plot fall time against drop height.

    Every entry of ``columns`` except "Material" is passed to ``columnStats``
    together with the entry of ``units`` at the same index. Afterwards one
    figure is shown per material, holding one scatter series per radius.
    """
    for i, column in enumerate(columns):
        if column == "Material":
            continue
        columnStats(column, units[i])

    for material in materials:
        materialDf = df[df["Material"] == material]
        for radius in radii:
            radiusDf = materialDf[materialDf["Radius"] == radius]
            print(radiusDf)
            plt.scatter(radiusDf["Height"], radiusDf["Time"], label=f'Radius {radius}m')

        plt.xlabel("Drop Height/m")
        plt.ylabel("Fall Time/s")
        plt.title(f'Material: {material}')
        plt.legend()
        plt.show()


####Part 2
def part2():
    """Print the Pearson correlation matrix of the non-material columns and show it as a heatmap.

    Tick labels and cell lookups use ``columnsNoMaterial``, so that list is
    expected to name the columns of ``df`` (minus "Material") in the same
    order as they appear in ``df`` -- TODO confirm against its definition.
    """
    dfNoMaterial = df.drop("Material", axis=1)
    corrMatrix = dfNoMaterial.corr(method='pearson')
    print(corrMatrix)

    fig, ax = plt.subplots()
    im = ax.imshow(corrMatrix, cmap="gnuplot", vmin=-1, vmax=1)

    tickPositions = range(len(columnsNoMaterial))
    ax.set_xticks(tickPositions, labels=columnsNoMaterial)
    ax.set_yticks(tickPositions, labels=columnsNoMaterial)

    # Annotate every cell with its coefficient. The text at (x=j, y=i) sits on
    # image row i / column j; a correlation matrix is symmetric, so the value
    # is the same whichever of the two names is used as the row.
    for i, rowName in enumerate(columnsNoMaterial):
        for j, colName in enumerate(columnsNoMaterial):
            ax.text(j, i, round(corrMatrix.loc[rowName, colName], 2),
                    ha="center", va="center", color="w")

    fig.colorbar(im)
    fig.tight_layout()
    plt.show()


####Part 3
def part3(randomState=None):
    """Fit linear models for fall time and compare their predictions with the data.

    Steps, in order:
      1. Fit a linear regression of "Time" on the six numeric feature columns
         using every row of ``df`` and print one coefficient per feature.
      2. For the iron rows, plot measured and predicted times per radius
         (``predict``).
      3. Split the iron rows 90/10 into train and test sets, fit a second
         model on the training rows, and compare it with the held-out rows
         (``trueVpred`` and ``calcResiduals``).

    Parameters:
        randomState: forwarded to ``train_test_split`` as ``random_state``.
            The default ``None`` keeps the split unseeded; pass an int to get
            the same split on every run.
    """
    featureColumns = ["Density", "Radius", "Mass", "Temperature", "Pressure", "Height"]

    features = df[featureColumns]
    targets = df["Time"]

    linearReg = LinearRegression()
    linearFit = linearReg.fit(features, targets)

    # units is indexed in step with `columns` (see part1), whose first entry is
    # "Material"; the features therefore start at index 1.
    for i, (name, coef) in enumerate(zip(linearFit.feature_names_in_, linearFit.coef_), start=1):
        print(f'The coefficient of {name} is {coef} {units[i]}')

    ironDf = df[df["Material"] == "iron"]

    def fitByMeans(density, radius, mass, temp, pressure, height):
        """Evaluate the all-data model by hand: intercept plus coefficient times value per feature."""
        coefs = linearFit.coef_
        time = (linearFit.intercept_
                + (density * coefs[0])
                + (radius * coefs[1])
                + (mass * coefs[2])
                + (temp * coefs[3])
                + (pressure * coefs[4])
                + (height * coefs[5]))
        return time

    def predict():
        """Per radius, plot the iron data, the model predictions, and a mean-feature line."""
        meanColumns = featureColumns[:-1]  # every feature except "Height"
        for radius in radii:
            radiusDf = ironDf[ironDf["Radius"] == radius]
            plt.scatter(radiusDf["Height"], radiusDf["Time"], label="Experimental data")
            plt.scatter(radiusDf["Height"], linearReg.predict(radiusDf[featureColumns]),
                        label="Predicted data")

            # Straight line between the smallest and largest drop height, with
            # every other feature held at its mean for this radius.
            heightBounds = [radiusDf["Height"].min(), radiusDf["Height"].max()]
            means = radiusDf[meanColumns].mean()
            linearByMeans = [fitByMeans(*means, height) for height in heightBounds]
            plt.plot(heightBounds, linearByMeans, label="Fit Using Means")

            plt.xlabel("Drop Height/m")
            plt.ylabel("Fall Time/s")
            plt.legend()
            plt.title(f'Iron data and predictions for radius of {radius}m')
            plt.show()

    trainData, testData = train_test_split(ironDf, test_size=0.1, random_state=randomState)

    trainLinearReg = LinearRegression()
    trainLinearReg.fit(trainData[featureColumns], trainData["Time"])

    def trueVpred():
        """Per radius, plot held-out times against predictions and print two R^2 values.

        Both R^2 values are the squared ``rvalue`` of ``stats.linregress``
        against drop height: one for the measured times, one for the times
        predicted by the model trained on ``trainData``.
        """
        for radius in radii:
            radiusDf = testData[testData["Radius"] == radius]
            # A 10% test split can leave a radius with no rows, or with a
            # single distinct height; linregress cannot fit a line to either.
            if radiusDf["Height"].nunique() < 2:
                print(f'For radius of {radius}m, there are too few test points to calculate R^2 values')
                continue

            predicted = trainLinearReg.predict(radiusDf[featureColumns])
            plt.scatter(radiusDf["Height"], radiusDf["Time"], label="Experimental data")
            plt.scatter(radiusDf["Height"], predicted, label="Predicted data")

            trueR2 = stats.linregress(radiusDf["Height"], radiusDf["Time"]).rvalue ** 2
            predR2 = stats.linregress(radiusDf["Height"], predicted).rvalue ** 2
            print(f'For radius of {radius}m, the true R^2 value is {trueR2} and the predicted R^2 value is {predR2}')

            plt.xlabel("Drop Height/m")
            plt.ylabel("Fall Time/s")
            plt.legend()
            plt.title(f'Iron test data and predictions for radius of {radius}m')
            plt.show()

    def calcResiduals():
        """Scatter the held-out residuals (measured minus predicted time) against radius."""
        residuals = testData["Time"] - trainLinearReg.predict(testData[featureColumns])

        plt.scatter(testData["Radius"], residuals)
        plt.xlabel("Radius/m")
        plt.ylabel("Residual/s")
        plt.title("Residuals of the iron test data")
        plt.show()

    # Run the analyses; without these calls the helpers above are never
    # executed and the train/test fit is thrown away.
    predict()
    trueVpred()
    calcResiduals()