Features consist of hourly average ambient variables:
- Ambient Temperature (AT) in the range 1.81°C to 37.11°C
- Ambient Pressure (AP) in the range 992.89 to 1033.30 millibar
- Relative Humidity (RH) in the range 25.56% to 100.16%
- Exhaust Vacuum (V) in the range 25.36 to 81.56 cm Hg
- Net hourly electrical energy output (PE), the prediction target, in the range 420.26 to 495.76 MW
import numpy as np
import pandas as pd
# Location of the hosted power-plant CSV file.
url = "https://DataScienceSchools.github.io/Machine_Learning/Sklearn/Case_Study/PowerPlant/PowerPlant.csv"

# Read the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)
AT | V | AP | RH | PE | |
---|---|---|---|---|---|
0 | 8.34 | 40.77 | 1010.84 | 90.01 | 480.48 |
1 | 23.64 | 58.49 | 1011.40 | 74.20 | 445.75 |
2 | 29.74 | 56.90 | 1007.15 | 41.91 | 438.76 |
3 | 19.07 | 49.69 | 1007.22 | 76.79 | 453.09 |
4 | 11.80 | 40.66 | 1017.13 | 97.20 | 464.43 |
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Every column except the last is a feature; the last column is the target.
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Expand the features into polynomial terms up to degree 4. The expansion is
# fitted on the training rows only.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)

# Ordinary least-squares fit on the expanded training features.
model = LinearRegression()
model.fit(X_poly, y_train)
LinearRegression()
# Predict on the held-out rows, applying the same polynomial expansion that
# was fitted on the training data.
y_pred = model.predict(poly_reg.transform(X_test))

# Display floats with two decimals. The fully qualified option name is
# required: the bare 'precision' pattern is ambiguous on newer pandas
# releases (it also matches 'styler.format.precision') and raises OptionError.
pd.set_option('display.precision', 2)

# Side-by-side table of predicted and actual target values.
data = pd.DataFrame({'Predicted_Y': y_pred, 'Real_Y': y_test})
data
Predicted_Y | Real_Y | |
---|---|---|
0 | 430.93 | 426.18 |
1 | 448.01 | 451.10 |
2 | 444.38 | 442.87 |
3 | 445.81 | 443.70 |
4 | 461.45 | 460.59 |
... | ... | ... |
1909 | 467.41 | 468.19 |
1910 | 433.69 | 431.16 |
1911 | 455.09 | 454.20 |
1912 | 446.74 | 444.13 |
1913 | 433.52 | 436.58 |
1914 rows × 2 columns
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the held-out set.
r2_score(y_true=y_test, y_pred=y_pred)
0.9428968311604174