Below is a small example showing how to use Keras/TensorFlow 2.0 to predict a value from a small dataset. More explanations follow in the Jupyter notebook below…
In [1]:
#Imports
import pandas as pd
import numpy as np
import seaborn as sns
In [2]:
#Read the CSV into a DataFrame (the path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='Keras/fake_reg.csv')
In [3]:
#Peek at the first five rows
df.head(n=5)
Out[3]:
In [4]:
#Pairwise plots of the columns in df, to eyeball how they relate to each other
sns.pairplot(data=df)
Out[4]:
In [5]:
from sklearn.model_selection import train_test_split
In [6]:
#Pull the two feature columns and the target column out as plain numpy arrays (.values),
#since numpy arrays are the preferred input for tensorflow
X = df.loc[:, ['feature1', 'feature2']].values
y = df.loc[:, 'price'].values
In [7]:
#Hold out 30% of the rows for testing; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
In [8]:
#MinMaxScaler rescales each feature into the [0, 1] range
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
In [9]:
#Learn the scaling parameters from the training set only,
#so that nothing about the test data influences the scaling
scaler.fit(X=X_train)
Out[9]:
In [10]:
#Apply the scaling fitted on the training set to both splits.
#NOTE: this overwrites X_train/X_test, so re-running this cell on its own scales them a second time
X_train, X_test = scaler.transform(X=X_train), scaler.transform(X=X_test)
In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
In [12]:
#Seed TensorFlow's global random generator so that weight initialisation is
#repeatable when the notebook (or just this cell and the fit below) is re-run
import tensorflow as tf
tf.random.set_seed(42)

#Small fully connected network for regression:
#three hidden layers of 4 ReLU units each, then a single output unit with no activation
model = Sequential([
    Dense(4, activation='relu'),
    Dense(4, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1),
])
#Adam optimizer minimising mean squared error
model.compile(optimizer='adam', loss='mse')
In [13]:
#Train on the scaled training features for 400 epochs
model.fit(X_train, y_train, epochs=400)
In [14]:
#Collect the loss values recorded on the model's history into a DataFrame
loss_df = pd.DataFrame(data=model.history.history)
In [15]:
#Plot the recorded losses
#I.E. show how the model keeps improving until no further improvement is possible
loss_df.plot()
Out[15]:
In [16]:
#MSE (the loss the model was compiled with) on the training split and on the held-out test split
training_score = model.evaluate(x=X_train, y=y_train, verbose=0)
test_score = model.evaluate(x=X_test, y=y_test, verbose=0)
In [17]:
#Loss (MSE) on the training set
training_score
Out[17]:
In [18]:
#Loss (MSE) on the test set
test_score
Out[18]:
In [19]:
#Run the trained model on the scaled test features
test_predictions = model.predict(x=X_test)
In [21]:
#Flatten the predictions into a 1-D Series.
#reshape(-1) infers the length, so this works for any test-set size instead of exactly 300 rows;
#np.asarray keeps the cell safe to re-run once test_predictions is already a Series
test_predictions = pd.Series(np.asarray(test_predictions).reshape(-1))
In [22]:
#Line up the true test targets and the model's predictions side by side in one dataframe
pred_df = pd.DataFrame({
    'Test True Y': y_test,
    'Model Predictions': test_predictions,
})
In [23]:
#Plot the model's predictions against the true test values;
#the closer the points sit to a straight diagonal, the better the predictions
sns.scatterplot(data=pred_df, x='Test True Y', y='Model Predictions')
Out[23]:
In [24]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
In [25]:
#MAE (mean absolute error)
#I.E. On average, how many dollars off are we?
mean_absolute_error(
    y_true=pred_df['Test True Y'],
    y_pred=pred_df['Model Predictions'],
)
Out[25]:
In [26]:
#MSE (mean squared error) between the true test values and the predictions
mean_squared_error(
    y_true=pred_df['Test True Y'],
    y_pred=pred_df['Model Predictions'],
)
Out[26]:
In [27]:
#RMSE: square root of the MSE, which puts the error back in the same units as the target
mean_squared_error(
    y_true=pred_df['Test True Y'],
    y_pred=pred_df['Model Predictions'],
) ** 0.5
Out[27]:
In [28]:
new_gem = [[998,1000]]
In [29]:
#Remember to scale the data
new_gem = scaler.transform(new_gem)
In [30]:
model.predict(new_gem)
Out[30]:
In [31]:
from tensorflow.keras.models import load_model
In [32]:
#Save the trained model to an .h5 file in the working directory
model.save(filepath='my_gem_model.h5')
In [33]:
#Load the saved model back from disk
later_model = load_model(filepath='my_gem_model.h5')
WordPress conversion from Keras Basics.ipynb by nb2wp v0.3.1