1
0
mirror of https://github.com/Microsoft/sql-server-samples.git synced 2025-12-08 14:58:54 +00:00
Files
sql-server-samples/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py

74 lines
2.6 KiB
Python

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
#If you are running SQL Server 2017 RC1 and above:
from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData
from revoscalepy import rx_import
def get_rental_predictions():
#Connection string to connect to SQL Server named instance
conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
#Define the columns we wish to import
column_info = {
"Year" : { "type" : "integer" },
"Month" : { "type" : "integer" },
"Day" : { "type" : "integer" },
"RentalCount" : { "type" : "integer" },
"WeekDay" : {
"type" : "factor",
"levels" : ["1", "2", "3", "4", "5", "6", "7"]
},
"Holiday" : {
"type" : "factor",
"levels" : ["1", "0"]
},
"Snow" : {
"type" : "factor",
"levels" : ["1", "0"]
}
}
#Get the data from SQL Server Table
data_source = RxSqlServerData(table="dbo.rental_data",
connection_string=conn_str, column_info=column_info)
computeContext = RxInSqlServer(
connection_string = conn_str,
num_tasks = 1,
auto_cleanup = False
)
RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False)
# import data source and convert to pandas dataframe
df = pd.DataFrame(rx_import(input_data = data_source))
print("Data frame:", df)
# Get all the columns from the dataframe.
columns = df.columns.tolist()
# Filter the columns to remove ones we don't want to use in the training
columns = [c for c in columns if c not in ["Year"]]
# Store the variable we'll be predicting on.
target = "RentalCount"
# Generate the training set. Set random_state to be able to replicate results.
train = df.sample(frac=0.8, random_state=1)
# Select anything not in the training set and put it in the testing set.
test = df.loc[~df.index.isin(train.index)]
# Print the shapes of both sets.
print("Training set shape:", train.shape)
print("Testing set shape:", test.shape)
# Initialize the model class.
lin_model = LinearRegression()
# Fit the model to the training data.
lin_model.fit(train[columns], train[target])
# Generate our predictions for the test set.
lin_predictions = lin_model.predict(test[columns])
print("Predictions:", lin_predictions)
# Compute error between our test predictions and the actual values.
lin_mse = mean_squared_error(lin_predictions, test[target])
print("Computed error:", lin_mse)
get_rental_predictions()