mirror of
https://github.com/Microsoft/sql-server-samples.git
synced 2025-12-08 14:58:54 +00:00
101 lines
4.1 KiB
Transact-SQL
101 lines
4.1 KiB
Transact-SQL
USE [tpcxbb_1gb]
|
|
|
|
DROP PROC IF EXISTS generate_customer_return_clusters;
|
|
GO
|
|
CREATE procedure [dbo].[generate_customer_return_clusters]
|
|
AS
|
|
/*
|
|
This procedure uses R to classify customers into different groups based on their
|
|
purchase & return history.
|
|
*/
|
|
BEGIN
|
|
DECLARE @duration FLOAT
|
|
, @instance_name NVARCHAR(100) = @@SERVERNAME
|
|
, @database_name NVARCHAR(128) = db_name()
|
|
|
|
-- Input query to generate the purchase history & return metrics
|
|
, @input_query NVARCHAR(MAX) = N'
|
|
SELECT
|
|
ss_customer_sk AS customer,
|
|
round(CASE WHEN ((orders_count = 0) OR (returns_count IS NULL) OR (orders_count IS NULL) OR ((returns_count / orders_count) IS NULL) ) THEN 0.0 ELSE (cast(returns_count as nchar(10)) / orders_count) END, 7) AS orderRatio,
|
|
round(CASE WHEN ((orders_items = 0) OR(returns_items IS NULL) OR (orders_items IS NULL) OR ((returns_items / orders_items) IS NULL) ) THEN 0.0 ELSE (cast(returns_items as nchar(10)) / orders_items) END, 7) AS itemsRatio,
|
|
round(CASE WHEN ((orders_money = 0) OR (returns_money IS NULL) OR (orders_money IS NULL) OR ((returns_money / orders_money) IS NULL) ) THEN 0.0 ELSE (cast(returns_money as nchar(10)) / orders_money) END, 7) AS monetaryRatio,
|
|
round(CASE WHEN ( returns_count IS NULL ) THEN 0.0 ELSE returns_count END, 0) AS frequency
|
|
|
|
FROM
|
|
(
|
|
SELECT
|
|
ss_customer_sk,
|
|
-- return order ratio
|
|
COUNT(distinct(ss_ticket_number)) AS orders_count,
|
|
-- return ss_item_sk ratio
|
|
COUNT(ss_item_sk) AS orders_items,
|
|
-- return monetary amount ratio
|
|
SUM( ss_net_paid ) AS orders_money
|
|
FROM store_sales s
|
|
GROUP BY ss_customer_sk
|
|
) orders
|
|
LEFT OUTER JOIN
|
|
(
|
|
SELECT
|
|
sr_customer_sk,
|
|
-- return order ratio
|
|
count(distinct(sr_ticket_number)) as returns_count,
|
|
-- return ss_item_sk ratio
|
|
COUNT(sr_item_sk) as returns_items,
|
|
-- return monetary amount ratio
|
|
SUM( sr_return_amt ) AS returns_money
|
|
FROM store_returns
|
|
GROUP BY sr_customer_sk
|
|
) returned ON ss_customer_sk=sr_customer_sk
|
|
'
|
|
|
|
EXEC sp_execute_external_script
|
|
@language = N'R'
|
|
, @script = N'
|
|
# Define the connection string
|
|
connStr <- paste("Driver=SQL Server;Server=", instance_name, ";Database=", database_name, ";Trusted_Connection=true;", sep="");
|
|
|
|
|
|
# Input customer data that needs to be classified. This is the result we get from our query
|
|
customer_returns <- RxSqlServerData(sqlQuery = input_query,
|
|
colClasses = c(customer = "numeric", orderRatio = "numeric", itemsRatio = "numeric", monetaryRatio = "numeric", frequency = "numeric"),
|
|
connectionString = connStr);
|
|
|
|
# Output table to hold the customer cluster mappings
|
|
return_cluster = RxSqlServerData(table = "customer_return_clusters", connectionString = connStr);
|
|
|
|
# set.seed for random number generator for predictability
|
|
set.seed(10);
|
|
|
|
# generate clusters using rxKmeans and output clusters to a table called "customer_return_clusters".
|
|
clust <- rxKmeans( ~ orderRatio + itemsRatio + monetaryRatio + frequency, customer_returns, numClusters = 4
|
|
, outFile = return_cluster, outColName = "cluster", writeModelVars = TRUE , extraVarsToWrite = c("customer"), overwrite = TRUE);
|
|
'
|
|
, @input_data_1 = N''
|
|
, @params = N'@instance_name nvarchar(100), @database_name nvarchar(128), @input_query nvarchar(max), @duration float OUTPUT'
|
|
, @instance_name = @instance_name
|
|
, @database_name = @database_name
|
|
, @input_query = @input_query
|
|
, @duration = @duration OUTPUT;
|
|
END;
|
|
|
|
GO
|
|
|
|
|
|
--Empty table of the results before running the stored procedure
|
|
TRUNCATE TABLE customer_return_clusters;
|
|
|
|
--Execute the clustering. This will load the table customer_return_clusters with cluster mappings
|
|
EXEC [dbo].[generate_customer_return_clusters];
|
|
|
|
--Now select data from table customer_return_clusters to verify that the clustering data was loaded
|
|
SELECT * FROM customer_return_clusters;
|
|
|
|
--Select email addresses of customers in cluster 1
|
|
SELECT customer.[c_email_address], customer.c_customer_sk
|
|
FROM dbo.customer
|
|
JOIN
|
|
[dbo].[customer_return_clusters] as r
|
|
ON r.customer = customer.c_customer_sk
|
|
WHERE r.cluster = 3 |