-
Notifications
You must be signed in to change notification settings - Fork 0
/
DataGeneration.R
149 lines (132 loc) · 4.42 KB
/
DataGeneration.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#
# STEP 2: DataGeneration - MD Naseem Ashraf
# UPDATED: Uses the entire dataset from the new dataset files.
#
# Clear the workspace, keeping only `finaldataset` if it is already defined.
rm(list = setdiff(ls(), "finaldataset"))

# Packages and session settings.
library("openxlsx")
library("plyr")
options(java.parameters = "-Xmx2048m")
setwd("C:/Users/Naseem Ashraf/Desktop/Fall 16/DM Project sets")

# Per-product table; its TransactionFreq column supplies the frequency
# thresholds used by the classifier.
orderedallproducts <- read.xlsx(
  xlsxFile = "orderedallproductsNEW.xlsx",
  sheet = 1,
  startRow = 1,
  colNames = TRUE
)
names(orderedallproducts) <- c(
  "ProductID",
  "Description",
  "TransactionFreq",
  "TotalQuantity",
  "Customers",
  "MeanQuantityPerTransaction",
  "MeanQuantityPerCustomer"
)
transfreqsummary <- summary(orderedallproducts[["TransactionFreq"]])

# Final product table: same columns plus unit price and earning per transaction.
finaldataset <- read.xlsx(
  xlsxFile = "finaldatasetNEW.xlsx",
  sheet = 1,
  startRow = 1,
  colNames = TRUE
)
names(finaldataset) <- c(
  "ProductID",
  "Description",
  "TransactionFreq",
  "TotalQuantity",
  "Customers",
  "MeanQuantityPerTransaction",
  "MeanQuantityPerCustomer",
  "UnitPrice",
  "MeanEarningPerTransaction"
)

# Six-number summaries (Min, 1st Qu., Median, Mean, 3rd Qu., Max) of the
# columns of interest. Values recorded from the updated data files:
#
# Customers
##   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##    1.0     9.0    36.0    69.2    94.0   881.0
cutomerssummary <- summary(finaldataset[["Customers"]])

# MeanQuantityPerTransaction
##   Min. 1st Qu.  Median    Mean 3rd Qu.     Max.
##   1.00    3.79    7.43   32.69   13.34 81000.00
meanQuantityPerTransactionsummary <- summary(
  finaldataset[["MeanQuantityPerTransaction"]]
)

# MeanEarningPerTransaction
##   Min. 1st Qu.  Median    Mean 3rd Qu.      Max.
##   0.00    9.47   15.01   71.72   24.03 168500.00
meanEarningPerTransactionsummary <- summary(
  finaldataset[["MeanEarningPerTransaction"]]
)
# Classify one product into a sale-priority class: 1 (high), 2 (medium) or
# 3 (low).
#
# Reads the global objects `finaldataset`, `transfreqsummary`,
# `cutomerssummary` and `meanEarningPerTransactionsummary`. Elements 4, 5 and
# 6 of each summary are used as the Mean, 3rd-quartile and Max thresholds.
# Earning per transaction is computed here as
# UnitPrice * MeanQuantityPerTransaction for the product's row.
#
# Decision table:
#   TransactionFreq below Mean                        -> 3
#   High customers, high earning (medium/high freq)   -> 1
#   High customers, lower earning (medium/high freq)  -> 2
#   Fewer customers, high earning, high freq          -> 2
#   everything else                                   -> 3
# where "medium freq" is Mean <= freq < 3rd Qu., "high freq" is
# freq >= 3rd Qu., and "high" customers/earning is 3rd Qu. <= value < Max.
#
# NOTE(review): the upper bound is exclusive, so a product sitting exactly at
# the maximum customer count or earning is NOT treated as "high". This is kept
# as in the original logic -- confirm whether it is intended.
#
# @param currentid ProductID identifying exactly one row of `finaldataset`.
# @return Numeric scalar 1, 2 or 3.
# Stops with an explicit message when `currentid` does not match exactly one
# row (rows whose ProductID is NA are ignored by the lookup).
classifyPrioritySale <- function(currentid) { # MAIN CLASSIFICATION LOGIC
  # Locate the product once; which() drops NA comparisons.
  row <- which(finaldataset$ProductID == currentid)
  if (length(row) != 1L) {
    stop(
      "classifyPrioritySale: ProductID ", currentid, " matches ",
      length(row), " rows in finaldataset; expected exactly 1",
      call. = FALSE
    )
  }

  tfreq <- finaldataset$TransactionFreq[row]
  tcust <- finaldataset$Customers[row]
  earning <- finaldataset$UnitPrice[row] *
    finaldataset$MeanQuantityPerTransaction[row]

  # TransactionFreq bands.
  freq_mean <- transfreqsummary[[4]]
  freq_q3 <- transfreqsummary[[5]]
  medium_freq <- tfreq >= freq_mean && tfreq < freq_q3
  high_freq <- tfreq >= freq_q3

  # Below-average transaction frequency is always low priority.
  if (!(medium_freq || high_freq)) {
    return(3)
  }

  high_customers <- tcust >= cutomerssummary[[5]] &&
    tcust < cutomerssummary[[6]]
  high_earning <- earning >= meanEarningPerTransactionsummary[[5]] &&
    earning < meanEarningPerTransactionsummary[[6]]

  if (high_customers) {
    if (high_earning) 1 else 2
  } else if (high_freq && high_earning) {
    # Few customers but frequent, well-earning sales still rate medium.
    2
  } else {
    3
  }
}
# Classify every product, attach the class as a factor column and export the
# result.
dim(finaldataset) #3877 9
range(finaldataset$ProductID) #1 3877

# Each ProductID is classified exactly once; vapply() builds the whole result
# vector in one pass and checks that every result is a single number.
clsin <- vapply(
  finaldataset$ProductID, # 3877 total products
  classifyPrioritySale,
  numeric(1),
  USE.NAMES = FALSE
)

# Classes: 1 = high, 2 = medium, 3 = low priority.
clsin <- as.factor(clsin)
levels(clsin)
summary(clsin)
#   1    2    3
# 327  665 2885
finaldataset$"SalePriorityClass" <- clsin
rm(clsin)

newproductdataset <- finaldataset
## Final dataset with class attribute
write.xlsx(newproductdataset, "newproductdatasetclassifiedNEW.xlsx")
summary(newproductdataset$SalePriorityClass)