Skip to content

architlfc/Products-Price-bracket-creation

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

1 Commit
 
 

Repository files navigation

Products-Price-bracket-creation

Segmenting different products into different price brackets .

Shopalyst assignemnt 2

Archit 30 January 2018

algorithm: step1: Give colnames, proper classes to labels,deal with null values Step 2: Calculated the price bracket for each category: a) applied k-means clustering on each level in the category based on its price b) give names to the clusters using max and min to identify the lowest and highest price category

diferrent approaches: 1)considered seggregating by taking quantiles[0-25,25-75 and 75-100 in this case] 2)k-means which gave a better o/p when compared to quantiles

setwd("C:/Users/Administrator/Desktop/Shopalyst")
set2 <- read.csv("price_sample.txt",header = FALSE,stringsAsFactors = F)

#View(set2)

library(dplyr)
## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
str(set2)
## 'data.frame':    29826 obs. of  3 variables:
##  $ V1: chr  "2C39A09E0F9DB7B1" "2F26A59E4943BD1C" "1A5236C8968ABFF9" "6EFC18DB0E6244D7" ...
##  $ V2: chr  "bags" "dress materials" "bags" "heels" ...
##  $ V3: chr  " 9200.0" " 3999.0" " 1699.0" " 1999.0" ...
colnames(set2) <- c("Prod_id","Category","Price")

#3 colums with 1 factor and 1 integer
  set2$Category <-as.factor(set2$Category)
  temp          <-as.integer(set2$Price)
 
  #since none present there was problem in converting to int
  tempprodid<-set2[set2$Price==" None",]
  set2[set2$Prod_id %in% tempprodid$Prod_id, ] 
##                Prod_id Category Price
## 750   756CC4617636498B   sarees  None
## 2277   806C8B53987BF5F t-shirts  None
## 3997  7391C9022E0EB5B4  ethnics  None
## 10444 55255D1720144748  ethnics  None
## 17267 46EE3937DDD447E7 t-shirts  None
## 19125 10D4A8BFFFBEB854   sarees  None
## 20290 556FDF917DFA40A4  ethnics  None
## 20953 39CEBE34C2D94814     tops  None
## 21545 4E49705FFFBE4F47 t-shirts  None
## 21718 341132235BA04F7B   shirts  None
## 29662 7C038CD7D0304AAF   sarees  None
  #these 11 rows dont have a price attached to them so we shall                                                   ignore them
  
  finaldata<-set2[!set2$Prod_id %in% tempprodid$Prod_id,]
  str(finaldata)
## 'data.frame':    29815 obs. of  3 variables:
##  $ Prod_id : chr  "2C39A09E0F9DB7B1" "2F26A59E4943BD1C" "1A5236C8968ABFF9" "6EFC18DB0E6244D7" ...
##  $ Category: Factor w/ 40 levels "accessories",..: 2 8 2 15 19 19 1 22 36 27 ...
##  $ Price   : chr  " 9200.0" " 3999.0" " 1699.0" " 1999.0" ...
  finaldata$Price<- as.integer(finaldata$Price)
  View(finaldata)

  #no need of product id for our analysis
  finaldata<-finaldata[!finaldata$Price==0,2:3]
  
  
  colSums(is.na(set2))#no missing data
##  Prod_id Category    Price 
##        0        0        0
  finaldata$bracket<- c(rep('TBD'))
prac<-finaldata[-3]
#slip ons has only 1 product so it would be useless in our estimation
prac<-prac[!prac$Category=='slip-ons',]

#calculate the bracket using k means
for(i in unique(prac$Category)){
  
  tempk<-prac[prac$Category==i,2]
  
  a<-kmeans(tempk,3,nstart=15)
  prac[prac$Category==i,"bracket"]<-a$cluster 
   
  
}





#Get the bracket labels for the diffent cluster numbers assigned to each category  
for(i in unique(prac$Category)){
  m<-min(prac[prac$Category==i,2])
  l<-prac[prac$Price==m,3]
  
  h<-max(prac[prac$Category==i,2])
  b<-prac[prac$Price==h,3]
prac$bracket <-ifelse(prac$bracket==l,'LOW',ifelse(prac$bracket==b,'HIGH','MEDIUM'))}
#View(prac)
head(prac,10)
##           Category Price bracket
## 1             bags  9200  MEDIUM
## 2  dress materials  3999     LOW
## 3             bags  1699  MEDIUM
## 4            heels  1999     LOW
## 5        jewellery 13111  MEDIUM
## 6        jewellery  6139    HIGH
## 7      accessories   999  MEDIUM
## 8         leggings   599    HIGH
## 9         t-shirts  3599  MEDIUM
## 10          sarees   999  MEDIUM

About

Segmenting different products into different price brackets .

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published