r - Insert missing dates in dataframe for each product and shop -
here's datafile.
location.name item.description sort.in.date orderedqty hc overijse pro b 731 7/1/2015 2 hc overijse pro b 731 8/3/2015 2 hc overijse pro b 931 8/27/2015 1 hc overijse pro b 933 6/24/2015 2 hc overijse pro b 933 7/22/2015 2 hc overijse pro b 933 7/29/2015 1 hc overijse pro r 731 6/16/2015 2 hc overijse pro r 731 8/3/2015 2 hannah thieuw pro b 933 7/14/2015 2 hannah thieuw pro r 731 8/14/2015 2 hc asse pro b 731 6/12/2015 2 hc asse pro b 931 6/12/2015 2 hc asse pro b 933 6/4/2015 2 hc asse pro r 731 6/8/2015 2 hc asse pro r 731 6/10/2015 2 hc asse pro r 731 6/17/2015 2 hc asse pro r 731 7/9/2015 2 hc asse pro r 731 7/27/2015 2 hc asse pro r 732 6/16/2015 2
my question how calculate average ordered quantity per day/week period 6/1/20105->8/31/2015 (without weekends). per location , per product. have find way insert zero's dataset calculate average. (this small value). need stand dev. used code calculating average orderdqty per order , time between 2 orders.
fe <- fe[order(fe$item.description,fe$location.name,fe$sort.in.date), ] sfe<-dlply(fe, .(fe$item.description,fe$location.name)) fe$daybetween <- unlist(sapply(seq_along(sfe), function(x) diff(as.date(c(na, as.date(sfe[[x]][,3] , format="%d/%m/%y") ))) )) filteredexport<-fe remove(fe) results<-ddply(filteredexport, .(location.code,location.name,item.description),summarize, n=length(item.description), averageqty = mean(orderedqty),sdaverageqty=sd(orderedqty) , avgtimebetweenorder = mean(daybetween,na.rm=true),sdavgtimebetweenorder=sd(daybetween,na.rm=true))
here dput of data:
structure(list(location.name = c("hc overijse", "hc overijse", "hc overijse", "hc overijse", "hc overijse", "hc overijse", "hc overijse", "hc overijse", "hannah thieuw", "hannah thieuw", "hc asse", "hc asse", "hc asse", "hc asse", "hc asse", "hc asse", "hc asse", "hc asse", "hc asse"), item.description = c("pro b 731", "pro b 731", "pro b 931", "pro b 933", "pro b 933", "pro b 933", "pro r 731", "pro r 731", "pro b 933", "pro r 731", "pro b 731", "pro b 931", "pro b 933", "pro r 731", "pro r 731", "pro r 731", "pro r 731", "pro r 731", "pro r 732"), sort.in.date = structure(c(16617, 16650, 16674, 16610, 16638, 16645, 16602, 16650, 16630, 16661, 16598, 16598, 16590, 16594, 16596, 16603, 16625, 16643, 16602), class = "date"), orderedqty = c(2l, 2l, 1l, 2l, 2l, 1l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l)), .names = c("location.name", "item.description", "sort.in.date", "orderedqty"), row.names = c(na, -19l), class = "data.frame")
library(plyr) library(dplyr) library(zoo) df %>% mutate(date = sort.in.date %>% as.date, day_of_week = date %>% format("%w") %>% as.numeric) %>% full_join(data_frame(date = as.date("6/1/2015"):as.date("8/31/2015") %>% as.date) ) %>% mutate(orderedqty.expand = orderedqty %>% mapvalues(na, 0) ) %>% filter(date %>% between(as.date("6/1/2015"), as.date("8/31/2015") ) & day_of_week %>% between(1, 5) ) %>% group_by(location.name, item.description) %>% arrange(date) %>% summarize(orderedqty.mean = mean(orderedqty.expand), orderedqty.sd = sd(orderedqty.expand), average_interval = date %>% diff %>% mean)
Comments
Post a Comment