% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/partition_data.R
\name{partition_data}
\alias{partition_data}
\title{Partition Data Into Shards}
\usage{
partition_data(Data, s)
}
\arguments{
\item{Data}{A list containing either 'regdata' or 'lgtdata', and optionally 'Z'. If 'Data' contains 'lgtdata', it should also contain 'p', the number of choice alternatives.}

\item{s}{The number of shards to partition the data into.}
}
\value{
A list of 's' shards where each shard contains:

\item{p}{(integer) - Number of choice alternatives (only if 'Data' contains 'lgtdata')}
\item{lgtdata or regdata}{(list, length: n) - A list of n elements where each element contains 'X', 'y', 'beta', and 'tau'}
\item{Z}{(matrix) - An n x nz matrix of unit characteristics. NULL if 'Data' does not contain 'Z' [Optional]}
}
\description{
A function to partition data into s shards for use in distributed estimation.
}
\examples{

# Simulate hierarchical linear regression data and partition it into shards.
# Fix the RNG seed so the example produces the same data on every run.
set.seed(1)

nreg = 2000  # number of observational units
nobs = 5     # number of observations per unit
nvar = 3     # number of regression coefficients, including the intercept
nz = 2       # number of unit-level characteristics

# Unit characteristics, centred column by column
Z = matrix(runif(nreg * nz), ncol = nz)
Z = sweep(Z, 2, colMeans(Z))
Delta = matrix(c(1, -1, 2, 0, 1, 0), ncol = nz)  # nvar x nz
tau0 = .1
iota = rep(1, nobs)  # intercept column of each design matrix

## Mixture components and weights passed to bayesm::rmixture
a = diag(1, nrow = nvar)
tcomps = list(
  list(mu = c(-5, 0, 0), rooti = a),
  list(mu = c(5, -5, 2), rooti = a),
  list(mu = c(5, 5, -2), rooti = a)
)
tpvec = c(.33, .33, .34)

# One list element per unit, each holding 'y', 'X', 'beta' and 'tau'
regdata = vector("list", nreg)
for (reg in seq_len(nreg)) {
  tempout = bayesm::rmixture(1, tpvec, tcomps)
  # Unit coefficients: shift implied by Z plus a draw from the mixture
  beta = as.vector(Delta \%*\% Z[reg, ]) + as.vector(tempout$x)
  X = cbind(iota, matrix(runif(nobs * (nvar - 1)), ncol = nvar - 1))
  tau = tau0 * runif(1, min = 0.5, max = 1)
  y = X \%*\% beta + sqrt(tau) * rnorm(nobs)
  regdata[[reg]] = list(y = y, X = X, beta = beta, tau = tau)
}

# The data are wrapped in an outer list with a single element
Data1 = list(list(regdata = regdata, Z = Z))
length(Data1)

# Partition into 3 shards and check how many were returned
Data2 = partition_data(Data1, s = 3)
length(Data2)

}
\references{
Bumbaca, F. (Rico), Misra, S., & Rossi, P. E. (2020). Scalable Target Marketing: Distributed Markov Chain Monte Carlo for Bayesian Hierarchical Models. Journal of Marketing Research, 57(6), 999-1018.
}
\author{
Federico Bumbaca, Leeds School of Business, University of Colorado Boulder, \email{federico.bumbaca@colorado.edu}
}
