In this R Markdown file we conduct an econometric analysis of paired comparison evidence to estimate 10 incremental effects and two scale parameters (i.e., one for coma comparisons and one for paired comparisons) of the US EQ-5D-Y-3L data. This software is free for private, non-commercial use only. Please contact the authors for commercial usage.
We estimate a hybrid model where we combine paired comparison and coma comparison data, for the complete and per block data. We then use the bootstrap method to estimate the standard errors.
#setwd("C:/Users/maksat/Box Sync/USF PC/EuroQol/Kaisen task/paper1/Wave 1/markdown")
setwd("~/markdown/pc")
# clear memory
rm(list = ls())
# use libraries
library(reshape2) # for command dcast and melt
library(dplyr) # for command %>%
library(cfdecomp) # for cluster.resample
library(doParallel) # for doParallel
library(foreach) # for doParallel
library(iterators) # for doParallel
library(parallel) # for doParallel
library(knitr) # for kable
library(ggplot2) # for plot
library(ggrepel) # for geom_text_repel
library(epiR) # for CCC
library(DescTools) # for CCC
Below, we present data notations and definitions of new variables created.
Subject, i = 1 to N; task, t = 1 to T;
T1 is the number of coma comparison tasks and paired comparison tasks including warmup
T2 is the number of kaizen tasks including warmup
‘set’ is an indicator of set type:
0 paired or coma comparison
1 first kaizen response (1 improvement)
2 second kaizen response (2 improvements)
3 third kaizen response (3 improvements)
‘MO’ is Mobility, Levels 1 to 3
‘SC’ is Self-Care, Levels 1 to 3
‘UA’ is Usual Activities, Levels 1 to 3
‘PD’ is Pain/Discomfort, Levels 1 to 3
‘AD’ is Anxiety Depression, Levels 1 to 3
‘COMA’ is an indicator for coma, 1 if coma; 0 otherwise
‘gamma1’ is the log-scale factor of paired comparisons
‘gamma2’ is the log-scale factor of coma comparison
# specify the setting
N = 631 ## number of survey record
Z = 230 ## number of variables per record
Z1 = 131 ## the number of respondent-specific variables
Z2 = 28 ## the number of task-specific variables
Z3 = 71 ## the number of temporal variables (e.g., page times)
T1 = 17 ## number of paired comparisons per respondent
T2 = 11 ## number of kaizen tasks per respondent
spec = c(N, Z, Z1, Z2, Z3, T1, T2) ## summary of setting specification
# log-likelihood function of logit with two gammas
llf_g <- function(param0, X) {
param1 <- matrix(param0[1:11],nrow=1)
gamma1 <- as.matrix(
exp(param0[12])*as.matrix(as.numeric(X$tnum2<=6),ncol=1)+
exp(param0[13])*as.matrix(as.numeric(X$tnum2>6),ncol=1),ncol=1)
suffixes <- c(".A1", ".A2", ".A3", ".A4", ".A5", ".A6")
v <- matrix(0, nrow = nrow(X), ncol = length(suffixes)) # value of objects
for (i in seq_along(suffixes)) {
col_indices <- grep(paste0(suffixes[i], "$"), names(X))
subset_X <- X[, col_indices]
v[, i] <- as.matrix(subset_X) %*% t(param1)
v[, i] <- exp(gamma1[,1]*(1-v[, i]))
v[v==exp(gamma1[,1])] <- 0
}
return(colSums(log( v[,1] / (v %*% matrix(data = 1, nrow = 6, ncol = 1))) ))
}
# block estimation function
doBlock <- function(i) {
param0 <- matrix(param0[i, 1:ncol(param0)],nrow=1)
colnames(param0) <- c("MO1","MO2","SC1","SC2","UA1","UA2","PD1","PD2","AD1","AD2",
"COMA","gamma1","gamma2")
start_values <- c(unlist(param0[1, 1:ncol(param0)]))
data_subset <- boot[boot$subjectindex == i, ]
model <- maxLik::maxLik(llf_g, X = data_subset, start = start_values,
method = method_ml, fixed = "COMA",
control = list(iterlim = iteration_ml,
tol = tol_ml, printLevel = 2))
return(matrix(model$estimate[1:13],nrow=1))
}
# load the respondent data
resp_i = read.csv("resp_i_231010.csv")
resp_i <- resp_i[resp_i$incomplete==0,] ## Only completes
# load the task data
task_it = read.csv("task_it_231012.csv")
# load kaizen evidence parameter estimates
kaizen_evidence = read.csv("kaizen_evidence.csv")
In this section we recode the data to create the variables defined above
# Format from wide to long
id_var <- c("id","kz","tnum2","set")
task_ita = data.frame(melt(task_it, id.vars = id_var, value.name = "A"))
# Create attribute variables MO1 to AD2, LLS and COMA
prefixes <- c("MO", "SC", "UA", "PD", "AD")
for (i in c(1:5)) {
task_ita[paste0(prefixes[[i]], 1)] <- as.numeric(as.numeric(
substr(task_ita$A, i, i)) > 1 )*as.numeric(task_ita$A!="44444")
task_ita[paste0(prefixes[[i]], 2)] <- as.numeric(as.numeric(
substr(task_ita$A, i, i)) > 2 )*as.numeric(task_ita$A!="44444")
}
task_ita$COMA <- as.numeric(task_ita$A=="44444") #create COMA
# Format from long to wide
task_ita <- task_ita[,!(names(task_ita) %in% c("A"))]
attr_var <- c("MO1","MO2","SC1","SC2","UA1","UA2","PD1","PD2","AD1","AD2","COMA")
logit_it <- reshape(task_ita, direction = "wide", idvar = id_var,
timevar = "variable", v.names = attr_var)
logit_it[is.na(logit_it)] <- 0
# Append the block indicator
logit_it <- merge(logit_it, subset(resp_i, select = c(id, subjectindex)),
by="id", all.x = T)
# Select only the coma comparison and kaizen responses
logit_it <- logit_it[logit_it$set==0,]
In this section, we will estimate 10 incremental effects and two scale parameters by incorporating the coma comparisons with the evidence from the paired comparisons.
seed <- 231012 # random number generator
tol_ml <- 1e-8 # tolerance criterion for maximum likelihood
method_ml <- "BFGS" # maximization estimation technique for maximum likelihood
iteration_ml <- 150 # number of iteration for maximum likelihood
Using the conditional logit (CL) model, we will estimate the preference weights β and ancillary parameters (e.g., σ) on a coma scale using the preference evidence from the paired comparisons.
# CC and KZ Hybrid (two gammas)
g2_var <- c("MO1","MO2","SC1","SC2","UA1","UA2","PD1","PD2","AD1","AD2",
"COMA","gamma1","gamma2")
param0 <- cbind(t(as.matrix(rep(0.1,10), nrow=1)),
t(as.matrix(c(1,0.5,1.5),nrow=1)))
colnames(param0) <- g2_var
g2_model <- (maxLik::maxLik(llf_g, X=logit_it,
start = c(unlist(param0[1,1:ncol(param0)])),
method = method_ml, fixed = "COMA",
control = list(iterlim=iteration_ml,
tol=tol_ml, printLevel=0)))
summary(g2_model) # BFGS results
## --------------------------------------------
## Maximum Likelihood estimation
## BFGS maximization, 175 iterations
## Return code 0: successful convergence
## Log-Likelihood: -6223.723
## 12 free parameters
## Estimates:
## Estimate Std. error t value Pr(> t)
## MO1 0.03378 0.01627 2.076 0.0379 *
## MO2 0.12765 0.01829 6.979 2.96e-12 ***
## SC1 0.04057 0.01601 2.534 0.0113 *
## SC2 0.08140 0.01559 5.222 1.77e-07 ***
## UA1 0.02175 0.01721 1.264 0.2062
## UA2 0.13302 0.01919 6.933 4.12e-12 ***
## PD1 0.19998 0.01699 11.772 < 2e-16 ***
## PD2 0.42513 0.02659 15.989 < 2e-16 ***
## AD1 0.06146 0.01389 4.424 9.68e-06 ***
## AD2 0.24515 0.01851 13.242 < 2e-16 ***
## COMA 1.00000 0.00000 NA NA
## gamma1 0.27316 0.13588 2.010 0.0444 *
## gamma2 1.08050 0.10265 10.526 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------
We now start the parallel computing to compute the standard errors (SE) using bootstrap method. In our bootstrap method, we will re-sample with replacement and compute the SE for 10 preference weights and 2 scaling parameters.
we first conduct the bootstrap method to the complete data.
reps = 1000
r <- matrix(0,reps,13) # results matrix
param0 <- t(g2_model$estimate[1:13]) # starting values
colnames(param0) <- g2_var
colnames(r) <- g2_var
start_values <- c(unlist(param0[1, 1:ncol(param0)]))
start_time <- Sys.time() # track computational time
for (i in 1:reps) { # About 8 hours
boot <- rbind(cluster.resample(logit_it[logit_it$subjectindex == 1, ],
cluster.name="id", size = NA, newID = F),
cluster.resample(logit_it[logit_it$subjectindex == 2, ],
cluster.name="id", size = NA, newID = F),
cluster.resample(logit_it[logit_it$subjectindex == 3, ],
cluster.name="id", size = NA, newID = F),
cluster.resample(logit_it[logit_it$subjectindex == 4, ],
cluster.name="id", size = NA, newID = F))
final_model <- (maxLik::maxLik(llf_g, X=boot,
start = start_values, method = method_ml, fixed = "COMA",
control = list(iterlim=iteration_ml, tol=tol_ml, printLevel=0)))
r[i,1:13] <- final_model$estimate[1:13]
end_time <- Sys.time()
}
boot231016a <-as.data.frame(r)
boot_se = apply(boot231016a, 2, function(x) sd(x))
results_final = round(cbind(g2_model[["estimate"]],boot_se), digits = 3)
colnames(results_final) = c("Estimates", "Bootstrap SE")
kable(results_final, align = "c", caption = "**Summary Statistics of Bootstrap
Method for Final Data**", row.names = TRUE, escape = FALSE, centering = T)
| Estimates | Bootstrap SE | |
|---|---|---|
| MO1 | 0.034 | 0.017 |
| MO2 | 0.128 | 0.019 |
| SC1 | 0.041 | 0.016 |
| SC2 | 0.081 | 0.015 |
| UA1 | 0.022 | 0.016 |
| UA2 | 0.133 | 0.019 |
| PD1 | 0.200 | 0.019 |
| PD2 | 0.425 | 0.032 |
| AD1 | 0.061 | 0.014 |
| AD2 | 0.245 | 0.020 |
| COMA | 1.000 | 0.000 |
| gamma1 | 0.273 | 0.133 |
| gamma2 | 1.080 | 0.106 |
pc_evidence = as.matrix(g2_model[["estimate"]][1:10])
kaizen_evidence = as.matrix(kaizen_evidence)[1:10]
pc_kz = data.frame(cbind(pc_evidence, kaizen_evidence))
colnames(pc_kz) = c("pc","kz")
# calculate the correlations and concordance
Lin = formatC(round(CCC(pc_kz$pc,pc_kz$kz)[["rho.c"]][["est"]], digits = 3),
format = "f", digits = 3)
Pears = formatC(round(cor(pc_kz$pc,pc_kz$kz, method = "pearson"), digits = 3),
format = "f", digits = 3)
Spear = formatC(round(cor(pc_kz$pc,pc_kz$kz, method = "spearman"), digits = 3),
format = "f", digits = 3)
ggplot(pc_kz, aes(kz,pc)) + geom_point() +
geom_text_repel(aes(label = row.names(pc_kz))) + labs(x = "Kaizen Tasks") +
labs(y = "Paired Comparisons") +
labs(title = "Kaizen and Paired Comparison Estimates (Experience Scale)") +
xlim(0,0.5) + ylim(0,0.5) +
annotate(
"text", x = 0, y = 0.4,
label = paste("\n Lin: ", Lin, "\n Pearson: ", Pears, "\n Spearman ", Spear),
size = 5, hjust = 0)
write.csv(boot231016a, file = "boot_se.csv", row.names = F)
write.csv(pc_evidence, file = "pc_evidence.csv", row.names = F)