1 Purpose

In this R Markdown file we conduct an econometric analysis of paired comparison evidence to estimate 10 incremental effects and two scale parameters (i.e., one for coma comparisons and one for paired comparisons) of the US EQ-5D-Y-3L data. This software is free for private, non-commercial use only. Please contact the authors for commercial usage.

2 Summary

We estimate a hybrid model where we combine paired comparison and coma comparison data, for the complete and per block data. We then use the bootstrap method to estimate the standard errors.

3 Load the libraries

#setwd("C:/Users/maksat/Box Sync/USF PC/EuroQol/Kaisen task/paper1/Wave 1/markdown")
setwd("~/markdown/pc")
# clear memory
rm(list = ls())
# use libraries
library(reshape2) # for command dcast and melt
library(dplyr) # for command %>%
library(cfdecomp) # for cluster.resample
library(doParallel) # for doParallel
library(foreach) # for doParallel
library(iterators) # for doParallel
library(parallel) # for doParallel
library(knitr) # for kable
library(ggplot2) # for plot
library(ggrepel) # for geom_text_repel
library(epiR) # for CCC
library(DescTools) # for CCC

4 Data notation

Below, we present data notations and definitions of new variables created.

4.1 Subject and task index

Subject, i = 1 to N; task, t = 1 to T;

T1 is the number of coma comparison tasks and paired comparison tasks including warmup

T2 is the number of kaizen tasks including warmup

4.2 Set

‘set’ is an indicator of set type:
0 paired or coma comparison
1 first kaizen response (1 improvement)
2 second kaizen response (2 improvements)
3 third kaizen response (3 improvements)

4.3 Attribute variables

‘MO’ is Mobility, Levels 1 to 3
‘SC’ is Self-Care, Levels 1 to 3
‘UA’ is Usual Activities, Levels 1 to 3
‘PD’ is Pain/Discomfort, Levels 1 to 3
‘AD’ is Anxiety Depression, Levels 1 to 3
‘COMA’ is an indicator for coma, 1 if coma; 0 otherwise

4.4 Ancillary parameters

‘gamma1’ is the log-scale factor of paired comparisons
‘gamma2’ is the log-scale factor of coma comparison

5 Specify the settings

# specify the setting
N = 631 ## number of survey record
Z = 230 ## number of variables per record
Z1 = 131 ## the number of respondent-specific variables 
Z2 = 28  ## the number of task-specific variables
Z3 = 71  ## the number of temporal variables (e.g., page times)  
T1 = 17 ## number of paired comparisons per respondent
T2 = 11 ## number of kaizen tasks per respondent
spec = c(N, Z, Z1, Z2, Z3, T1, T2)  ## summary of setting specification

6 Specify the functions

# log-likelihood function of logit with two gammas
llf_g <- function(param0, X) {
  param1 <- matrix(param0[1:11],nrow=1)
  gamma1 <- as.matrix(
    exp(param0[12])*as.matrix(as.numeric(X$tnum2<=6),ncol=1)+
      exp(param0[13])*as.matrix(as.numeric(X$tnum2>6),ncol=1),ncol=1)
  suffixes <- c(".A1", ".A2", ".A3", ".A4", ".A5", ".A6")
  v <- matrix(0, nrow = nrow(X), ncol = length(suffixes)) # value of objects
  for (i in seq_along(suffixes)) {
    col_indices <- grep(paste0(suffixes[i], "$"), names(X))
    subset_X <- X[, col_indices]
    v[, i] <- as.matrix(subset_X) %*% t(param1)
    v[, i] <- exp(gamma1[,1]*(1-v[, i]))
    v[v==exp(gamma1[,1])] <- 0
  }
  return(colSums(log( v[,1] / (v %*% matrix(data = 1, nrow = 6, ncol = 1))) ))
}

# block estimation function
doBlock <- function(i) {
  param0 <- matrix(param0[i, 1:ncol(param0)],nrow=1)
  colnames(param0) <- c("MO1","MO2","SC1","SC2","UA1","UA2","PD1","PD2","AD1","AD2",
                        "COMA","gamma1","gamma2")
  start_values <- c(unlist(param0[1, 1:ncol(param0)]))
  data_subset <- boot[boot$subjectindex == i, ]
  model <- maxLik::maxLik(llf_g, X = data_subset, start = start_values,
                          method = method_ml, fixed = "COMA",
                          control = list(iterlim = iteration_ml,
                                         tol = tol_ml, printLevel = 2))
  return(matrix(model$estimate[1:13],nrow=1)) 
}

7 Load the Data

# load the respondent data
resp_i = read.csv("resp_i_231010.csv")
resp_i <- resp_i[resp_i$incomplete==0,] ## Only completes
# load the task data
task_it = read.csv("task_it_231012.csv")
# load kaizen evidence parameter estimates
kaizen_evidence = read.csv("kaizen_evidence.csv")

8 Recode the data

In this section we recode the data to create the variables defined above

# Format from wide to long
id_var <- c("id","kz","tnum2","set")
task_ita = data.frame(melt(task_it, id.vars = id_var, value.name = "A"))

# Create attribute variables MO1 to AD2, LLS and COMA
prefixes <- c("MO", "SC", "UA", "PD", "AD")
for (i in c(1:5)) {
  task_ita[paste0(prefixes[[i]], 1)] <- as.numeric(as.numeric(
      substr(task_ita$A, i, i)) > 1 )*as.numeric(task_ita$A!="44444")
  task_ita[paste0(prefixes[[i]], 2)] <- as.numeric(as.numeric(
      substr(task_ita$A, i, i)) > 2 )*as.numeric(task_ita$A!="44444")
}
task_ita$COMA <- as.numeric(task_ita$A=="44444") #create COMA

# Format from long to wide
task_ita <- task_ita[,!(names(task_ita) %in% c("A"))]

attr_var <- c("MO1","MO2","SC1","SC2","UA1","UA2","PD1","PD2","AD1","AD2","COMA")
logit_it <- reshape(task_ita, direction = "wide", idvar = id_var,
                    timevar = "variable", v.names = attr_var)
logit_it[is.na(logit_it)] <- 0

# Append the block indicator
logit_it <- merge(logit_it,  subset(resp_i, select = c(id, subjectindex)),
                     by="id", all.x = T)

# Select only the coma comparison and kaizen responses
logit_it <- logit_it[logit_it$set==0,]

9 Logit estimation

In this section, we will estimate 10 incremental effects and two scale parameters by incorporating the coma comparisons with the evidence from the paired comparisons.

9.1 Estimation settings

seed <- 231012  # random number generator
tol_ml <- 1e-8 # tolerance criterion for maximum likelihood
method_ml <- "BFGS" # maximization estimation technique for maximum likelihood
iteration_ml <- 150 # number of iteration for maximum likelihood

9.2 Paired comparisons and kaizen tasks hybrid

Using the conditional logit (CL) model, we will estimate the preference weights β and ancillary parameters (e.g., σ) on a coma scale using the preference evidence from the paired comparisons.

# CC and KZ Hybrid (two gammas)
g2_var <- c("MO1","MO2","SC1","SC2","UA1","UA2","PD1","PD2","AD1","AD2",
              "COMA","gamma1","gamma2")
param0 <- cbind(t(as.matrix(rep(0.1,10), nrow=1)),
                  t(as.matrix(c(1,0.5,1.5),nrow=1)))
colnames(param0) <- g2_var
g2_model <- (maxLik::maxLik(llf_g, X=logit_it, 
                            start = c(unlist(param0[1,1:ncol(param0)])),
                            method = method_ml, fixed = "COMA",
                            control = list(iterlim=iteration_ml, 
                                           tol=tol_ml, printLevel=0)))
summary(g2_model) # BFGS results

## --------------------------------------------
## Maximum Likelihood estimation
## BFGS maximization, 175 iterations
## Return code 0: successful convergence 
## Log-Likelihood: -6223.723 
## 12  free parameters
## Estimates:
##        Estimate Std. error t value  Pr(> t)    
## MO1     0.03378    0.01627   2.076   0.0379 *  
## MO2     0.12765    0.01829   6.979 2.96e-12 ***
## SC1     0.04057    0.01601   2.534   0.0113 *  
## SC2     0.08140    0.01559   5.222 1.77e-07 ***
## UA1     0.02175    0.01721   1.264   0.2062    
## UA2     0.13302    0.01919   6.933 4.12e-12 ***
## PD1     0.19998    0.01699  11.772  < 2e-16 ***
## PD2     0.42513    0.02659  15.989  < 2e-16 ***
## AD1     0.06146    0.01389   4.424 9.68e-06 ***
## AD2     0.24515    0.01851  13.242  < 2e-16 ***
## COMA    1.00000    0.00000      NA       NA    
## gamma1  0.27316    0.13588   2.010   0.0444 *  
## gamma2  1.08050    0.10265  10.526  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------

10 Bootstrap the standard errors

We now start the parallel computing to compute the standard errors (SE) using bootstrap method. In our bootstrap method, we will re-sample with replacement and compute the SE for 10 preference weights and 2 scaling parameters.

10.1 Complete data

we first conduct the bootstrap method to the complete data.

reps = 1000
r <- matrix(0,reps,13) # results matrix
param0 <- t(g2_model$estimate[1:13])  # starting values
colnames(param0) <- g2_var
colnames(r) <- g2_var
start_values <- c(unlist(param0[1, 1:ncol(param0)]))

start_time <- Sys.time()  # track computational time
for (i in 1:reps) {  # About 8 hours
  boot <- rbind(cluster.resample(logit_it[logit_it$subjectindex == 1, ],
                                 cluster.name="id", size = NA, newID = F),
                cluster.resample(logit_it[logit_it$subjectindex == 2, ],
                                 cluster.name="id", size = NA, newID = F),
                cluster.resample(logit_it[logit_it$subjectindex == 3, ],
                                 cluster.name="id", size = NA, newID = F),
                cluster.resample(logit_it[logit_it$subjectindex == 4, ],
                                 cluster.name="id", size = NA, newID = F))
  final_model <- (maxLik::maxLik(llf_g, X=boot, 
          start = start_values, method = method_ml, fixed = "COMA",
          control = list(iterlim=iteration_ml, tol=tol_ml, printLevel=0)))
  r[i,1:13] <- final_model$estimate[1:13]
  end_time <- Sys.time()
}

boot231016a <-as.data.frame(r)
boot_se = apply(boot231016a, 2, function(x) sd(x))
results_final = round(cbind(g2_model[["estimate"]],boot_se), digits = 3)
colnames(results_final) = c("Estimates", "Bootstrap SE")
kable(results_final, align = "c", caption = "**Summary Statistics of Bootstrap 
      Method for Final Data**", row.names = TRUE, escape = FALSE, centering = T)

**Summary Statistics of Bootstrap Method for Final Data**
	Estimates	Bootstrap SE
MO1	0.034	0.017
MO2	0.128	0.019
SC1	0.041	0.016
SC2	0.081	0.015
UA1	0.022	0.016
UA2	0.133	0.019
PD1	0.200	0.019
PD2	0.425	0.032
AD1	0.061	0.014
AD2	0.245	0.020
COMA	1.000	0.000
gamma1	0.273	0.133
gamma2	1.080	0.106

11 Comparison of paired comparison and kaizen task evidence

pc_evidence = as.matrix(g2_model[["estimate"]][1:10])
kaizen_evidence = as.matrix(kaizen_evidence)[1:10]
pc_kz = data.frame(cbind(pc_evidence, kaizen_evidence))
colnames(pc_kz) = c("pc","kz")

# calculate the correlations and concordance
Lin = formatC(round(CCC(pc_kz$pc,pc_kz$kz)[["rho.c"]][["est"]], digits = 3), 
              format = "f", digits = 3)
Pears = formatC(round(cor(pc_kz$pc,pc_kz$kz, method = "pearson"), digits = 3), 
              format = "f", digits = 3)
Spear = formatC(round(cor(pc_kz$pc,pc_kz$kz, method = "spearman"), digits = 3), 
              format = "f", digits = 3)


ggplot(pc_kz, aes(kz,pc)) + geom_point() + 
  geom_text_repel(aes(label = row.names(pc_kz))) + labs(x = "Kaizen Tasks") + 
  labs(y = "Paired Comparisons") + 
  labs(title = "Kaizen and Paired Comparison Estimates (Experience Scale)") +
  xlim(0,0.5) + ylim(0,0.5) + 
  annotate(
  "text", x = 0, y = 0.4, 
  label = paste("\n Lin: ", Lin, "\n Pearson: ", Pears, "\n Spearman ", Spear), 
  size = 5, hjust = 0)

12 Save the data

write.csv(boot231016a, file = "boot_se.csv", row.names = F)
write.csv(pc_evidence, file = "pc_evidence.csv", row.names = F)

Econometric analysis of paired comparison evidence

Maksat Jumamyradov, maksat@usf.edu; Benjamin M. Craig, bcraig@usf.edu

2024-02-12