# Item Analysis

# Memory clear
rm(list=ls())


# Set work directory
setwd("e:\\AT_UTF\\")



# Data File Name
ansFileName <- "Ruikei Data.csv"


# Key File name
keyFileName <- "Ruikei Key.csv"



# Column Number of First Test Item:
firstItemCol <- 5

# Option Categories
 OPTIONS <- c("1","2","3","4","9","0")

# OPTIONS <- c("A","B","C","D","E","F")




# Number of groups in trace line
NGROUP <- 4

# Following Kelly's percentages
KELLEY <- TRUE 



# Score output file name
scoFileName <- "ScoreData.csv"

# Total Score Summary output file name
totFileName <- "TotalSummary.csv"

# Item Analysis output file name
itaFileName <- "ItemAnalysis.csv"

# Result of Each Item output file name
reiFileName <- "ResultEachItem.csv"

# Trace line data output file name
tldFileName <- "TraceLineData.csv"

# Trace line graph output file name
tlgFileName <- "TraceLineGraph.pdf"





#------------------------------------------------------------
#  Basically, no change is necessary in the following.
#-------------------------------------------------------------

# Digits
rdgts <- 2

# -----------------------------------------------------------

# Data import
dans <- read.table(ansFileName, header=T, sep=",")
dkey <- read.table(keyFileName, header=T, sep=",", na.strings=c("", NA))

# Number of Items
nItem <- ncol(dans) - firstItemCol + 1

# Number of OPTIONS
nOption <- length(OPTIONS)

# Sample size
nSample <- nrow(dans)

# Item names
itemNames <- colnames(dans)[firstItemCol : ncol(dans)]

# ID variable name
idName <- colnames(dans)[1]


# Key Category
dkeyc <- dkey[grep("C", dkey$CS),]
colnames(dkeyc) <- c("Item", "CS", OPTIONS)

# Key Score
dkeys <- as.matrix(dkey[grep("S", dkey$CS),])
colnames(dkeys) <- c("Item", "CS",OPTIONS)
dkeys1 <- dkeys[,c("Item", "CS")]
dkeys2 <- dkeys[,c(OPTIONS)]
for(j in OPTIONS){
 dkeys2[,j] <- ifelse(is.na(dkeys2[,j]), 0, dkeys2[,j])
}
storage.mode(dkeys2) <- "double"
dkeys <- data.frame(dkeys1, dkeys2)
colnames(dkeys) <- c("Item", "CS",OPTIONS)


# Score matrix
score <- as.data.frame(matrix(c(0), nrow=nSample, ncol=nItem ))
colnames(score) <- c(itemNames)

# Scoring
for(i in itemNames){
 dansitmp <- matrix(c(0), nrow=nSample, ncol=nOption)
 colnames(dansitmp) <- OPTIONS
 for(j in OPTIONS){
  dansitmp[,j] <- ifelse(is.na(dans[,i]), 0, 
                    ifelse(grepl(j, dans[,i]), dkeys[dkeys$Item==i, j], 0))
 }
 score[,i] <- rowSums(dansitmp)
}


# Total scores
score$Total <- rowSums(score[,itemNames])
score$Prop <-  floor(100 * (rowMeans(score[,itemNames]) +0.005)) / 100

# Score Data
dscore <- data.frame(dans[,c(1:(firstItemCol-1))], score)

# Output Score data
write.table(dscore, scoFileName, row.names=F, sep=",")



# Total Scores Summary
tot <- prop <- NULL
tot$Score <- "Total"
tot$Items <- length(itemNames)
tot$N <- nSample
tot$Mean <- floor(10^rdgts * (mean(score[,"Total"])+0.005))/10^rdgts
tot$SD <-   floor(10^rdgts * (  sd(score[,"Total"])+0.005))/10^rdgts
tot$Min <- min(score[,"Total"])
tot$Med <- median(score[,"Total"])
tot$Max <- max(score[,"Total"])
tot$Alpha <- floor(10^rdgts * (length(itemNames)/(length(itemNames)-1) *
                        (1-sum(apply(score[,itemNames],2,var))/var(score[,"Total"]))+0.005)) / 10^rdgts
prop$Score <- "Prop"
prop$Items <- tot$Items
prop$N <- tot$N
prop$Mean <- floor(10^rdgts * (mean(score[,"Total"])/prop$Items+0.005))/10^rdgts
prop$SD <-   floor(10^rdgts * (  sd(score[,"Total"])/prop$Items+0.005))/10^rdgts
prop$Min <- min(score[,"Prop"])
prop$Med <- median(score[,"Prop"])
prop$Max <- max(score[,"Prop"])
prop$Alpha <- tot$Alpha

totout <- as.data.frame(tot)
propout <- as.data.frame(prop)
tpout <- rbind(totout, propout)


# Output Total score summary
write.table(tpout, totFileName, row.names=F, sep=",")

#remove tpout, totout, propout, prop 
# rm(tpout, totout, propout, prop)



# Item analysis 
optionNames <- paste("o",OPTIONS, sep="")
itaNames <- c("Item", "N", "P", "D", "Corr", "Alpha", "Dalpha", "Categories", optionNames, "oNA")
dita <- matrix(c(0), nrow=length(itemNames), ncol=length(itaNames))
Corr <- rep(0, length(itemNames))
names(Corr) <- itemNames
rownames(dita) <- itemNames
colnames(dita) <- itaNames

dita[,"Item"] <- itemNames
dita[,"N"] <- tot$N 
dita[,"P"] <- floor(10^rdgts * (colMeans(score[,itemNames])+0.005))/ 10^rdgts
dita[,"Alpha"] <- tot$Alpha 

for(i in itemNames){
  Corr[i] <- cor(score[,i],(score[,"Total"]-score[,i]))
  dita[i,"Corr"] <- floor(10^rdgts * (Corr[i]+0.005))/ 10^rdgts
  scoreDi <- score[, itemNames[(itemNames %in% i)==F]] 
  dita[i,"Dalpha"] <- floor(10^rdgts * (ncol(scoreDi)/(ncol(scoreDi)-1) *
                              (1-sum(apply(scoreDi,2,var))/var(rowSums(scoreDi)))+0.005))/ 10^rdgts
}

dkeycs <- dkeyc[,OPTIONS]
for(j in OPTIONS){
 dkeycs[,j] <- ifelse(is.na(dkeycs[,j]),"", dkeycs[,j])
}
rownames(dkeycs) <- itemNames

vCategories <- apply(dkeycs[,OPTIONS], 1, paste, collapse="")
names(vCategories) <- itemNames
dita[,"Categories"] <- vCategories




# Data Sorting
vCorr <- Corr[Corr>0]
sNames <- names(vCorr)[order(vCorr, decreasing=T)]
sNames <- paste(",dscore$",sNames, sep="", collapse=" ")
eval(parse(text=paste("dsort <- dscore[order(dscore$Total", sNames, ",dscore[,1]),]",sep="") ))
dsort$sortID <- c(1:nrow(dsort))

# remove score, scoreDi, dscore, Corr
# rm(score,scoreDi,dscore, Corr)


# Subgroup size 
nSub <- ceiling(nSample * 0.27)

# Subgroup label
dsort$LMU <- ifelse(dsort[,"sortID"]<=nSub, "L", 
               ifelse((dsort[,"sortID"]>nSub)&(dsort[,"sortID"]<=(nSample-nSub)), "M", "U"))

# D index
dsortL <- dsort[dsort$LMU=="L",]
dsortU <- dsort[dsort$LMU=="U",]
dita[,"D"] <- floor(10^rdgts * (colMeans(dsortU[,itemNames]) - 
                colMeans(dsortL[,itemNames])+0.005))/ 10^rdgts

# remove dsortU, dsortL
# rm(dsortU)
# rm(dsortL)





# Each Item
drei <- NULL
 v1 <- c("Item", "Categories", "N", "P", "D", "Corr", "Alpha","DAlpha")

for(i in itemNames){
 dansi <- dans[,c(idName,i)]
 dsorti <- dsort[,c(idName,"Total","LMU",i)]
 colnames(dsorti) <- c(idName,"Total","LMU","iScore")
 dansi <- merge(dansi, dsorti, by=idName, all=TRUE) 

 # subjects*options matrix per item
 dansitmp <- matrix(c(0), nrow=nSample, ncol=(nOption+1))
 dansitmp <- cbind(dansi, dansitmp)
 colnames(dansitmp) <- c(idName, i, "Total", "LMU", "iScore", OPTIONS, "NA")
 for(j in OPTIONS){
  dansitmp[,j] <- ifelse(is.na(dansitmp[,i]), 0,
  ifelse(grepl(j, dansitmp[,i]), 1, 0))
 }
 dansitmp[,"NA"] <- ifelse((rowSums(dansitmp[,OPTIONS])==0), 1, 0)

 # choice ratio of each option per item
 dita[i,c(optionNames,"oNA")] <- floor(10^rdgts *(colMeans(dansitmp[,c(OPTIONS,"NA")])+0.005))/ 10^rdgts

 for(j in OPTIONS){
  if (dkeycs[i,j] == "") {
   dita[i,paste("o",j, collapse="", sep="")] <- ""
  }
 }


 # each item analysis
 v2 <- c(i, vCategories[i], dita[i,"N"],dita[i,"P"], dita[i, "D"], dita[i,"Corr"], dita[i, "Alpha"], dita[i, "Dalpha"])
 v3 <- c("Prop", OPTIONS,"NA")
 v4 <- c("All",  dita[i,c(optionNames,"oNA")])

 # UML based
 dansitmpL <- dansitmp[dansitmp[,"LMU"]=="L",]
 dansitmpM <- dansitmp[dansitmp[,"LMU"]=="M",]
 dansitmpU <- dansitmp[dansitmp[,"LMU"]=="U",] 
 v5 <- c("U",  (floor(10^rdgts * (colMeans(dansitmpU[,c(OPTIONS,"NA")])+0.005))/ 10^rdgts))
 v6 <- c("M",  (floor(10^rdgts * (colMeans(dansitmpM[,c(OPTIONS,"NA")])+0.005))/ 10^rdgts))
 v7 <- c("L",  (floor(10^rdgts * (colMeans(dansitmpL[,c(OPTIONS,"NA")])+0.005))/ 10^rdgts))
 v8 <- c("D",  (floor(10^rdgts * (colMeans(dansitmpU[,c(OPTIONS,"NA")])-colMeans(dansitmpL[,c(OPTIONS,"NA")])+0.005))/ 10^rdgts))
 v9 <- c("Corr", (floor(10^rdgts * (cor( dansitmp[,c(OPTIONS,"NA")],(dansitmp[,"Total"]-dansitmp[,"iScore"]),
                                         use = "pairwise.complete.obs")+0.005)) / 10^rdgts))
 v10 <- rep("",length(v5))

 nemp <- length(v3) - length(v1)
 if (nemp >=0) {
   v1 <- c(v1, rep("",nemp))
   v2 <- c(v2, rep("",nemp))  
 } else { 
   nemp <- abs(nemp)
   v3 <- c(v3, rep("",nemp))
   v4 <- c(v4, rep("",nemp))
   v5 <- c(v5, rep("",nemp))
   v6 <- c(v6, rep("",nemp))
   v7 <- c(v7, rep("",nemp))
   v8 <- c(v8, rep("",nemp))
   v9 <- c(v9, rep("",nemp))
   v10 <- c(v10, rep("",nemp))
 }
  temp1 <- rbind(v1, v2, v3, v4)
  temp2 <- rbind(v5,v6,v7,v8,v9,v10)

  for(j in OPTIONS){
   if (dkeycs[i,j] == "") {
    temp2[c("v5","v6","v7","v8","v9"), j] <- ""
   }
  }
  drei <- rbind(drei, temp1, temp2)
}    
 



#remove dansitmpL, dansitmpM, dansitmpU
# rm(dansitmpL, dansitmpM, dansitmpU)

#remove v1,v2,v3,v4,v5,v6,v7,v8,v9,v10
# rm(v1,v2,v3,v4,v5,v6,v7,v8,v9,v10)


# Output Item Analysis Results
colnames(dita) <- c("Item","N","P","D","Corr","Alpha","Dalpha","Categories",OPTIONS,"NA")

write.table(dita,  itaFileName, row.names=F, sep=",")
write.table(drei,  reiFileName, row.names=F, col.names=F, sep=",")

#remove dita, drei 
# rm(dita, drei)



# Trace lines

pdf(tlgFileName, paper="a4", width=7, height=14, family="Japan1", onefile=T)
  layout(matrix(c(1,2,3,4,5,6), 3,2, byrow=T))
par(mar=c(6,5,2,1))

tlData <- NULL

for(i in itemNames){
 dansi <- dans[,c(idName,i)]
 dsorti <- dsort[,c(idName,"Total","LMU",i)]
 colnames(dsorti) <- c(idName,"Total","LMU","iScore")
 dansi <- merge(dansi, dsorti, by=idName, all=TRUE) 
 dansi <- merge(dansi, dsort[,c(idName,"sortID")], by=idName, all=TRUE)

 # subjects*options matrix per item
 dansitmp <- matrix(c(0), nrow=nSample, ncol=(nOption+1))
 dansitmp <- cbind(dansi, dansitmp)
 colnames(dansitmp) <- c(idName, i, "Total", "LMU", "iScore", "sortID", OPTIONS, "NA")
 for(j in OPTIONS){
  dansitmp[,j] <- ifelse(is.na(dansitmp[,i]), 0,
  ifelse(grepl(j, dansitmp[,i]), 1, 0))
 }
 dansitmp[,"NA"] <- ifelse((rowSums(dansitmp[,OPTIONS])==0), 1, 0)
 dansitmp <- dansitmp[order(dansitmp[,"sortID"]),]

 dansitmp$group <- NGROUP
 if ((NGROUP==3) && (KELLEY==TRUE)) {
  dansitmp$group[1:nSub] <- 1
  dansitmp$group[(nSub+1):(nSample-nSub)] <- 2 
  dansitmp$group[(nSample-nSub+1):nSample] <- 3
 } else {
  nEach <- floor(nSample/NGROUP +0.5)
  for(igp in 1:(NGROUP-1)){
   hb <- (igp-1)*nEach+1
   he <- igp*nEach 
   dansitmp$group[hb:he] <- igp
  }
 }

 ioprop <- matrix(c(0), nrow=(nOption+1), ncol=NGROUP)
 rownames(ioprop) <- c(OPTIONS,"NA")
 colnames (ioprop) <- c(1:NGROUP)

 lwdv <- rep(1, (nOption+1))
 lwdv <- 2*c(as.matrix(dkeys[dkeys$Item==i,OPTIONS]),0) + lwdv
 names(lwdv) <- c(OPTIONS,"NA")

 for(j in 1:NGROUP){
  dansitmpj <- dansitmp[dansitmp$group==j,]
  ioprop[,j] <- colMeans(dansitmpj[,c(OPTIONS,"NA")])
 }

 plot("\n", xlim=c(1, NGROUP), ylim =c(0,1), las=1, axes=F,
                xlab="Group", ylab="Proportion of Being Selected",
                pch="-", type="b", lty=1, lwd=lwdv["NA"], main=i)
 axis(side=1, c(1:NGROUP))
 axis(side=2, seq(0,1, 0.1), las=1)


 for (j in dkeycs[i,]){
  if (j != "") {
   j <- as.character(j)
   par(new=T) 
   plot(c(1:NGROUP), ioprop[j,], xlim=c(1, NGROUP), ylim =c(0,1), las=1, axes=F,
              pch=j, type="b", lty=1, lwd=lwdv[j], xlab="", ylab="", xaxt="n", yaxt="n")
  }
 }

 for(j in OPTIONS){
  if (dkeycs[i,j] == "") {
   ioprop[j,] <- ""
  }
 }

 iopropi <- data.frame(i, rownames(ioprop), ioprop)
 tlData <- rbind(tlData, iopropi)

}

dev.off()


# Output Trace Line Data
colnames(tlData) <- c("Item", "Category", c(1:NGROUP))

write.table(tlData, tldFileName, row.names=F, sep=",")







