rfor-loopcohen-kappakappa

Loop each item in a for loop with itself in R


I have a data frame with ratings made by 4 different reviewers; each row is a reviewer pair rating an image.


    df <- data.frame(Reviewer1 = c("Name1", "Name2", "Name3", "Name4", "Name2", "Name3", "Name1", "Name3", "Name1", "Name4", "Name1", "Name1", "Name1", "Name2", "Name3", "Name4", "Name2", "Name3", "Name1", "Name2", "Name1", "Name4", "Name1", "Name1", "Name3", "Name2", "Name4", "Name3", "Name1", "Name2", "Name1", "Name3", "Name4", "Name3", "Name2", "Name2", "Name2", "Name3", "Name1", "Name3", "Name3", "Name1", "Name4", "Name2", "Name3", "Name4", "Name4", "Name3", "Name4"),
    Rating1 = c("Worst", "Worst", "Best", "Bad", "Good", "Worst", "Best", "Worst", "Best", "Bad", "Worst", "Worst", "Worst", "Good", "Best", "Bad", "Good", "Worst", "Best", "Worst", "Best", "Bad", "Worst", "Worst", "Best", "Worst", "Worst", "Good", "Bad", "Worst", "Good", "Bad", "Worst", "Worst", "Worst", "Good", "Good", "Bad", "Good", "Good", "Bad", "Worst", "Good", "Worst", "Worst", "Worst", "Worst", "Good", "Good"),
    Reviewer2 = c("Name3", "Name1", "Name1", "Name1", "Name4", "Name4", "Name2", "Name4", "Name2", "Name2", "Name2", "Name2", "Name3", "Name1", "Name1", "Name1", "Name4", "Name4", "Name2", "Name3", "Name3", "Name2", "Name2", "Name2", "Name1", "Name4", "Name3", "Name1", "Name2", "Name3", "Name3", "Name1", "Name2", "Name4", "Name4", "Name1", "Name4", "Name2", "Name3", "Name4", "Name1", "Name3", "Name2", "Name3", "Name1", "Name2", "Name3", "Name2", "Name3"),
    Rating2 = c("Best", "Good", "Worst", "Good", "Best", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Bad", "Worst", "Best", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Worst", "Good", "Bad", "Worst", "Good", "Bad", "Worst", "Worst", "Worst", "Good", "Good", "Bad", "Good", "Good", "Bad", "Worst", "Good", "Bad", "Worst", "Worst", "Worst", "Good", "Worst"))

My end goal is to create contingency tables for Cohen's Kappa analyses of each reviewer pair. For that, I need counts of reviewer pair ratings with the following rules:

Because a given rater can function as Reviewer1 as well as Reviewer2 in a given pair, I also need to sum the data where they functioned as both, e.g., sum the YY count for Name2 as Reviewer1 and Name3 as Reviewer2 with Name3 as Reviewer1 and Name2 as Reviewer2. How to do that?

Thank you so much for your help in advance!

EDIT: I've made some changes to the code below that enable adding the reviewer pairs to the output (last point) and remove the reviewer pairings with themselves (second point) though still ideally they wouldn't be in the first loop output at all.

# Prep for the first loop
    plist <- unique(df$Reviewer1) # Get count of Names
    pseq <- seq(1, length(plist), by = 1) # Create sequence to use numbers for the loop instead of the reviewer names
    pmap <- data.frame(pseq, plist) # Map numbers to names

# Initialize empty lists
    NN <- c()
    YY <- c()
    YN <- c()
    NY <- c()
    ind <- vector()
    pairs <- data.frame()

# Loop over pairs
    for(i in pseq) {
      for(j in pseq) {
        if (i!=j)
          ind <- c(i,j)
          pairs <- rbind(pairs, ind)
          NN[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1=='Worst' & 
                                                              df$Reviewer2==pmap[j,2] & df$Rating2=='Worst'), ])
          YY[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1!='Worst' & 
                                                              df$Reviewer2==pmap[j,2] & df$Rating2!='Worst'), ])
          YN[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1!='Worst' & 
                                                              df$Reviewer2==pmap[j,2] & df$Rating2=='Worst'), ])
          NY[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1=='Worst' & 
                                                              df$Reviewer2==pmap[j,2] & df$Rating2!='Worst'), ])
      }
    }

    # Remove the first row as that's Reviewer1 with themselves
    NN <- NN[-(1)]
    YY <- YY[-(1)]
    YN <- YN[-(1)]
    NY <- NY[-(1)]

# Put rating lists into one list and convert that to a data frame
    resps <- c('YY', 'YN', 'NY', 'NN')
    resplist = list(YY, YN, NY, NN)
    respdf <- as.data.frame(do.call(cbind, resplist))
    colnames(respdf) <- c(resps)
    respdf <- cbind(pairs, respdf)
    respdf <- respdf[!duplicated(respdf[c('X1', 'X2')]), ] # Remove duplicate rows based on duplicates from the pairs (the duplicates represent 2&2 and 3&3)

# Cohen's Kappa Analyses
# Put data into individual matrices (i.e., contingency tables) and do Kappa analyses, saving the results
    kseq <- nrow(respdf)
    pabakest = data.frame()
    pabakLCI = data.frame()
    pabakUCI = data.frame()
    kappaest = data.frame()
    kappaLCI = data.frame()
    kappaUCI = data.frame()
    z  = data.frame()
    p  = data.frame()

# Y=all but "Worst" rating and N="Worst" rating

    for(i in 1:kseq) {
      temp <- as.matrix(respdf[i, 2:5])
      tempvec <- unlist(temp)
      kappadata <- matrix(tempvec, nrow = 2, byrow = TRUE)
      kappa <- epi.kappa(kappadata, method = "cohen", alternative = "greater", conf.level = 0.95)
      t1 <- round(kappa[[2]][1],2)
      pabakest <- rbind(pabakest,t1)
      t2 <- round(kappa[[2]][2],2)
      pabakLCI <- rbind(pabakLCI,t2)
      t3 <- round(kappa[[2]][3],2)
      pabakUCI <- rbind(pabakUCI,t3)
      t4 <- round(kappa[[3]][1],2)
      kappaest <- rbind(kappaest,t4)
      t5 <- round(kappa[[3]][3],2)
      kappaLCI <- rbind(kappaLCI,t5)
      t6 <- round(kappa[[3]][4],2)
      kappaUCI <- rbind(kappaUCI,t6)
      t7 <- round(kappa[[4]][1],2)
      z <- rbind(z,t7)  
      t8 <- round(kappa[[4]][2],2)
      p <- rbind(p,t8)
    }

# Add reviewer pair rows/cols once I have them
    kappaoutput <- cbind(pabakest, pabakLCI, pabakUCI, kappaest, kappaLCI, kappaUCI, z, p)


Solution

  • Here is an tidyverse option. I divided the code bit by bit but you can condense further if needed.

    First step is to transform all the ratings that are not "Worst" to the same value here "Not_Worst".

    library(tidyverse)
        
    df1 = df %>% mutate(across(starts_with('Rating'), ~ case_when(str_detect(., "Worst", negate = TRUE) ~ "Not_Worst", TRUE ~ .)))
    
    > df1
       Reviewer1   Rating1 Reviewer2   Rating2
    1      Name1     Worst     Name3 Not_Worst
    2      Name2     Worst     Name1 Not_Worst
    3      Name3 Not_Worst     Name1     Worst
    4      Name4 Not_Worst     Name1 Not_Worst
    5      Name2 Not_Worst     Name4 Not_Worst
    6      Name3     Worst     Name4     Worst
    7      Name1 Not_Worst     Name2 Not_Worst
    8      Name3     Worst     Name4     Worst
    9      Name1 Not_Worst     Name2     Worst
    10     Name4 Not_Worst     Name2 Not_Worst
    

    Note that with this option you just need to change "Worst" to something else (eg. "Good") to generate the stat based on the new value. You can even wrap all the code in a function to just change this parameter if you wish.

    Then, we group_by reviewers and rating to count each pair (r1) :

    df2 = df1 %>% group_by(Reviewer1, Reviewer2, Rating1, Rating2) %>% summarise(r1 = n()) %>% ungroup()
    
    > df2
    # A tibble: 26 x 5
       Reviewer1 Reviewer2 Rating1   Rating2      r1
       <chr>     <chr>     <chr>     <chr>     <int>
     1 Name1     Name2     Not_Worst Not_Worst     3
     2 Name1     Name2     Not_Worst Worst         1
     3 Name1     Name2     Worst     Worst         4
     4 Name1     Name3     Not_Worst Not_Worst     2
     5 Name1     Name3     Not_Worst Worst         1
     6 Name1     Name3     Worst     Not_Worst     2
     7 Name1     Name3     Worst     Worst         1
     8 Name2     Name1     Not_Worst Not_Worst     1
     9 Name2     Name1     Not_Worst Worst         1
    10 Name2     Name1     Worst     Not_Worst     1
    # ... with 16 more rows
    

    The next step use several dplyr functions to combine the rating as a single column (YY, YN, NY, NN in your code) here called Worst_Worst Worst_Not_Worst,... Then the pivot_wider will moved these new values to columns and fill with the count previously calculated (r1), or 0 if absent. Finally the Worst_Worst column (aka NN) is moved at the end.

    df3 = df2 %>% unite(combo_names ,starts_with('Rating'),sep = "_", remove = TRUE) %>%
      pivot_wider(names_from = combo_names, values_from = r1, values_fill = list(r1 = 0)) %>%
      select(-Worst_Worst, Worst_Worst)
    
    > df3
    # A tibble: 11 x 6
       Reviewer1 Reviewer2 Not_Worst_Not_Worst Not_Worst_Worst Worst_Not_Worst Worst_Worst
       <chr>     <chr>                   <int>           <int>           <int>       <int>
     1 Name1     Name2                       3               1               0           4
     2 Name1     Name3                       2               1               2           1
     3 Name2     Name1                       1               1               1           0
     4 Name2     Name3                       0               0               1           2
     5 Name2     Name4                       3               0               0           2
    

    The last change is just to have the data frame ready for the loop by removing the reviewers columns !starts_with('Review')

    df4 = df3 %>% select(!starts_with('Review'))
    > df4
    # A tibble: 11 x 4
       Not_Worst_Not_Worst Not_Worst_Worst Worst_Not_Worst Worst_Worst
                     <int>           <int>           <int>       <int>
     1                   3               1               0           4
     2                   2               1               2           1
     3                   1               1               1           0
     4                   0               0               1           2
     5                   3               0               0           2
     6                   5               1               0           1
     7                   2               0               0           0
     8                   1               0               0           4
     9                   1               1               0           0
    10                   3               0               0           2
    11                   0               1               0           2
    

    Now the data are ready for the loop to calculate the kappa stats. You can streamline the loop by creating a empty list() beforehand then each iteration will be added to the list:

    kappa_list = list()
    for(i in 1:nrow(df4)) {
      temp <- matrix(unlist(df4[i, ]), nrow = 2, byrow = TRUE)
      kappa_list[[i]] <- epi.kappa(temp, method = "cohen", alternative = "greater", conf.level = 0.95)
    }
    

    Now that all the resuts are nested into a list, the last step is to unlist all these and save it as a dataframe, adding the correct column names, then merging to the pair of reviewers:

    results = as.data.frame(matrix(unlist(kappa_list), ncol = 11, byrow = TRUE))
    
    names(results) = names(unlist(kappa_list[[1]]))
    
    results = cbind(select(df3, starts_with('Review')), round(results, 2))
    
    > results
       Reviewer1 Reviewer2 prop.agree.obs prop.agree.exp pabak.est pabak.lower pabak.upper kappa.est kappa.se kappa.lower kappa.upper z.test.statistic z.p.value
    1      Name1     Name2           0.88           0.50      0.75       -0.05        0.99      0.75     0.23        0.29        1.21             3.21      0.00
    2      Name1     Name3           0.50           0.50      0.00       -0.76        0.76      0.00     0.41       -0.80        0.80             0.00      0.50
    3      Name2     Name1           0.33           0.56     -0.33       -0.98        0.81     -0.50     0.61       -1.70        0.70            -0.82      0.79
    4      Name2     Name3           0.67           0.67      0.33       -0.81        0.98      0.00     0.82       -1.60        1.60             0.00      0.50
    5      Name2     Name4           1.00           0.52      1.00       -0.04        1.00      1.00     0.00        1.00        1.00              Inf      0.00
    6      Name3     Name1           0.86           0.65      0.71       -0.16        0.99      0.59     0.38       -0.16        1.34             1.54      0.06
    7      Name3     Name2           1.00           1.00      1.00       -0.68        1.00       NaN      NaN         NaN         NaN              NaN       NaN
    8      Name3     Name4           1.00           0.68      1.00       -0.04        1.00      1.00     0.00        1.00        1.00              Inf      0.00
    9      Name4     Name1           0.50           0.50      0.00       -0.97        0.97      0.00     0.71       -1.39        1.39             0.00      0.50
    10     Name4     Name2           1.00           0.52      1.00       -0.04        1.00      1.00     0.00        1.00        1.00              Inf      0.00
    11     Name4     Name3           0.67           0.67      0.33       -0.81        0.98      0.00     0.82       -1.60        1.60             0.00      0.50