rmigrationmarkov-chains

How to create a matrix of data migration?


Suppose we start with this dataframe, and R-code that generates it immediately below:

> data
   ID Period Values Flags
1   1      1      5    X0
2   1      2     10    X1
3   1      3     15    X2
4   1      4     20    X3
5   2      1      0    X0
6   2      2      2    X2
7   2      3      4    XO
8   2      4      6    X1
9   3      1      3    XO
10  3      2      6    XO
11  3      3      9    X2
12  3      4     12    XO

data <- 
  data.frame(
    ID = c(1,1,1,1,2,2,2,2,3,3,3,3),
    Period = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4),
    Values = c(5, 10, 15, 20, 0, 2, 4, 6, 3, 6, 9, 12),
    Flags = c("X0","X1","X2","X3","X0","X2","XO", "X1", "XO","XO","X2","XO")
  )

I am trying to generate code that shows the migration of the number of ID's (and Values by number of ID's) from one "Flag" category to the next, based on the 2 periods input by the user. So for example, if the user inputs period 1 as the "from" period and period 4 as the "to" period, we'd get the migration tables as shown in the image at the bottom. I also include 2/3 from/to on the right side of the image, for sake of illustration.

I've typically done this sort of analysis in Excel, a cumbersome multi-step process, and am now trying it out in R.

Any suggestions for coding this? enter image description here


Solution

  • Here are functions to create the two required tables.

    Note that you had a problem with X0 and XO in your mock data set as r2evans already suggested (I've transformed everything to X0).

    data <- 
      data.frame(
        ID = c(1,1,1,1,2,2,2,2,3,3,3,3),
        Period = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4),
        Values = c(5, 10, 15, 20, 0, 2, 4, 6, 3, 6, 9, 12),
        Flags = c("X0","X1","X2","X3","X0","X2","X0", "X1", "X0","X0","X2","X0")
      )
    
    generateTable <- function(data){
      df <- data.frame(matrix(NA, ncol=length(unique(data$Flags)), nrow=length(unique(data$Flags))))
      row.names(df) <- unique(data$Flags)
      names(df) <- unique(data$Flags)
      return(df)
    }
    
    numbers2migrate <- function(data, from=1, to=4){
      df <- generateTable(data)
      for (i in unique(data$ID)){
        id_from <- as.character(data$Flags[(data$ID == i & data$Period == from)])
        id_to <- as.character(data$Flags[data$ID == i & data$Period == to])
        column <- which(names(df) == id_from)
        row <- which(row.names(df) == id_to)
        df[row, column] <- ifelse(is.na(df[row, column]), 1, df[row, column] + 1)
      }
      return(df)
    }
    
    values2migrate <- function(data, from=1, to=4){
      df <- generateTable(data)
      for (i in unique(data$ID)){
        id_from <- as.character(data$Flags[(data$ID == i & data$Period == from)])
        id_to <- as.character(data$Flags[data$ID == i & data$Period == to])
        column <- which(names(df) == id_from)
        row <- which(row.names(df) == id_to)
        
        val <- (data$Values[(data$ID == i & data$Period == from)])
        df[row, column] <- val
      }
      return(df)
    }
    
    
    > numbers2migrate(data, from=1, to=4)
       X0 X1 X2 X3
    X0  1 NA NA NA
    X1  1 NA NA NA
    X2 NA NA NA NA
    X3  1 NA NA NA
    > values2migrate(data,1,4)
       X0 X1 X2 X3
    X0  3 NA NA NA
    X1  0 NA NA NA
    X2 NA NA NA NA
    X3  5 NA NA NA