Wait list from scratch • simK

library(simK)

Simulating a kidney transplant waiting list is easy to do with functions from {simK}.

Candidates

The minimum information needed for kidney transplant candidates encompasses: HLA typing, ABO blood group, cPRA, age, time on dialysis, and clinical urgency.

We can start by defining our wait list size (n) and sampling the patients’ HLA typing according to their origin:

# Sample HLA typings (with replacement) for a wait list of 100 candidates
candidates <- hla_sample(
  n = 100,
  replace = TRUE,
  origin = "API"
)
#> New names:
#> • `api` -> `api...4`
#> • `afa` -> `afa...5`
#> • `cau` -> `cau...6`
#> • `his` -> `his...7`
#> • `api` -> `api...11`
#> • `afa` -> `afa...12`
#> • `cau` -> `cau...13`
#> • `his` -> `his...14`

# Let's also create a unique identifier for each patient:
# the letter "R" followed by the patient's row name
candidates[["ID"]] <- paste0("R", rownames(candidates))

candidates
#> # A tibble: 100 × 7
#>    A1    A2    B1    B2    DR1   DR2   ID   
#>    <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1 1     33    61    44    7     7     R1   
#>  2 24    2     35    13    12    3     R2   
#>  3 2     24    60    62    15    4     R3   
#>  4 29    29    7     7     3     10    R4   
#>  5 11    11    54    52    4     15    R5   
#>  6 24    24    38    7     15    1     R6   
#>  7 2     11    62    60    15    15    R7   
#>  8 33    33    44    58    13    13    R8   
#>  9 30    24    13    75    7     12    R9   
#> 10 2     3     51    44    16    13    R10  
#> # ℹ 90 more rows

Patients’ blood groups can be added with abo() and their ages with ages():

# Blood groups: one draw per candidate; `probs` gives the probabilities of
# groups A, AB, B and O (in this order)
candidates[["bg"]] <- abo(
  n = nrow(candidates),
  probs = c(0.4, 0.04, 0.06, 0.5)
)

# Ages: one value per candidate from a truncated normal distribution,
# defined by its mean and sd and bounded by `lower` and `upper`
candidates[["age"]] <- ages(
  n = nrow(candidates),
  lower = 18,
  upper = 80,
  mean = 65,
  sd = 30
)

# Let's take a look at our transplant candidates
candidates
#> # A tibble: 100 × 9
#>    A1    A2    B1    B2    DR1   DR2   ID    bg      age
#>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
#>  1 1     33    61    44    7     7     R1    O        48
#>  2 24    2     35    13    12    3     R2    A        58
#>  3 2     24    60    62    15    4     R3    O        67
#>  4 29    29    7     7     3     10    R4    A        69
#>  5 11    11    54    52    4     15    R5    B        79
#>  6 24    24    38    7     15    1     R6    O        27
#>  7 2     11    62    60    15    15    R7    A        44
#>  8 33    33    44    58    13    13    R8    A        52
#>  9 30    24    13    75    7     12    R9    A        76
#> 10 2     3     51    44    16    13    R10   O        77
#> # ℹ 90 more rows

Now we must assign a sensitization group to each transplant candidate, according to a four-element vector of probabilities corresponding to cPRA = 0%; 1% < cPRA < 50%; 50% < cPRA < 85%; and cPRA > 85%, respectively.

# Draw a cPRA value for every candidate; `probs` holds the probabilities of
# the four sensitization groups
candidates[["cPRA"]] <- cpra(
  n = nrow(candidates),
  probs = c(0.7, 0.1, 0.1, 0.1)
)

# Flag the hypersensitised patients (cPRA above 85)
candidates[["hiper"]] <- candidates[["cPRA"]] > 85

Time on dialysis (in months) is computed according to blood group and hypersensitization status:

# Time on dialysis (in months) for each candidate. `rowwise()` makes `dial()`
# run once per row with that candidate's own `hiper` and `bg` values;
# `ungroup()` then drops the row-wise grouping again.
# NOTE(review): `seed.number = NA` presumably leaves the draw unseeded -
# confirm against the `dial()` documentation.
candidates <- candidates |>
  dplyr::rowwise() |>
  dplyr::mutate(
    dialysis = dial(hiper = hiper, bg = bg, seed.number = NA)
  ) |>
  dplyr::ungroup()

candidates
#> # A tibble: 100 × 12
#>    A1    A2    B1    B2    DR1   DR2   ID    bg      age  cPRA hiper dialysis
#>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <lgl>    <dbl>
#>  1 1     33    61    44    7     7     R1    O        48     0 FALSE       66
#>  2 24    2     35    13    12    3     R2    A        58    73 FALSE       57
#>  3 2     24    60    62    15    4     R3    O        67     0 FALSE       90
#>  4 29    29    7     7     3     10    R4    A        69    37 FALSE       40
#>  5 11    11    54    52    4     15    R5    B        79    82 FALSE       23
#>  6 24    24    38    7     15    1     R6    O        27     0 FALSE       49
#>  7 2     11    62    60    15    15    R7    A        44    21 FALSE       40
#>  8 33    33    44    58    13    13    R8    A        52     0 FALSE       16
#>  9 30    24    13    75    7     12    R9    A        76     0 FALSE        2
#> 10 2     3     51    44    16    13    R10   O        77     0 FALSE       82
#> # ℹ 90 more rows

And we can finish by flagging the patients who are clinically urgent (if any):

# Let's assume that 5% of the candidates are clinically urgent for
# transplantation. One flag is drawn per candidate (0 = not urgent,
# 1 = urgent); the sample size is taken from the wait list itself so the
# column always matches the number of candidates.
candidates$urgent <- sample(
  c(0, 1),
  size = nrow(candidates),
  replace = TRUE,
  prob = c(0.95, 0.05)
)

# Reorder the columns for display
candidates |>
  dplyr::select(
    ID, bg, A1, A2, B1, B2, DR1, DR2, age, cPRA, hiper, dialysis, urgent
  )
#> # A tibble: 100 × 13
#>    ID    bg    A1    A2    B1    B2    DR1   DR2     age  cPRA hiper dialysis
#>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <lgl>    <dbl>
#>  1 R1    O     1     33    61    44    7     7        48     0 FALSE       66
#>  2 R2    A     24    2     35    13    12    3        58    73 FALSE       57
#>  3 R3    O     2     24    60    62    15    4        67     0 FALSE       90
#>  4 R4    A     29    29    7     7     3     10       69    37 FALSE       40
#>  5 R5    B     11    11    54    52    4     15       79    82 FALSE       23
#>  6 R6    O     24    24    38    7     15    1        27     0 FALSE       49
#>  7 R7    A     2     11    62    60    15    15       44    21 FALSE       40
#>  8 R8    A     33    33    44    58    13    13       52     0 FALSE       16
#>  9 R9    A     30    24    13    75    7     12       76     0 FALSE        2
#> 10 R10   O     2     3     51    44    16    13       77     0 FALSE       82
#> # ℹ 90 more rows
#> # ℹ 1 more variable: urgent <dbl>