Exercise 23. Calculating SMRs/SIRs


library(biostat3) # for Surv and survfit
library(dplyr)    # for data manipulation

(a)

data(melanoma)
scale <- 365.24
mel <- mutate(melanoma,
              ydx=biostat3::year(dx),
              adx=age+0.5, # mid-point approximation
              dead=(status %in% c("Dead: cancer","Dead: other") & surv_mm<110)+0,
              surv_mm=pmin(110,surv_mm),
              astart=adx, 
              astop=adx+surv_mm/12)
mel.split <- survSplit(mel,
                       cut=1:110,
                       event="dead",start="astart", end="astop")
subset(mel.split, id<=2, select=c(id,astart,astop,dead))
##   id astart    astop dead
## 1  1   81.5 82.00000    0
## 2  1   82.0 83.00000    0
## 3  1   83.0 83.70833    1
## 4  2   75.5 76.00000    0
## 5  2   76.0 77.00000    0
## 6  2   77.0 78.00000    0
## 7  2   78.0 79.00000    0
## 8  2   79.0 80.00000    0
## 9  2   80.0 80.12500    1

(b)

mel.split <- mutate(mel.split,
                    ystart=year(dx)+astart-adx,
                    ystop=year(dx)+astop-adx)
mel.split2 <- survSplit(mel.split,
                       cut=1970:2000,event="dead",
                       start="ystart", end="ystop") %>%
    mutate(astart=adx+ystart-ydx,
           astop=adx+ystop-ydx,
           age=floor(astop),
           year=floor(ystop),
           pt = ystop - ystart)
subset(mel.split2, id<=2, select=c(id,ystart,ystop,astart,astop,dead))
##    id   ystart    ystop   astart    astop dead
## 1   1 1981.088 1981.588 81.50000 82.00000    0
## 2   1 1981.588 1982.000 82.00000 82.41239    0
## 3   1 1982.000 1982.588 82.41239 83.00000    0
## 4   1 1982.588 1983.000 83.00000 83.41239    0
## 5   1 1983.000 1983.296 83.41239 83.70833    1
## 6   2 1975.720 1976.000 75.50000 75.77993    0
## 7   2 1976.000 1976.220 75.77993 76.00000    0
## 8   2 1976.220 1977.000 76.00000 76.77993    0
## 9   2 1977.000 1977.220 76.77993 77.00000    0
## 10  2 1977.220 1978.000 77.00000 77.77993    0
## 11  2 1978.000 1978.220 77.77993 78.00000    0
## 12  2 1978.220 1979.000 78.00000 78.77993    0
## 13  2 1979.000 1979.220 78.77993 79.00000    0
## 14  2 1979.220 1980.000 79.00000 79.77993    0
## 15  2 1980.000 1980.220 79.77993 80.00000    0
## 16  2 1980.220 1980.345 80.00000 80.12500    1

(c)

xtabs(pt ~ age+year, data=mel.split2, subset = age>=50 & age<60)
##     year
## age        1975       1976       1977       1978       1979       1980
##   50   0.500000   3.391583  10.455536  11.071077  17.058537  20.845348
##   51   0.500000   4.160055   8.842373  16.240531  17.518148  23.517609
##   52   1.500000   3.910246   6.733632  11.936466  19.925036  25.802427
##   53   1.000000   7.045983  10.874580  10.581481  17.022456  27.066751
##   54   1.875000   4.757639  10.654592  12.756598  14.672380  17.616143
##   55   0.000000   6.127642  10.360640  15.301459  18.499498  20.090183
##   56   0.500000   2.908622  12.657759  14.500370  17.375516  23.279159
##   57   2.000000   1.993100   8.593664  18.546198  18.109540  22.739062
##   58   0.000000   6.516811   7.950142  15.678732  23.196364  21.831385
##   59   0.500000   1.922298   9.598447  11.604698  20.346284  29.369415
##     year
## age        1981       1982       1983       1984       1985       1986
##   50  19.947418  20.862771  28.025079  31.574034  39.306730  45.639370
##   51  19.940423  25.161944  26.373759  35.363076  33.629632  43.994451
##   52  30.111260  25.672106  31.738318  31.025946  35.101157  34.834077
##   53  30.618963  36.036834  30.358490  37.623444  30.902165  40.248563
##   54  30.583931  35.842542  40.954934  35.896032  40.450631  35.519599
##   55  20.650312  38.193110  41.300624  47.759943  42.162532  46.188182
##   56  25.041439  26.071966  44.267322  47.063908  50.410552  44.187306
##   57  27.832891  30.374676  31.886504  47.675871  51.853699  53.748047
##   58  26.717647  27.308094  34.913199  39.439350  51.287824  55.960610
##   59  25.724642  34.894129  33.081695  38.381156  42.671431  52.687215
##     year
## age        1987       1988       1989       1990       1991       1992
##   50  39.791963  46.949700  51.070958  56.247467  59.958151  52.591200
##   51  49.729817  48.248302  58.699211  59.021698  55.214074  62.615121
##   52  46.453013  49.612314  49.796390  56.512435  64.619880  52.176555
##   53  40.123330  47.163199  53.276001  53.391870  64.324522  67.734526
##   54  46.522725  45.090018  56.756169  62.248097  59.957211  65.653803
##   55  40.018545  49.124174  56.936768  61.472297  64.446638  64.973647
##   56  56.168528  45.649819  55.914476  59.685011  63.949344  70.918268
##   57  48.856318  66.145598  57.070419  60.333666  60.419144  67.747134
##   58  50.455851  44.663491  73.223433  56.813880  61.988473  59.681050
##   59  61.395069  56.948189  58.177313  75.040959  58.712207  68.097155
##     year
## age        1993       1994       1995
##   50  53.407705  71.462226 113.789677
##   51  51.655884  52.860238 111.303294
##   52  59.741722  59.259911  86.191874
##   53  54.199138  60.466533  74.454304
##   54  70.343149  58.783608  89.101591
##   55  67.846818  68.582854  82.324700
##   56  75.597908  72.752998  92.335172
##   57  71.863797  75.714672 103.586560
##   58  74.026654  77.016797 117.916607
##   59  67.515638  73.857108 119.728147
xtabs(dead ~ age+year, data=mel.split2, subset = age>=50 & age<60)
##     year
## age  1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988
##   50    0    1    0    0    2    3    1    3    0    0    2    1    4    1
##   51    0    1    1    1    1    3    0    1    1    2    2    0    3    2
##   52    0    1    1    1    0    1    2    0    2    2    0    1    1    1
##   53    0    0    3    0    1    4    2    2    1    2    2    1    5    0
##   54    1    0    2    0    0    0    1    2    2    4    1    1    2    1
##   55    0    0    0    3    4    1    0    3    2    1    2    3    4    0
##   56    0    1    0    1    2    2    3    1    2    3    2    3    2    0
##   57    0    0    2    2    1    1    5    2    2    1    3    2    3    5
##   58    0    0    2    2    0    1    2    3    2    3    5    2    1    2
##   59    0    0    2    0    4    1    1    3    3    2    2    1    5    4
##     year
## age  1989 1990 1991 1992 1993 1994 1995
##   50    2    3    3    2    2    4    5
##   51    2    3    3    2    1    0    2
##   52    0    1    3    1    0    2    7
##   53    0    0    3    3    2    2    1
##   54    2    3    3    3    1    1    3
##   55    3    3    6    1    3    1    2
##   56    4    4    1    1    4    3    2
##   57    0    3    5    2    4    2    1
##   58    4    1    4    2    5    3    0
##   59    3    0    0    1    3    4    3

(d)

mel.split2 <- mutate(mel.split2,
                     age10=cut(age,seq(0,110,by=10),right=FALSE),
                     year10=cut(year,seq(1970,2000,by=5),right=FALSE))
head(survRate(Surv(pt,dead)~sex+age10+year10, data=mel.split2))
##                                                sex   age10      year10
## sex=Male, age10=[0,10)   , year10=[1980,1985) Male  [0,10) [1980,1985)
## sex=Male, age10=[0,10)   , year10=[1985,1990) Male  [0,10) [1985,1990)
## sex=Male, age10=[0,10)   , year10=[1990,1995) Male  [0,10) [1990,1995)
## sex=Male, age10=[10,20)  , year10=[1975,1980) Male [10,20) [1975,1980)
## sex=Male, age10=[10,20)  , year10=[1980,1985) Male [10,20) [1980,1985)
## sex=Male, age10=[10,20)  , year10=[1985,1990) Male [10,20) [1985,1990)
##                                                    tstop event       rate
## sex=Male, age10=[0,10)   , year10=[1980,1985) 10.1303800     0 0.00000000
## sex=Male, age10=[0,10)   , year10=[1985,1990)  9.3849113     1 0.10655402
## sex=Male, age10=[0,10)   , year10=[1990,1995)  0.6666667     0 0.00000000
## sex=Male, age10=[10,20)  , year10=[1975,1980)  3.9120624     1 0.25561965
## sex=Male, age10=[10,20)  , year10=[1980,1985) 13.8037774     1 0.07244394
## sex=Male, age10=[10,20)  , year10=[1985,1990) 22.8439017     0 0.00000000
##                                                     lower     upper
## sex=Male, age10=[0,10)   , year10=[1980,1985) 0.000000000 0.3641403
## sex=Male, age10=[0,10)   , year10=[1985,1990) 0.002697714 0.5936810
## sex=Male, age10=[0,10)   , year10=[1990,1995) 0.000000000 5.5333192
## sex=Male, age10=[10,20)  , year10=[1975,1980) 0.006471729 1.4242215
## sex=Male, age10=[10,20)  , year10=[1980,1985) 0.001834122 0.4036318
## sex=Male, age10=[10,20)  , year10=[1985,1990) 0.000000000 0.1614820

(e)

pt <- mutate(mel.split2,sex=unclass(sex)) %>%
    group_by(sex, age, year) %>%
    summarise(pt=sum(pt))
expected <- inner_join(popmort, pt) %>%
    mutate(pt=ifelse(is.na(pt),0,pt)) %>%
    group_by(sex,year) %>%
    summarise(E=sum(rate*pt)) %>% ungroup
## Joining, by = c("sex", "age", "year")
## Warning: Column `sex` has different attributes on LHS and RHS of join
observed <- mutate(mel.split2, sex=as.numeric(unclass(sex))) %>%
    group_by(sex, year) %>%
    summarise(O=sum(dead)) %>% ungroup
joint <- inner_join(observed,expected) %>%
    mutate(SMR = O/E)
## Joining, by = c("sex", "year")

(f)

## overall SMRs
by(joint, joint$sex, function(data) poisson.test(sum(data$O), sum(data$E)))
## joint$sex: 1
## 
##  Exact Poisson test
## 
## data:  sum(data$O) time base: sum(data$E)
## number of events = 1461, time base = 554.57, p-value < 2.2e-16
## alternative hypothesis: true event rate is not equal to 1
## 95 percent confidence interval:
##  2.501094 2.773102
## sample estimates:
## event rate 
##   2.634465 
## 
## -------------------------------------------------------- 
## joint$sex: 2
## 
##  Exact Poisson test
## 
## data:  sum(data$O) time base: sum(data$E)
## number of events = 1259, time base = 527.39, p-value < 2.2e-16
## alternative hypothesis: true event rate is not equal to 1
## 95 percent confidence interval:
##  2.257152 2.522810
## sample estimates:
## event rate 
##   2.387211
## utility function to draw a confidence interval
polygon.ci <- function(time, interval, col="lightgrey") 
    polygon(c(time,rev(time)), c(interval[,1],rev(interval[,2])), col=col, border=col)

## modelling by calendar period
summary(fit <- glm(O ~ sex*ns(year,df=3)+offset(log(E)), data=joint, family=poisson))
## 
## Call:
## glm(formula = O ~ sex * ns(year, df = 3) + offset(log(E)), family = poisson, 
##     data = joint)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7132  -0.6997  -0.1196   0.6188   2.2989  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             1.7205     0.3366   5.111 3.21e-07 ***
## sex                     0.2409     0.2251   1.070   0.2845    
## ns(year, df = 3)1      -0.6225     0.2646  -2.353   0.0186 *  
## ns(year, df = 3)2      -1.1852     0.7613  -1.557   0.1195    
## ns(year, df = 3)3      -0.7224     0.1831  -3.944 8.01e-05 ***
## sex:ns(year, df = 3)1  -0.1950     0.1730  -1.127   0.2596    
## sex:ns(year, df = 3)2  -0.6906     0.5072  -1.361   0.1734    
## sex:ns(year, df = 3)3  -0.1486     0.1206  -1.232   0.2180    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 295.251  on 41  degrees of freedom
## Residual deviance:  39.335  on 34  degrees of freedom
## AIC: 301.19
## 
## Number of Fisher Scoring iterations: 4
##
pred <- predict(fit,type="response",newdata=mutate(joint,E=1),se.fit=TRUE)
full <- cbind(mutate(joint,fit=pred$fit), confint.predictnl(pred))
ci.cols <- c("lightgrey", "grey")
matplot(full$year, full[,c("2.5 %", "97.5 %")], type="n", ylab="SMR", xlab="Calendar year")
for (i in 1:2) {
    with(subset(full, sex==i), {
        polygon.ci(year, cbind(`2.5 %`, `97.5 %`), col=ci.cols[i])
    })
}
for (i in 1:2) {
    with(subset(full, sex==i), {
        lines(year,fit,col=i)
    })
}
legend("topright", legend=levels(mel.split2$sex), lty=1, col=1:2, bty="n")