重复测量方差分析

sunqi

2020/7/26

概述

双因素的重复测量资料方差分析

代码

数据获得

library(tidyverse)
library(ggpubr)
library(rstatix)
rm(list=ls())
set.seed(123)
data("selfesteem2", package = "datarium")
# 抽样
selfesteem2 %>% sample_n_by(treatment, size = 1)

## # A tibble: 2 x 5
##   id    treatment    t1    t2    t3
##   <fct> <fct>     <dbl> <dbl> <dbl>
## 1 3     ctr          93    92    89
## 2 3     Diet         91    91    92

# 数据含有5个变量，其中三个时间点，一个为治疗方式，一个为id
# 个案为12个，每个人进行3次测量，2种治疗

# 对数据进行长转宽
#将id和时间转换为因子
selfesteem2 <- selfesteem2 %>%
  gather(key = "time", value = "score", t1, t2, t3) %>%
  convert_as_factor(id, time)
# 检查数据
set.seed(123)
selfesteem2 %>% sample_n_by(treatment, time, size = 1)

## # A tibble: 6 x 4
##   id    treatment time  score
##   <fct> <fct>     <fct> <dbl>
## 1 3     ctr       t1       93
## 2 3     ctr       t2       92
## 3 10    ctr       t3       81
## 4 2     Diet      t1      100
## 5 6     Diet      t2       75
## 6 11    Diet      t3       91

# 描述数据
selfesteem2 %>%
  group_by(treatment, time) %>%
  get_summary_stats(score, type = "mean_sd")

## # A tibble: 6 x 6
##   treatment time  variable     n  mean    sd
##   <fct>     <fct> <chr>    <dbl> <dbl> <dbl>
## 1 ctr       t1    score       12  88    8.08
## 2 ctr       t2    score       12  83.8 10.2
## 3 ctr       t3    score       12  78.7 10.5
## 4 Diet      t1    score       12  87.6  7.62
## 5 Diet      t2    score       12  87.8  7.42
## 6 Diet      t3    score       12  87.7  8.14

# 可视化

bxp <- ggboxplot(
  selfesteem2, x = "time", y = "score",
  color = "treatment", palette = "jco"
  )
bxp

# 检测假设
## 异常值
selfesteem2 %>%
  group_by(treatment, time) %>%
  identify_outliers(score)

## [1] treatment  time       id         score      is.outlier is.extreme
## <0 rows> (or 0-length row.names)

## 正态假设

selfesteem2 %>%
  group_by(treatment, time) %>%
  shapiro_test(score)

## # A tibble: 6 x 5
##   treatment time  variable statistic      p
##   <fct>     <fct> <chr>        <dbl>  <dbl>
## 1 ctr       t1    score        0.828 0.0200
## 2 ctr       t2    score        0.868 0.0618
## 3 ctr       t3    score        0.887 0.107
## 4 Diet      t1    score        0.919 0.279
## 5 Diet      t2    score        0.923 0.316
## 6 Diet      t3    score        0.886 0.104

# qqplot
ggqqplot(selfesteem2, "score", ggtheme = theme_bw()) +
  facet_grid(time ~ treatment, labeller = "label_both")

# 计算
## dv 需要计算的值
## wid id
# within 分组因素
res.aov <- anova_test(
  data = selfesteem2, dv = score, wid = id,
  within = c(treatment, time)
  )
get_anova_table(res.aov)

## ANOVA Table (type III tests)
##
##           Effect  DFn   DFd      F        p p<.05   ges
## 1      treatment 1.00 11.00 15.541 2.00e-03     * 0.059
## 2           time 1.31 14.37 27.369 5.03e-05     * 0.049
## 3 treatment:time 2.00 22.00 30.424 4.63e-07     * 0.050

## 结果包含单因素和交互作用

## 事后检验
# 因为存在交互作用，所以分别进行事后
# Simple main effect：控制一个因素不同水平，对另一个因素检验
# Simple pairwise comparisons：如果上个因素有效则进行
## 对于无交互作用，需要观察方差分析的主效应


# 在不同时间点上治疗的主效应
one.way <- selfesteem2 %>%
  group_by(time) %>%
  anova_test(dv = score, wid = id, within = treatment) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
one.way

## # A tibble: 3 x 9
##   time  Effect      DFn   DFd      F       p `p<.05`      ges   p.adj
##   <fct> <chr>     <dbl> <dbl>  <dbl>   <dbl> <chr>      <dbl>   <dbl>
## 1 t1    treatment     1    11  0.376 0.552   ""      0.000767 1
## 2 t2    treatment     1    11  9.03  0.012   "*"     0.052    0.036
## 3 t3    treatment     1    11 30.9   0.00017 "*"     0.199    0.00051

# 在t1上无差异

# 使用匹配的t检验查看具体组的差异
pwc <- selfesteem2 %>%
  group_by(time) %>%
  pairwise_t_test(
    score ~ treatment, paired = TRUE,
    p.adjust.method = "bonferroni"
    )
pwc

## # A tibble: 3 x 11
##   time  .y.   group1 group2    n1    n2 statistic    df       p   p.adj
## * <fct> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 t1    score ctr    Diet      12    12     0.613    11 0.552   0.552
## 2 t2    score ctr    Diet      12    12    -3.00     11 0.012   0.012
## 3 t3    score ctr    Diet      12    12    -5.56     11 0.00017 0.00017
## # ... with 1 more variable: p.adj.signif <chr>

# 两两比较发现，ctr组和Diet组在t2 (p = 0.12)和t3
# (p = 0.00017)时差异有统计学意义
# ，而在t1时差异无统计学意义(p = 0.55)。


# 时间效应的分析
# 治疗的不同水平
one.way2 <- selfesteem2 %>%
  group_by(treatment) %>%
  anova_test(dv = score, wid = id, within = time) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
# 事后检验
pwc2 <- selfesteem2 %>%
  group_by(treatment) %>%
  pairwise_t_test(
    score ~ time, paired = TRUE,
    p.adjust.method = "bonferroni"
    )
pwc2

## # A tibble: 6 x 11
##   treatment .y.   group1 group2    n1    n2 statistic    df       p   p.adj
## * <fct>     <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 ctr       score t1     t2        12    12     4.53     11 8.58e-4 3.00e-3
## 2 ctr       score t1     t3        12    12     6.91     11 2.55e-5 7.65e-5
## 3 ctr       score t2     t3        12    12     6.49     11 4.49e-5 1.35e-4
## 4 Diet      score t1     t2        12    12    -0.522    11 6.12e-1 1.00e+0
## 5 Diet      score t1     t3        12    12    -0.102    11 9.21e-1 1.00e+0
## 6 Diet      score t2     t3        12    12     0.283    11 7.82e-1 1.00e+0
## # ... with 1 more variable: p.adj.signif <chr>

# 对于交互作用不显著的分析
# 直接比较治疗和时间的事后检验
selfesteem2 %>%
  pairwise_t_test(
    score ~ treatment, paired = TRUE,
    p.adjust.method = "bonferroni"
    )

## # A tibble: 1 x 10
##   .y.   group1 group2    n1    n2 statistic    df        p    p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>    <dbl>    <dbl> <chr>
## 1 score ctr    Diet      36    36     -4.35    35 0.000113 0.000113 ***

selfesteem2 %>%
  pairwise_t_test(
    score ~ time, paired = TRUE,
    p.adjust.method = "bonferroni"
    )

## # A tibble: 3 x 10
##   .y.   group1 group2    n1    n2 statistic    df     p p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>
## 1 score t1     t2        24    24      2.86    23 0.009 0.027 *
## 2 score t1     t3        24    24      3.70    23 0.001 0.004 **
## 3 score t2     t3        24    24      3.75    23 0.001 0.003 **

# 所有的检测都是有意义的



# 可视化
pwc <- pwc %>% add_xy_position(x = "time")
bxp +
  stat_pvalue_manual(pwc, tip.length = 0, hide.ns = TRUE) +
  labs(
    subtitle = get_test_label(res.aov, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )

## 报告结果

治疗与时间对结果得分的交互作用有统计学意义，F(2,22) = 30.4, p < 0.0001。
因此，我们在每个时间点分析处理变量的影响。p值采用Bonferroni多重测试校正方法进行调整。治疗效果在t2 (p = 0.036)和t3 (p = 0.00051)时显著，而在t1时不显著(p = 1)。
两两比较，配对t检验显示，在t2 (p = 0.012)和t3 (p = 0.00017)时间点，ctr和diet试验的平均自尊得分有显著差异，而在t1 (p = 0.55)时间点则无显著差异。

结束语

love&peace

R海拾遗-双因素重复测量方差分析

重复测量方差分析

sunqi

2020/7/26

概述

代码

数据获得

结束语