We extracted data from NHANES-III, estimating HOMA2-IR and HOMA2-B using fasting glucose concentrations and C-peptide with the HOMA2-IR calculator from Oxford. We evaluated cases with diabetes defined as previous diagnosis of diabetes HbA1c >6.5% or fasting glucose >126mg/dL, as recommended by ADA.
library(SAScii); library(tidyverse); library(survival); library(ggpubr)
#### Download NHANES-III ####
download.file("https://wwwn.cdc.gov/nchs/data/nhanes3/24a/ssgad.xpt", tf0 <- tempfile(), mode="wb")
GAD65 <- foreign::read.xport(tf0)
#### Household data ###
download.file("https://wwwn.cdc.gov/nchs/data/nhanes3/1a/adult.dat", tf <- tempfile(), mode="wb")
nhanes3<-read.SAScii(fn = tf,sas_ri = "https://wwwn.cdc.gov/nchs/data/nhanes3/1a/adult.sas")
write.csv(nhanes3, "nhanes3.csv")
#### Laboratory data ###
download.file("https://wwwn.cdc.gov/nchs/data/nhanes3/1a/lab.dat", tf1 <- tempfile(), mode="wb")
nhanes3_lab<-read.SAScii(fn = tf1,sas_ri = "https://wwwn.cdc.gov/nchs/data/nhanes3/1a/lab.sas")
write.csv(nhanes3_lab, "nhanes3_lab.csv")
#### Examination data ###
download.file("https://wwwn.cdc.gov/nchs/data/nhanes3/1a/exam.dat", tf1 <- tempfile(), mode="wb")
nhanes3_exam<-read.SAScii(fn = tf1,sas_ri = "https://wwwn.cdc.gov/nchs/data/nhanes3/1a/exam.sas")
#### Merge datasets ###
library(tidyverse)
nhanes3_data<- nhanes3_lab %>% left_join(GAD65, by="SEQN")
nhanes3_data<-nhanes3_data %>% left_join(nhanes3, by="SEQN")
nhanes3_data<-nhanes3 %>% left_join(nhanes3_exam, by="SEQN")
write.csv(nhanes3_data, "nhanes3.csv")
#### Dataset clusters ####
nhanes3<-read.csv("nhanes3.csv")
nhanes3<- nhanes3%>% filter(!is.na(GAPGAD65) & !is.na(C1PSI) & HSAGEIR.x>=20)
nhanes3$diabetes<-as.numeric((nhanes3$GHP>=6.5 |nhanes3$G1P>=126 | nhanes3$HAD1==1))
nhanes3fin<- nhanes3 %>% filter(diabetes==1 & (C1PSI!=88888 & G1PSI!=888888 & GHP!=8888 & BMPBMI!=8888))
clusters<-nhanes3fin %>% dplyr::select(SEQN,GHP, HAD5R, G1PSI, C1PSI,BMPBMI, HSAGEIR.x,GAPGAD65)
names(clusters)<-c("SEQN", "HbA1c", "ADD", "GLU", "CPEP", "BMI", "AGE", "GAD65")
clusters$ADD[is.na(clusters$ADD) | clusters$ADD==999 | clusters$ADD==888]<-clusters$AGE
cox.imp<-function(x){
var<-summary(pool(x))[1]
HR<-exp(summary(pool(x))[2])
lwr<-exp(summary(pool(x))[2]-1.96*summary(pool(x))[3])
upr<-exp(summary(pool(x))[2]+1.96*summary(pool(x))[3])
pval<-summary(pool(x))[6]
print(cbind(var,HR, lwr, upr, pval))
}
We used HbA1c, body-mass index (BMI), age at diabetes diagnosis, HOMA2-IR and HOMA2-B to clasify diabetes clusters using the SNNN algorithm previously developed by our team (https://uiem.shinyapps.io/diabetes_clusters_app/). Once we classified these subgroups, we classified severe autoimmune diabetes (SAID) using anti-GAD65 antibodies with values >0.069 indicating positivity.
clust<-read.csv("clusters.csv")
clust0<-clust%>%select(SEQN, class, HOMA2_B, HOMA2_IR)
nhanes<-nhanes3fin %>% left_join(clust0, by="SEQN")
nhanes$Cluster<-as.character(nhanes$class)
nhanes$Cluster[nhanes$GAPGAD65>=0.069]<-"said"
table(nhanes$Cluster, nhanes$class)
##
## mard mord siid sird
## mard 235 0 0 0
## mord 0 208 0 0
## said 14 5 18 2
## siid 0 0 293 0
## sird 0 0 0 161
nhanes$HAD5R[is.na(nhanes$HAD5R) | nhanes$HAD5R==999 | nhanes$HAD5R==888]<-nhanes$HSAGEIR.x
## Warning in nhanes$HAD5R[is.na(nhanes$HAD5R) | nhanes$HAD5R == 999 | nhanes$HAD5R
## == : número de items para para sustituir no es un múltiplo de la longitud del
## reemplazo
nhanes$years<-nhanes$HSAGEIR.x-nhanes$HAD5R
nhanes$recent<-ifelse(nhanes$years<5,1,0)
nhanes$recent<-factor(nhanes$recent, labels = c("<5 years since diagnosis", ">=5 years since diagnosis"))
mort<-read.csv("nhanes3_mort.csv")
mort$SEQN<-mort$seqn
nhanes0<-nhanes %>% left_join(mort, by="SEQN")
nhanes0$cluster2<-factor(nhanes0$Cluster, labels = c("MARD", "MOD", "SAID", "SIID", "SIRD"))
nhanes0$cluster2<-relevel(nhanes0$cluster2, ref="MOD")
## Evaluating diabetes frequencies based on years since diabetes diagnosis
t1<-table(nhanes0$recent, nhanes0$cluster2)
knitr::kable(t1, align = "lccrr")
MOD | MARD | SAID | SIID | SIRD | |
---|---|---|---|---|---|
<5 years since diagnosis | 142 | 133 | 24 | 192 | 68 |
>=5 years since diagnosis | 66 | 102 | 18 | 101 | 93 |
### Plotting cluster frequencies
df <- nhanes0 %>%
group_by(cluster2, recent) %>%
summarise(Counts = n()) %>%
mutate(freq = Counts / sum(Counts)) %>% drop_na()
## `summarise()` regrouping output by 'cluster2' (override with `.groups` argument)
df<-as.data.frame(df)
df$Counts<-as.numeric(df$Counts);df$freq<-as.numeric(df$freq)
ggplot(data=df, aes(y=freq, x=cluster2,fill=cluster2)) +
geom_bar(stat="identity", position='dodge')+scale_fill_nejm()+
scale_y_continuous(labels = scales::percent_format())+labs(fill="Cluster")+
theme_pubr()+ylab("Frequency (%)")+xlab("")+facet_wrap(~recent)+
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())+
geom_text(aes(label=round(freq*100,1)), vjust=1.6, color="white", size=3.5)
We identified that the proportion of diabetes subgroups was modified by defining recent vs. non-recent diabetes diagnosis. The profiles of each biomarker are consistent with what is expected for each cluster.
g1<-ggplot(nhanes0, aes(x=cluster2, y=HOMA2_IR, fill=cluster2))+geom_boxplot()+scale_y_log10()+
ylab("HOMA2-IR")+xlab("Cluster")+theme_classic()+labs(fill="Cluster")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
g2<-ggplot(nhanes0, aes(x=cluster2, y=HOMA2_B, fill=cluster2))+geom_boxplot()+scale_y_log10()+
ylab("HOMA2-B")+xlab("Cluster")+theme_classic()+labs(fill="Cluster")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
g3<-ggplot(nhanes0, aes(x=cluster2, y=GHP, fill=cluster2))+geom_boxplot()+ylab("HbA1c (%)")+
xlab("Cluster")+theme_classic()+labs(fill="Cluster")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
g4<-ggplot(nhanes0, aes(x=cluster2, y=G1P, fill=cluster2))+geom_boxplot()+scale_y_log10()+
ylab("Glucose (mg/dL)")+xlab("Cluster")+theme_classic()+labs(fill="Cluster")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
g5<-ggplot(nhanes0, aes(x=cluster2, y=BMPBMI, fill=cluster2))+geom_boxplot()+scale_y_log10()+
ylab("BMI (Kg/m2)")+xlab("Cluster")+theme_classic()+labs(fill="Cluster")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
g6<-ggplot(nhanes0, aes(x=cluster2, y=HAD5R, fill=cluster2))+geom_boxplot()+ylab("Age at diagnosis (years)")+
xlab("Cluster")+theme_classic()+labs(fill="Cluster")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
ggarrange(g1, g2, g3, g4, g5, g6, labels=LETTERS[1:6])
Next, we fitted a Cox proportional risk regression model to predict all-cause mortality. Time of follow-up was estimated from examination until last record. We performed multiple imputation by chained equations using the mice
R packag with 5 iterations and five multiply imputed datasets.
nhanes0$mort_cvd_rec<-NULL;
nhanes0$mort_cvd_rec[nhanes0$ucod_leading==1]<-1;
nhanes0$mort_cvd_rec[nhanes0$ucod_leading>1]<-2;
nhanes0$mort_cvd_rec[nhanes0$ucod_leading==0]<-0
nhanes0$mort_cvd_rec<-na.tools::na.replace(nhanes0$mort_cvd_rec,0)
nhanes0$mort_db_rec<-NULL;
nhanes0$mort_db_rec[nhanes0$ucod_leading==2]<-1;
nhanes0$mort_db_rec[nhanes0$ucod_leading!=2]<-2;
nhanes0$mort_db_rec[nhanes0$ucod_leading==0]<-0
nhanes0$mort_db_rec<-na.tools::na.replace(nhanes0$mort_db_rec,0)
nhanes2<-nhanes0 %>% select(cluster2, mortstat, permth_exm, HSSEX.x,DMARACER.x,
HAR3,HAD6,HAD10,HAE5A,HSAGEIR.x,HAE8D,mort_cvd_rec,mort_db_rec, years)
nhanes2<-nhanes2[!is.na(nhanes2$mortstat) & !is.na(nhanes2$cluster2),]
nhanes3<-nhanes2 %>%filter(years<=5)
set.seed(123);imp<-mice::mice(nhanes2, maxit=5, m=5)
set.seed(123);imp2<-mice::mice(nhanes3, maxit=5, m=5)
To account for multiple imputation, we used R pooling function to obtain estimates of all-cause mortality for each cluster, using mild-obesity related diabetes (MOD) as the reference category.
##
## Attaching package: 'mice'
## The following objects are masked _by_ '.GlobalEnv':
##
## nhanes, nhanes2
## The following objects are masked from 'package:base':
##
## cbind, rbind
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## term estimate std.error statistic df p.value
## 1 cluster2MARD 0.6606550 0.1104646 5.980694 997403.7 2.222638e-09
## 2 cluster2SAID 0.4742804 0.1871304 2.534491 997403.7 1.126123e-02
## 3 cluster2SIID 0.4736432 0.1064895 4.447791 997403.7 8.676704e-06
## 4 cluster2SIRD 0.6867577 0.1204675 5.700772 997403.7 1.192995e-08
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## term estimate estimate estimate p.value
## 1 cluster2MARD 1.936060 1.559156 2.404076 2.222638e-09
## 2 cluster2SAID 1.606858 1.113496 2.318815 1.126123e-02
## 3 cluster2SIID 1.605834 1.303332 1.978547 8.676704e-06
## 4 cluster2SIRD 1.987262 1.569318 2.516512 1.192995e-08
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## term estimate std.error statistic df p.value
## 1 cluster2MARD 0.67810930 0.11491767 5.9008273 1602.76170 4.404544e-09
## 2 cluster2SAID 0.32886438 0.19144023 1.7178436 12904.37637 8.584913e-02
## 3 cluster2SIID 0.44273521 0.10879162 4.0695708 34167.94737 4.720563e-05
## 4 cluster2SIRD 0.72445532 0.12401376 5.8417332 14984.03752 5.273178e-09
## 5 HSSEX.x -0.08034425 0.07557929 -1.0630459 17382.20308 2.877759e-01
## 6 DMARACER.x -0.11570909 0.07873727 -1.4695594 5084.94197 1.417430e-01
## 7 HAR3 0.04102256 0.09286604 0.4417391 107.63473 6.595643e-01
## 8 HAD6 -0.37676363 0.11107810 -3.3918804 26.13394 2.219442e-03
## 9 HAD10 0.08431858 0.07097304 1.1880368 148.06415 2.367211e-01
## 10 HAE5A -0.05255745 0.06855499 -0.7666465 29.70661 4.493416e-01
## 11 HAE8D -0.10529243 0.09643160 -1.0918872 11.46606 2.972945e-01
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## term estimate estimate estimate p.value
## 1 cluster2MARD 1.9701492 1.5728208 2.4678515 4.404544e-09
## 2 cluster2SAID 1.3893894 0.9546997 2.0220002 8.584913e-02
## 3 cluster2SIID 1.5569600 1.2579754 1.9270046 4.720563e-05
## 4 cluster2SIRD 2.0636068 1.6183196 2.6314165 5.273178e-09
## 5 HSSEX.x 0.9227986 0.7957425 1.0701418 2.877759e-01
## 6 DMARACER.x 0.8907343 0.7633534 1.0393712 1.417430e-01
## 7 HAR3 1.0418756 0.8684938 1.2498706 6.595643e-01
## 8 HAD6 0.6860782 0.5518513 0.8529532 2.219442e-03
## 9 HAD10 1.0879754 0.9466853 1.2503528 2.367211e-01
## 10 HAE5A 0.9487998 0.8295058 1.0852500 4.493416e-01
## 11 HAE8D 0.9000613 0.7450542 1.0873173 2.972945e-01
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## term estimate std.error statistic df p.value
## 1 cluster2MARD -0.10409422 0.131822418 -0.7896549 141.941730 4.310465e-01
## 2 cluster2SAID 0.14378697 0.196794085 0.7306468 1008.879609 4.651646e-01
## 3 cluster2SIID 0.31581996 0.110445623 2.8595064 6175.424470 4.257232e-03
## 4 cluster2SIRD 0.22413706 0.135380173 1.6556121 227.422601 9.917958e-02
## 5 HSSEX.x -0.21873142 0.075874369 -2.8828104 4418.383641 3.960470e-03
## 6 DMARACER.x -0.01914291 0.080836408 -0.2368105 608.878800 8.128835e-01
## 7 HAR3 -0.33746987 0.094791330 -3.5601344 107.959207 5.525989e-04
## 8 HAD6 -0.28393250 0.095824220 -2.9630557 103.211179 3.781755e-03
## 9 HAD10 0.09688734 0.060727055 1.5954559 2478.138935 1.107378e-01
## 10 HAE5A -0.04076450 0.085269990 -0.4780639 9.851429 6.430358e-01
## 11 HAE8D 0.09259158 0.157094840 0.5893993 5.454091 5.791666e-01
## 12 HSAGEIR.x 0.07616702 0.005507496 13.8297003 27.443917 6.861178e-14
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## Warning in pool.fitlist(getfit(object), dfcom = dfcom): Large sample assumed.
## term estimate estimate estimate p.value
## 1 cluster2MARD 0.9011404 0.6959579 1.1668149 4.310465e-01
## 2 cluster2SAID 1.1546381 0.7851116 1.6980888 4.651646e-01
## 3 cluster2SIID 1.3713833 1.1044490 1.7028331 4.257232e-03
## 4 cluster2SIRD 1.2512425 0.9596294 1.6314712 9.917958e-02
## 5 HSSEX.x 0.8035375 0.6925013 0.9323774 3.960470e-03
## 6 DMARACER.x 0.9810392 0.8372921 1.1494648 8.128835e-01
## 7 HAR3 0.7135735 0.5925850 0.8592642 5.525989e-04
## 8 HAD6 0.7528175 0.6239107 0.9083577 3.781755e-03
## 9 HAD10 1.1017362 0.9781055 1.2409936 1.107378e-01
## 10 HAE5A 0.9600552 0.8122934 1.1346959 6.430358e-01
## 11 HAE8D 1.0970136 0.8062880 1.4925670 5.791666e-01
## 12 HSAGEIR.x 1.0791428 1.0675564 1.0908549 6.861178e-14