TCGAbiolinks获取癌症临床信息

时间:2022-07-22
本文章向大家介绍TCGAbiolinks获取癌症临床信息,主要内容包括其使用实例、应用技巧、基本知识点总结和需要注意事项,具有一定的参考价值,需要的朋友可以参考一下。

前面我们简单地介绍了一下肿瘤的TNM分期系统。今天我们来用R获取感兴趣的癌症的临床信息,其中就可以找到我们上次讲到的TNM分期信息。

# Load the TCGAbiolinks package
library(TCGAbiolinks)
# Download the clinical information of the TCGA-CHOL project
# (cholangiocarcinoma, i.e. bile duct cancer)
clinical <- GDCquery_clinic(project = "TCGA-CHOL", type = "clinical")
# Save the downloaded clinical information to the file clinical.csv
write.csv(clinical, file = "clinical.csv")
> dim(clinical)
[1]  51 158

一共得到51个病例的临床信息(51行),每个病例包含158个临床字段(158列),它们分别是:

> names(clinical)
  [1] "submitter_id"                                        
  [2] "year_of_diagnosis"                                   
  [3] "classification_of_tumor"                             
  [4] "last_known_disease_status"                           
  [5] "updated_datetime"                                    
  [6] "primary_diagnosis"                                   
  [7] "tumor_stage"                                         
  [8] "age_at_diagnosis"                                    
  [9] "morphology"                                          
 [10] "days_to_last_known_disease_status"                   
 [11] "created_datetime"                                    
 [12] "prior_treatment"                                     
 [13] "ajcc_pathologic_n"                                   
 [14] "ajcc_pathologic_m"                                   
 [15] "state"                                               
 [16] "days_to_last_follow_up"                              
 [17] "days_to_recurrence"                                  
 [18] "diagnosis_id"                                        
 [19] "tumor_grade"                                         
 [20] "icd_10_code"                                         
 [21] "days_to_diagnosis"                                   
 [22] "tissue_or_organ_of_origin"                           
 [23] "progression_or_recurrence"                           
 [24] "prior_malignancy"                                    
 [25] "ajcc_staging_system_edition"                         
 [26] "ajcc_pathologic_stage"                               
 [27] "synchronous_malignancy"                              
 [28] "site_of_resection_or_biopsy"                         
 [29] "ajcc_pathologic_t"                                   
 [30] "vascular_invasion_type"                              
 [31] "method_of_diagnosis"                                 
 [32] "laterality"                                          
 [33] "ann_arbor_pathologic_stage"                          
 [34] "inrg_stage"                                          
 [35] "metastasis_at_diagnosis"                             
 [36] "cog_rhabdomyosarcoma_risk_group"                     
 [37] "irs_group"                                           
 [38] "weiss_assessment_score"                              
 [39] "enneking_msts_metastasis"                            
 [40] "esophageal_columnar_dysplasia_degree"                
 [41] "lymph_nodes_tested"                                  
 [42] "ishak_fibrosis_score"                                
 [43] "lymphatic_invasion_present"                          
 [44] "supratentorial_localization"                         
 [45] "igcccg_stage"                                        
 [46] "gastric_esophageal_junction_involvement"             
 [47] "goblet_cells_columnar_mucosa_present"                
 [48] "iss_stage"                                           
 [49] "enneking_msts_tumor_site"                            
 [50] "tumor_regression_grade"                              
 [51] "gross_tumor_weight"                                  
 [52] "ajcc_clinical_stage"                                 
 [53] "ann_arbor_b_symptoms"                                
 [54] "cog_neuroblastoma_risk_group"                        
 [55] "days_to_best_overall_response"                       
 [56] "enneking_msts_grade"                                 
 [57] "tumor_largest_dimension_diameter"                    
 [58] "figo_stage"                                          
 [59] "child_pugh_classification"                           
 [60] "peripancreatic_lymph_nodes_positive"                 
 [61] "circumferential_resection_margin"                    
 [62] "lymph_nodes_positive"                                
 [63] "inss_stage"                                          
 [64] "inpc_histologic_group"                               
 [65] "secondary_gleason_grade"                             
 [66] "residual_disease"                                    
 [67] "anaplasia_present"                                   
 [68] "ann_arbor_clinical_stage"                            
 [69] "tumor_confined_to_organ_of_origin"                   
 [70] "perineural_invasion_present"                         
 [71] "tumor_focality"                                      
 [72] "wilms_tumor_histologic_subtype"                      
 [73] "ajcc_clinical_t"                                     
 [74] "ajcc_clinical_n"                                     
 [75] "ajcc_clinical_m"                                     
 [76] "irs_stage"                                           
 [77] "first_symptom_prior_to_diagnosis"                    
 [78] "cog_liver_stage"                                     
 [79] "enneking_msts_stage"                                 
 [80] "inpc_grade"                                          
 [81] "burkitt_lymphoma_clinical_variant"                   
 [82] "ann_arbor_extranodal_involvement"                    
 [83] "peripancreatic_lymph_nodes_tested"                   
 [84] "masaoka_stage"                                       
 [85] "mitosis_karyorrhexis_index"                          
 [86] "esophageal_columnar_metaplasia_present"              
 [87] "best_overall_response"                               
 [88] "vascular_invasion_present"                           
 [89] "micropapillary_features"                             
 [90] "primary_gleason_grade"                               
 [91] "anaplasia_present_type"                              
 [92] "medulloblastoma_molecular_classification"            
 [93] "cog_renal_stage"                                     
 [94] "metastasis_at_diagnosis_site"                        
 [95] "gleason_grade_group"                                 
 [96] "NA."                                                 
 [97] "cigarettes_per_day"                                  
 [98] "weight"                                              
 [99] "alcohol_history"                                     
[100] "alcohol_intensity"                                   
[101] "bmi"                                                 
[102] "years_smoked"                                        
[103] "exposure_id"                                         
[104] "height"                                              
[105] "environmental_tobacco_smoke_exposure"                
[106] "tobacco_smoking_status"                              
[107] "pack_years_smoked"                                   
[108] "respirable_crystalline_silica_exposure"              
[109] "coal_dust_exposure"                                  
[110] "asbestos_exposure"                                   
[111] "type_of_tobacco_used"                                
[112] "tobacco_smoking_onset_year"                          
[113] "smoking_frequency"                                   
[114] "tobacco_smoking_quit_year"                           
[115] "type_of_smoke_exposure"                              
[116] "alcohol_days_per_week"                               
[117] "time_between_waking_and_first_smoke"                 
[118] "radon_exposure"                                      
[119] "alcohol_drinks_per_day"                              
[120] "gender"                                              
[121] "year_of_birth"                                       
[122] "race"                                                
[123] "days_to_birth"                                       
[124] "ethnicity"                                           
[125] "vital_status"                                        
[126] "demographic_id"                                      
[127] "age_at_index"                                        
[128] "year_of_death"                                       
[129] "days_to_death"                                       
[130] "cause_of_death"                                      
[131] "weeks_gestation_at_birth"                            
[132] "premature_at_birth"                                  
[133] "treatments_pharmaceutical_days_to_treatment_start"   
[134] "treatments_pharmaceutical_treatment_effect"          
[135] "treatments_pharmaceutical_initial_disease_status"    
[136] "treatments_pharmaceutical_treatment_type"            
[137] "treatments_pharmaceutical_treatment_id"              
[138] "treatments_pharmaceutical_therapeutic_agents"        
[139] "treatments_pharmaceutical_regimen_or_line_of_therapy"
[140] "treatments_pharmaceutical_treatment_intent_type"     
[141] "treatments_pharmaceutical_treatment_anatomic_site"   
[142] "treatments_pharmaceutical_treatment_outcome"         
[143] "treatments_pharmaceutical_days_to_treatment_end"     
[144] "treatments_pharmaceutical_treatment_or_therapy"      
[145] "treatments_radiation_days_to_treatment_start"        
[146] "treatments_radiation_treatment_effect"               
[147] "treatments_radiation_initial_disease_status"         
[148] "treatments_radiation_treatment_type"                 
[149] "treatments_radiation_treatment_id"                   
[150] "treatments_radiation_therapeutic_agents"             
[151] "treatments_radiation_regimen_or_line_of_therapy"     
[152] "treatments_radiation_treatment_intent_type"          
[153] "treatments_radiation_treatment_anatomic_site"        
[154] "treatments_radiation_treatment_outcome"              
[155] "treatments_radiation_days_to_treatment_end"          
[156] "treatments_radiation_treatment_or_therapy"           
[157] "bcr_patient_barcode"                                 
[158] "disease"

我们上次提到的TNM分期信息在这里的名字分别叫"ajcc_pathologic_t","ajcc_pathologic_n"和"ajcc_pathologic_m"。

这些临床信息也已经保存到本地的clinical.csv文件中了,我们可以直接用Excel打开查看,并找到这三列。