How to print all columns and prevent an insertion of one when using lapply()?
I have a file with 28 columns and more than 10,000 rows. I am splitting this file by the second column, gene_id, so that there is one output file per distinct gene_id, each containing only the rows for that gene.
variant_id gene_id tss_distance ma_samples ma_count maf pval_nominal slope slope_se hg38_chr hg38_pos ref_allele alt_allele hg19_chr hg19_pos ID new_MAF CHROM POS REF ALT A1 OBS_CT BETA SE P SD Variance
chr1_17726150_G_A_b38 ENSG00000272426.1 821374 68 78 0.0644628 0.764314 -0.0320846 0.106958 chr1 17726150 G A chr1 18052645 rs260514:18052645:G:A 0.058155 1 18052645 G A G 1597 0.0147047 0.0656528 0.822804 2.62364886486368 6.88353336610048
chr1_17729225_G_A_b38 ENSG00000117118.9 675055 205 226 0.186777 0.770706 0.00898192 0.0308023 chr1 17729225 G A chr1 18055720 rs11580304:18055720:G:A 0.194694 1 18055720 G A A 1597 0.00515331 0.022282 0.817129 0.890444032956592 0.792890575828
chr1_17729225_G_A_b38 ENSG00000117122.13 748390 205 226 0.186777 0.0373499 0.0553745 0.0265315 chr1 17729225 G A chr1 18055720 rs11580304:18055720:G:A 0.194694 1 18055720 G A A 1597 0.00515331 0.022282 0.817129 0.890444032956592 0.792890575828
chr1_177298830_G_A_b38 ENSG00000117122.13 7483450 245 246 0.106777 0.0377699 0.009745 0.0265315 chr1 17729225 G A chr1 18055720 rs11580304:18055720:G:A 0.194694 1 18055720 G A A 1597 0.00515331 0.022282 0.817129 0.890449757 0.79289055858
output 1
chr1_17726150_G_A_b38 ENSG00000272426.1 821374 68 78 0.0644628 0.764314 -0.0320846 0.106958 chr1 17726150 G A chr1 18052645 rs260514:18052645:G:A 0.058155 1 18052645 G A G 1597 0.0147047 0.0656528 0.822804 2.62364886486368 6.88353336610048
output 2
chr1_17729225_G_A_b38 ENSG00000117118.9 675055 205 226 0.186777 0.770706 0.00898192 0.0308023 chr1 17729225 G A chr1 18055720 rs11580304:18055720:G:A 0.194694 1 18055720 G A A 1597 0.00515331 0.022282 0.817129 0.890444032956592 0.792890575828
output 3
chr1_17729225_G_A_b38 ENSG00000117122.13 748390 205 226 0.186777 0.0373499 0.0553745 0.0265315 chr1 17729225 G A chr1 18055720 rs11580304:18055720:G:A 0.194694 1 18055720 G A A 1597 0.00515331 0.022282 0.817129 0.890444032956592 0.792890575828
chr1_17729883_G_A_b38 ENSG00000117122.13 7483450 245 246 0.106777 0.0377699 0.009745 0.0265315 chr1 17729225 G A chr1 18055720 rs11580304:18055720:G:A 0.194694 1 18055720 G A A 1597 0.00515331 0.022282 0.817129 0.890449757 0.79289055858
I am using the R script below:
# Split the combined GWAS/eQTL table by gene and write one tab-delimited
# file per gene_id into the current working directory.
df <- read.table(
  "/data/coloc_eQTL/combined_GWAS_skin_eQTL_AL.txt",
  header = TRUE
)

# One data frame per distinct gene_id; the list names are the gene_id values.
mylist <- split(df, f = df$gene_id)

# paste0() builds "<gene_id>.txt"; paste() would use its default sep = " "
# and produce "<gene_id> .txt" with a space before the extension.
# The call is made only for its side effect (writing files), so the list of
# NULLs returned by lapply() is wrapped in invisible() to keep it from
# being printed.
invisible(lapply(names(mylist), function(x) {
  write.table(
    mylist[[x]],
    file = paste0(x, ".txt"),
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )
}))
However, I notice that the output file has a column of numbers inserted at the start, even though I have set row.names = FALSE. Also, the P column does not appear in the output, so everything is misaligned. How can I ensure that all columns are retained and that no additional column is added at the start?
number variant_id gene_id tss_distance ma_samples ma_count maf pval_nominal slope slope_se hg38_chr hg38_pos ref_allele alt_allele hg19_chr hg19_pos ID new_MAF CHROM POS REF ALT A1 OBS_CT BETA SE SD Variance
6253451 chr1_17726150_G_A_b38 ENSG00000074964.16 186315 68 78 0.0644628 0.966721 0.00151619 0.0363244 chr1 17726150 G A chr1 18052645 rs260514:18052645:G:A 0.058155 1 18052645 G A G 1597 0.0147047 0.0656528 0.822804 2.62364886486368 6.88353336610048
dput() output of the original file (10 rows):
structure(list(variant_id = c("chr1_17726150_G_A_b38", "chr1_17726150_G_A_b38",
"chr1_17726150_G_A_b38", "chr1_17726150_G_A_b38", "chr1_17726150_G_A_b38",
"chr1_17726150_G_A_b38", "chr1_17726150_G_A_b38", "chr1_17726150_G_A_b38",
"chr1_17726150_G_A_b38", "chr1_17726150_G_A_b38"), gene_id = c("ENSG00000272426.1",
"ENSG00000117118.9", "ENSG00000142623.9", "ENSG00000142619.4",
"ENSG00000179023.8", "ENSG00000228549.3", "ENSG00000058453.16",
"ENSG00000159339.13", "ENSG00000074964.16", "ENSG00000117122.13"
), tss_distance = c(821374L, 671980L, 521024L, 477052L, -754832L,
855205L, 804200L, 417955L, 186315L, 745315L), ma_samples = c(68L,
68L, 68L, 68L, 68L, 68L, 68L, 68L, 68L, 68L), ma_count = c(78L,
78L, 78L, 78L, 78L, 78L, 78L, 78L, 78L, 78L), maf = c(0.0644628,
0.0644628, 0.0644628, 0.0644628, 0.0644628, 0.0644628, 0.0644628,
0.0644628, 0.0644628, 0.0644628), pval_nominal = c(0.764314,
0.955989, 0.352575, 0.00666648, 0.667965, 0.0943182, 0.489115,
0.796736, 0.966721, 0.326205), slope = c(-0.0320846, -0.00275742,
-0.0687903, -0.202377, 0.0460589, -0.180725, -0.0449686, 0.0258654,
0.00151619, -0.0424019), slope_se = c(0.106958, 0.0499406, 0.0739349,
0.0743021, 0.107318, 0.10783, 0.0649652, 0.10037, 0.0363244,
0.0431489), hg38_chr = c("chr1", "chr1", "chr1", "chr1", "chr1",
"chr1", "chr1", "chr1", "chr1", "chr1"), hg38_pos = c(17726150L,
17726150L, 17726150L, 17726150L, 17726150L, 17726150L, 17726150L,
17726150L, 17726150L, 17726150L), ref_allele = c("G", "G", "G",
"G", "G", "G", "G", "G", "G", "G"), alt_allele = c("A", "A",
"A", "A", "A", "A", "A", "A", "A", "A"), hg19_chr = c("chr1",
"chr1", "chr1", "chr1", "chr1", "chr1", "chr1", "chr1", "chr1",
"chr1"), hg19_pos = c(18052645L, 18052645L, 18052645L, 18052645L,
18052645L, 18052645L, 18052645L, 18052645L, 18052645L, 18052645L
), ID = c("rs260514:18052645:G:A", "rs260514:18052645:G:A", "rs260514:18052645:G:A",
"rs260514:18052645:G:A", "rs260514:18052645:G:A", "rs260514:18052645:G:A",
"rs260514:18052645:G:A", "rs260514:18052645:G:A", "rs260514:18052645:G:A",
"rs260514:18052645:G:A"), new_MAF = c(0.058155, 0.058155, 0.058155,
0.058155, 0.058155, 0.058155, 0.058155, 0.058155, 0.058155, 0.058155
), CHROM = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), POS = c(18052645L,
18052645L, 18052645L, 18052645L, 18052645L, 18052645L, 18052645L,
18052645L, 18052645L, 18052645L), REF = c("G", "G", "G", "G",
"G", "G", "G", "G", "G", "G"), ALT = c("A", "A", "A", "A", "A",
"A", "A", "A", "A", "A"), A1 = c("G", "G", "G", "G", "G", "G",
"G", "G", "G", "G"), OBS_CT = c(1597L, 1597L, 1597L, 1597L, 1597L,
1597L, 1597L, 1597L, 1597L, 1597L), BETA = c(0.0147047, 0.0147047,
0.0147047, 0.0147047, 0.0147047, 0.0147047, 0.0147047, 0.0147047,
0.0147047, 0.0147047), SE = c(0.0656528, 0.0656528, 0.0656528,
0.0656528, 0.0656528, 0.0656528, 0.0656528, 0.0656528, 0.0656528,
0.0656528), P = c(0.822804, 0.822804, 0.822804, 0.822804, 0.822804,
0.822804, 0.822804, 0.822804, 0.822804, 0.822804), SD = c(2.62364886486368,
2.62364886486368, 2.62364886486368, 2.62364886486368, 2.62364886486368,
2.62364886486368, 2.62364886486368, 2.62364886486368, 2.62364886486368,
2.62364886486368), Variance = c(6.88353336610048, 6.88353336610048,
6.88353336610048, 6.88353336610048, 6.88353336610048, 6.88353336610048,
6.88353336610048, 6.88353336610048, 6.88353336610048, 6.88353336610048
)), row.names = c(NA, 10L), class = "data.frame")
dput() output of mylist (the first 2 list elements):
list(ENSG00000058453.16 = structure(list(variant_id = c("chr1_17726150_G_A_b38",
"chr1_17728143_GC_G_b38", "chr1_17728290_G_A_b38", "chr1_17729225_G_A_b38",
"chr1_17729967_C_T_b38", "chr1_17731217_C_T_b38"), gene_id = c("ENSG00000058453.16",
"ENSG00000058453.16", "ENSG00000058453.16", "ENSG00000058453.16",
"ENSG00000058453.16", "ENSG00000058453.16"), tss_distance = c(804200L,
806193L, 806340L, 807275L, 808017L, 809267L), ma_samples = c(68L,
395L, 167L, 205L, 233L, 233L), ma_count = c(78L, 486L, 183L,
226L, 262L, 263L), maf = c(0.0644628, 0.401653, 0.15124, 0.186777,
0.216529, 0.217355), pval_nominal = c(0.489115, 0.210837, 0.820243,
0.301818, 0.137132, 0.128855), slope = c(-0.0449686, 0.0404518,
0.0097847, 0.0413934, 0.0574705, 0.0585334), slope_se = c(0.0649652,
0.0322899, 0.0430392, 0.0400502, 0.0386021, 0.038484), hg38_chr = c("chr1",
"chr1", "chr1", "chr1", "chr1", "chr1"), hg38_pos = c(17726150L,
17728143L, 17728290L, 17729225L, 17729967L, 17731217L), ref_allele = c("G",
"GC", "G", "G", "C", "C"), alt_allele = c("A", "G", "A", "A",
"T", "T"), hg19_chr = c("chr1", "chr1", "chr1", "chr1", "chr1",
"chr1"), hg19_pos = c(18052645L, 18054638L, 18054785L, 18055720L,
18056462L, 18057712L), ID = c("rs260514:18052645:G:A", "rs35592535:18054638:GC:G",
"rs1572792:18054785:G:A", "rs11580304:18055720:G:A", "rs1890743:18056462:C:T",
"rs7546135:18057712:C:T"), new_MAF = c(0.058155, 0.371673, 0.17466,
0.194694, 0.197464, 0.198691), CHROM = c(1L, 1L, 1L, 1L, 1L,
1L), POS = c(18052645L, 18054638L, 18054785L, 18055720L, 18056462L,
18057712L), REF = c("G", "GC", "G", "G", "C", "C"), ALT = c("A",
"G", "A", "A", "T", "T"), A1 = c("G", "G", "A", "A", "T", "T"
), OBS_CT = c(1597L, 1597L, 1597L, 1597L, 1597L, 1597L), BETA = c(0.0147047,
0.0138673, -0.0126002, 0.00515331, 0.00415908, 0.00597402), SE = c(0.0656528,
0.0269643, 0.0256229, 0.022282, 0.0220529, 0.0217018), P = c(0.822804,
0.607124, 0.622959, 0.817129, 0.850434, 0.783139), SD = c(2.62364886486368,
1.07756036432328, 1.02395469042471, 0.890444032956592, 0.88128862823752,
0.867257800664993), Variance = c(6.88353336610048, 1.16113633876053,
1.04848320804277, 0.792890575828, 0.77666964626077, 0.75213609281428
)), row.names = c(7L, 25L, 45L, 56L, 77L, 94L), class = "data.frame"),
ENSG00000074964.16 = structure(list(variant_id = c("chr1_17726150_G_A_b38",
"chr1_17728143_GC_G_b38", "chr1_17728290_G_A_b38", "chr1_17729225_G_A_b38",
"chr1_17729967_C_T_b38", "chr1_17731217_C_T_b38"), gene_id = c("ENSG00000074964.16",
"ENSG00000074964.16", "ENSG00000074964.16", "ENSG00000074964.16",
"ENSG00000074964.16", "ENSG00000074964.16"), tss_distance = c(186315L,
188308L, 188455L, 189390L, 190132L, 191382L), ma_samples = c(68L,
395L, 167L, 205L, 233L, 233L), ma_count = c(78L, 486L, 183L,
226L, 262L, 263L), maf = c(0.0644628, 0.401653, 0.15124,
0.186777, 0.216529, 0.217355), pval_nominal = c(0.966721,
0.954589, 0.17366, 0.865996, 0.547435, 0.565949), slope = c(0.00151619,
0.00102964, -0.0327149, -0.00378263, -0.01301, -0.0123769
), slope_se = c(0.0363244, 0.0180728, 0.0240136, 0.0224053,
0.0216116, 0.021548), hg38_chr = c("chr1", "chr1", "chr1",
"chr1", "chr1", "chr1"), hg38_pos = c(17726150L, 17728143L,
17728290L, 17729225L, 17729967L, 17731217L), ref_allele = c("G",
"GC", "G", "G", "C", "C"), alt_allele = c("A", "G", "A",
"A", "T", "T"), hg19_chr = c("chr1", "chr1", "chr1", "chr1",
"chr1", "chr1"), hg19_pos = c(18052645L, 18054638L, 18054785L,
18055720L, 18056462L, 18057712L), ID = c("rs260514:18052645:G:A",
"rs35592535:18054638:GC:G", "rs1572792:18054785:G:A", "rs11580304:18055720:G:A",
"rs1890743:18056462:C:T", "rs7546135:18057712:C:T"), new_MAF = c(0.058155,
0.371673, 0.17466, 0.194694, 0.197464, 0.198691), CHROM = c(1L,
1L, 1L, 1L, 1L, 1L), POS = c(18052645L, 18054638L, 18054785L,
18055720L, 18056462L, 18057712L), REF = c("G", "GC", "G",
"G", "C", "C"), ALT = c("A", "G", "A", "A", "T", "T"), A1 = c("G",
"G", "A", "A", "T", "T"), OBS_CT = c(1597L, 1597L, 1597L,
1597L, 1597L, 1597L), BETA = c(0.0147047, 0.0138673, -0.0126002,
0.00515331, 0.00415908, 0.00597402), SE = c(0.0656528, 0.0269643,
0.0256229, 0.022282, 0.0220529, 0.0217018), P = c(0.822804,
0.607124, 0.622959, 0.817129, 0.850434, 0.783139), SD = c(2.62364886486368,
1.07756036432328, 1.02395469042471, 0.890444032956592, 0.88128862823752,
0.867257800664993), Variance = c(6.88353336610048, 1.16113633876053,
1.04848320804277, 0.792890575828, 0.77666964626077, 0.75213609281428
)), row.names = c(9L, 23L, 38L, 66L, 76L, 97L), class = "data.frame"))
Comments
Post a Comment