% Journal article: low-coverage sequencing vs. GWAS arrays (NeuroGAP-Psychosis), AJHG 108(4), 2021.
% - Personal names use the unambiguous "Last, First" form; given names are left unbraced so
%   styles can abbreviate them. The study team is a corporate author and stays brace-protected.
% - The surname {van der Merwe} is kept braced as one unit, exactly as in the exported record.
% - abstract/note use LaTeX escapes rather than raw non-ASCII characters, matching the
%   escapes already used elsewhere in this record.
% - note carries the funding statement and the publisher copyright line exactly once.
@article{b7f2e9e8080344a880c1534478b413d8,
  author    = {{the NeuroGAP-Psychosis Study Team} and Martin, Alicia R. and Atkinson, Elizabeth G. and Chapman, Sin{\'e}ad B. and Stevenson, Anne and Stroud, Rocky E. and Abebe, Tamrat and Akena, Dickens and Alemayehu, Melkam and Ashaba, Fred K. and Atwoli, Lukoye and Bowers, Tera and Chibnik, Lori B. and Daly, Mark J. and DeSmet, Timothy and Dodge, Sheila and Fekadu, Abebaw and Ferriera, Steven and Gelaye, Bizu and Gichuru, Stella and Injera, Wilfred E. and James, Roxanne and Kariuki, Symon M. and Kigen, Gabriel and Koenen, Karestan C. and Kwobah, Edith and Kyebuzibwa, Joseph and Majara, Lerato and Musinguzi, Henry and Mwema, Rehema M. and Neale, Benjamin M. and Newman, Carter P. and Newton, Charles R. J. C. and Pickrell, Joseph K. and Ramesar, Raj and Shiferaw, Welelta and Stein, Dan J. and Teferra, Solomon and {van der Merwe}, Celia and Zingela, Zukiswa},
  title     = {Low-coverage sequencing cost-effectively detects known and novel variation in underrepresented populations},
  journal   = {American Journal of Human Genetics},
  year      = {2021},
  month     = apr,
  day       = {1},
  volume    = {108},
  number    = {4},
  pages     = {656--668},
  doi       = {10.1016/j.ajhg.2021.03.012},
  issn      = {0002-9297},
  publisher = {Cell Press},
  language  = {English},
  keywords  = {Africa, GWAS, GWAS arrays, cost comparison, low-coverage sequencing, study design, whole-genome sequencing},
  abstract  = {Genetic studies in underrepresented populations identify disproportionate numbers of novel associations. However, most genetic studies use genotyping arrays and sequenced reference panels that best capture variation most common in European ancestry populations. To compare data generation strategies best suited for underrepresented populations, we sequenced the whole genomes of 91 individuals to high coverage as part of the Neuropsychiatric Genetics of African Population-Psychosis (NeuroGAP-Psychosis) study with participants from Ethiopia, Kenya, South Africa, and Uganda. We used a downsampling approach to evaluate the quality of two cost-effective data generation strategies, GWAS arrays versus low-coverage sequencing, by calculating the concordance of imputed variants from these technologies with those from deep whole-genome sequencing data. We show that low-coverage sequencing at a depth of $\geq$4$\times$ captures variants of all frequencies more accurately than all commonly used GWAS arrays investigated and at a comparable cost. Lower depths of sequencing (0.5--1$\times$) performed comparably to commonly used low-density GWAS arrays. Low-coverage sequencing is also sensitive to novel variation; 4$\times$ sequencing detects 45\% of singletons and 95\% of common variants identified in high-coverage African whole genomes. Low-coverage sequencing approaches surmount the problems induced by the ascertainment of common genotyping arrays, effectively identify novel variation particularly in underrepresented populations, and present opportunities to enhance variant discovery at a cost similar to traditional approaches.},
  note      = {Funding Information: We thank Juha Karjalainen for his help setting up and troubleshooting a Cromwell server for running all workflows. We thank Laura Gauthier for explanations of components of the Broad Institute Data Science Platform pipelines. We would also like to thank Moses Joloba at Makerere University College of Health Sciences in Kampala, Uganda. This study was funded by the Stanley Center for Psychiatric Research at the Broad Institute. This work was supported by funding from the National Institutes of Health (K99/R00MH117229 to A.R.M.; K01MH121659 and T32MH017119 to E.G.A.). L.B.C., B.G., K.C.K., D.J.S., S.T., and D.A. are supported, in part, by R01MH120642. A.F. is supported by the Medical Research Council and Department for International Development through the Africa Research Leader scheme. Publisher Copyright: {\textcopyright} 2021 American Society of Human Genetics},
}