% Ph.D. dissertation record exported from an institutional repository (record id 2707).
% NOTE(review): the school name is inferred from the ohsu.edu host in the url field;
% the exported record carried only the degree name ("Ph.D.") there -- confirm the institution.
% The "number" field is kept verbatim from the export (it looks like a collection tag, not an
% issue number) and is ignored by standard thesis styles.
@phdthesis{ETD,
  recid    = {2707},
  author   = {Whelan, Christopher},
  title    = {Detecting and analyzing genomic structural variation using distributed computing},
  school   = {Oregon Health \& Science University},
  type     = {{Ph.D.} dissertation},
  year     = {2014},
  number   = {ETD},
  abstract = {Genomic structural variations are an important class of genetic variants with a wide variety of functional impacts. The detection of structural variations using high-throughput short-read sequencing data is a difficult problem, and published algorithms do not provide the sensitivity and specificity required in research and clinical settings. Meanwhile, high-throughput sequencing is rapidly generating ever-larger data sets, necessitating the development of algorithms that can provide results rapidly and scale to use cloud and cluster infrastructures. MapReduce and Hadoop are becoming a standard for managing the distributed processing of large data sets, but existing structural variation detection approaches are difficult to translate into the MapReduce framework. We have formulated a general framework for structural variation detection in MapReduce, and implemented a software package called Cloudbreak, which detects genomic deletions and insertions with very high accuracy compared to existing popular tools.},
  url      = {http://digitalcollections.ohsu.edu/record/2707},
  doi      = {10.6083/M4DJ5CZ2},
}