% Institutional-repository record (OHSU Digital Collections, record id 9633).
% Record date: 2022-04-21 (the export had stored this date in `address`;
% it is now carried by `year`/`month`).
% The citation key `IR` is kept as-is so existing \cite{IR} calls still resolve.
% Typed as @misc because the record names no journal; `howpublished` carries
% the issuing institution for styles that do not print `publisher` on @misc.
@misc{IR,
  author       = {Karlberg, Brian and Lee, Jordan and Wong, Chris and Stuart, Josh and Ellrott, Kyle},
  title        = {Machine learning for cancer subtyping: sampling effects on predictive accuracy and feature selection},
  howpublished = {Oregon Health and Science University},
  publisher    = {Oregon Health and Science University},
  year         = {2022},
  month        = apr,
  doi          = {10.6083/g158bj19x},
  url          = {http://digitalcollections.ohsu.edu/record/9633},
  recid        = {9633},
  abstract     = {Accurate classification of cancer subtypes based on genomic data is a key component of precision oncology. Subtyping allows for better alignment of therapeutics with the specific biology of an individual patient's tumor. Training predictive models for classification requires collection, sequencing, and labeling of patient samples which incurs significant cost and thus motivates an estimation of the minimum number of samples required to attain a particular classification accuracy. In this work, a minimum sample size estimation strategy for machine learning cancer subtype prediction is developed by combining a subsampling method for learning curve generation with an inverse power law curve fitting method.},
}