% Master's thesis record exported from the OHSU Digital Collections (record id 43715).
% The original export was typed as an article, stored the degree in `school`,
% the date 2024-09-20 in `address`, and a resolver-prefixed DOI; the fields
% below are normalised to the thesis entry type. `recid`, `number` and `day`
% are repository metadata that standard styles are expected to ignore.
@mastersthesis{ETD,
  author   = {Pipalia, Amrish},
  title    = {Comparative analysis of {NLP} techniques for automated matching of medical intake forms to the {FHIR} data schema: embedding similarity and language models},
  school   = {Oregon Health and Science University},
  type     = {{M.S.} thesis},
  year     = {2024},
  month    = sep,
  day      = {20},
  doi      = {10.6083/bpxhc43715},
  url      = {http://digitalcollections.ohsu.edu/record/43715},
  abstract = {Patient data entry burden remains a challenge. This study evaluated vector embedding search and large language models (LLMs) for automated schema matching between medical intake forms and the Fast Healthcare Interoperability Resources (FHIR) schema. LLMs outperformed vector embedding search, and smaller LLMs performed comparably to larger models. The highest F1 scores for FHIR resource and element matching ranged from 0.63 to 0.80. Semi-automated solutions may be viable, but FHIR schema complexity remains a major challenge.},
  recid    = {43715},
  number   = {ETD},
}