% Conference paper in the proceedings "The Semantic Web" (ESWC 2025),
% series Lecture Notes in Computer Science, pages 244--261.
% Names use the unambiguous "von Last, First" form; the camel-case tool name
% in the title is brace-protected so sentence-casing styles keep its capitals.
% NOTE(review): "address" holds a country rather than the publisher's city,
% and no LNCS "volume" number is recorded -- confirm both against the
% publisher's record before relying on them.
@inproceedings{d74c94dc5ffd4eaf9183d210ab54ea68,
  author    = {Sadruddin, Sameer and D{\textquoteright}Souza, Jennifer and Poupaki, Eleni and Watkins, Alex and Babaei Giglou, Hamed and Rula, Anisa and Karasulu, Bora and Auer, S{\"o}ren and Mackus, Adrie and Kessels, Erwin},
  title     = {{LLMs4SchemaDiscovery}: A Human-in-the-Loop Workflow for Scientific Schema Mining with Large Language Models},
  editor    = {Curry, Edward and Acosta, Maribel and Poveda-Villal{\'o}n, Maria and van Erp, Marieke and Ojo, Adegboyega and Hose, Katja and Shimizu, Cogan and Lisena, Pasquale},
  booktitle = {The Semantic Web},
  series    = {Lecture Notes in Computer Science},
  pages     = {244--261},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2025},
  month     = may,
  day       = {31},
  doi       = {10.1007/978-3-031-94578-6_14},
  isbn      = {9783031945779},
  language  = {English},
  keywords  = {Human-in-the-loop Workflow, Large Language Models, Schema Discovery, Schema Mining, Scientific Schemas},
  abstract  = {Extracting structured information from unstructured text is crucial for modeling real-world processes, but traditional schema mining relies on semi-structured data, limiting scalability. This paper introduces schema-miner, a novel tool that combines large language models with human feedback to automate and refine schema extraction. Through an iterative workflow, it organizes properties from text, incorporates expert input, and integrates domain-specific ontologies for semantic depth. Applied to materials science---specifically atomic layer deposition---schema-miner demonstrates that expert-guided LLMs generate semantically rich schemas suitable for diverse real-world applications.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.; 22nd European Semantic Web Conference, ESWC 2025; Conference date: 01-06-2025 Through 05-06-2025},
}