License: Creative Commons Attribution 4.0 International license (CC BY 4.0)
When quoting this document, please refer to the following
DOI: 10.4230/OASIcs.LDK.2021.17
URN: urn:nbn:de:0030-drops-145532
URL: http://dagstuhl.sunsite.rwth-aachen.de/volltexte/2021/14553/
Harrando, Ismail ;
Troncy, Raphaël
Explainable Zero-Shot Topic Extraction Using a Common-Sense Knowledge Graph
Abstract
Pre-trained word embeddings constitute an essential building block for many NLP systems and applications, notably when labeled data is scarce. However, since they compress word meanings into a fixed-dimensional representation, their use usually lacks interpretability beyond a measure of similarity and linear analogies that do not always reflect real-world word relatedness, which can be important for many NLP applications. In this paper, we propose a model which extracts topics from text documents based on the common-sense knowledge available in ConceptNet [Speer et al., 2017] - a semantic concept graph that explicitly encodes real-world relations between words - and without any human supervision. When combining both ConceptNet’s knowledge graph and graph embeddings, our approach outperforms other baselines in the zero-shot setting, while generating a human-understandable explanation for its predictions through the knowledge graph. We study the importance of some modeling choices and criteria for designing the model, and we demonstrate that it can be used to label data for a supervised classifier to achieve an even better performance without relying on any human-annotated training data. We publish the code of our approach at https://github.com/D2KLab/ZeSTE and we provide a user-friendly demo at https://zeste.tools.eurecom.fr/.
BibTeX - Entry
% Conference paper, article 17 of OASIcs volume 93 (LDK 2021). Accented letters use the brace-wrapped form (e.g. {\"e}) so BibTeX treats each as a single letter.
@inproceedings{harrando_et_al:OASIcs.LDK.2021.17,
  author    = {Harrando, Ismail and Troncy, Rapha{\"e}l},
  title     = {Explainable Zero-Shot Topic Extraction Using a Common-Sense Knowledge Graph},
  booktitle = {3rd Conference on Language, Data and Knowledge (LDK 2021)},
  pages     = {17:1--17:15},
  series    = {Open Access Series in Informatics (OASIcs)},
  isbn      = {978-3-95977-199-3},
  issn      = {2190-6807},
  year      = {2021},
  volume    = {93},
  editor    = {Gromann, Dagmar and S{\'e}rasset, Gilles and Declerck, Thierry and McCrae, John P. and Gracia, Jorge and Bosque-Gil, Julia and Bobillo, Fernando and Heinisch, Barbara},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/opus/volltexte/2021/14553},
  urn       = {urn:nbn:de:0030-drops-145532},
  doi       = {10.4230/OASIcs.LDK.2021.17},
  annote    = {Keywords: Topic Extraction, Zero-Shot Classification, Explainable NLP, Knowledge Graph}
}