% Conference paper (ISMIS 2022 proceedings, Lecture Notes in Computer Science series).
% Names are stored in "Last, First" form so BibTeX parses them unambiguously;
% each editor is listed exactly once.
% NOTE(review): `address` holds a country rather than the publisher's city -- confirm
% the correct city against the publisher record before relying on it.
@inproceedings{0cfecaf113f8435d91e68bb5b13bdb46,
  author    = {Ciarlo, Daniele and Portinale, Luigi},
  title     = {Multimodal Deep Learning and Fast Retrieval for Recommendation},
  booktitle = {Foundations of Intelligent Systems - 26th International Symposium, {ISMIS} 2022, Proceedings},
  editor    = {Ceci, Michelangelo and Flesca, Sergio and Masciari, Elio and Manco, Giuseppe and Ra{\'s}, Zbigniew W.},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2022},
  pages     = {52--60},
  doi       = {10.1007/978-3-031-16564-1_6},
  isbn      = {9783031165634},
  language  = {English},
  keywords  = {Locality sensitive hashing, Multimodal embeddings, Recommender systems},
  abstract  = {We propose a retrieval architecture in the context of recommender systems for e-commerce applications, based on a multi-modal representation of the items of interest (textual description and images of the products), paired with a locality-sensitive hashing (LSH) indexing scheme for the fast retrieval of the potential recommendations. In particular, we learn a latent multimodal representation of the items through the use of CLIP architecture, combining text and images in a contrastive way. The item embeddings thus generated are then searched by means of different types of LSH. We report on the experiments we performed on two real-world datasets from e-commerce sites, containing both images and textual descriptions of the products.},
  note      = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 26th International Symposium on Methodologies for Intelligent Systems, ISMIS 2022 ; Conference date: 03-10-2022 Through 05-10-2022},
}