text-similarity
Contents
text-similarity#
ANLI#
Original website, https://huggingface.co/datasets/anli
download#
Citation#
@inproceedings{nie2019adversarial,
  author    = {Nie, Yixin and Williams, Adina and Dinan, Emily and Bansal, Mohit and Weston, Jason and Kiela, Douwe},
  title     = {Adversarial {NLI}: A New Benchmark for Natural Language Understanding},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  year      = {2020},
  publisher = {Association for Computational Linguistics},
}
MNLI#
Original website, https://cims.nyu.edu/~sbowman/multinli/
download#
Citation#
@inproceedings{N18-1101,
  title     = {A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
  author    = {Williams, Adina and Nangia, Nikita and Bowman, Samuel},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  year      = {2018},
  pages     = {1112--1122},
  publisher = {Association for Computational Linguistics},
  location  = {New Orleans, Louisiana},
  url       = {http://aclweb.org/anthology/N18-1101},
}
Quora#
download#
part1, https://f000.backblazeb2.com/file/malay-dataset/text-similarity/quora/0-100k.json
part2, https://f000.backblazeb2.com/file/malay-dataset/text-similarity/quora/100k-200k.json
part3, https://f000.backblazeb2.com/file/malay-dataset/text-similarity/quora/200k-300k.json
part4, https://f000.backblazeb2.com/file/malay-dataset/text-similarity/quora/300k-400k.json
part5, https://f000.backblazeb2.com/file/malay-dataset/text-similarity/quora/400k-500k.json
Citation#
@misc{kaggle,
  title        = {Quora Question Pairs},
  howpublished = {Kaggle},
  url          = {https://www.kaggle.com/c/quora-question-pairs},
}
SNLI#
Original website, https://nlp.stanford.edu/projects/snli/
download#
Citation#
Samuel R. Bowman, Gabor Angeli, Christopher Potts, and Christopher D. Manning. 2015. A large annotated corpus for learning natural language inference. In Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP).
how-to#
https://f000.backblazeb2.com/file/malay-dataset/text-similarity/snli/part1.json
https://f000.backblazeb2.com/file/malay-dataset/text-similarity/snli/part2.json
https://f000.backblazeb2.com/file/malay-dataset/text-similarity/snli/part3.json
https://f000.backblazeb2.com/file/malay-dataset/text-similarity/snli/part4.json
https://f000.backblazeb2.com/file/malay-dataset/text-similarity/snli/part5.json
https://f000.backblazeb2.com/file/malay-dataset/text-similarity/snli/part6.json
https://f000.backblazeb2.com/file/malay-dataset/text-similarity/snli/part7.json