keyphrase
Contents
keyphrase#
KPTimes#
Original website, https://github.com/ygorg/KPTimes
download#
part1, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-0.json.translate
part2, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-50000.json.translate
part3, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-100000.json.translate
part4, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-150000.json.translate
part5, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-200000.json.translate
part6, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-250000.json.translate
part7, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-300000.json.translate
part8, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-350000.json.translate
part9, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-400000.json.translate
part10, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-450000.json.translate
part11, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-500000.json.translate
part12, https://f000.backblazeb2.com/file/malay-dataset/keyphrase/kptimes/kptimes-550000.json.translate
Citation#
@inproceedings{gallina2019kptimes,
title={KPTimes: A Large-Scale Dataset for Keyphrase Generation on News Documents},
author={Gallina, Ygor and Boudin, Florian and Daille, B{\'e}atrice},
booktitle={Proceedings of the 12th International Conference on Natural Language Generation},
pages={130--135},
year={2019}
}
OpenKP#
Original paper, https://arxiv.org/abs/1611.09268
Original website, https://github.com/microsoft/OpenKP
download#
Citation#
@article{DBLP:journals/corr/NguyenRSGTMD16,
author = {Tri Nguyen and
Mir Rosenberg and
Xia Song and
Jianfeng Gao and
Saurabh Tiwary and
Rangan Majumder and
Li Deng},
title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset},
journal = {CoRR},
volume = {abs/1611.09268},
year = {2016},
url = {http://arxiv.org/abs/1611.09268},
archivePrefix = {arXiv},
eprint = {1611.09268},
timestamp = {Mon, 13 Aug 2018 16:49:03 +0200},
biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
Twitter Bahasa Keyphrase#
Keywords extracted from Twitter using a lexicon.
download#
Citation#
@misc{Malay-Dataset,
note = {We gather Bahasa Malaysia corpus!, Extract Keywords from Twitter using Lexicon},
author = {Husein, Zolkepli},
title = {Malay-Dataset},
year = {2018},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/keyphrase/twitter-bahasa}}
}
Xwikis Keyphrase#
Originally from https://huggingface.co/datasets/GEM/xwikis
Translated using T5-Bahasa.
download#
Citation#
@misc{Malay-Dataset,
note = {We gather Bahasa Malaysia corpus!, Translate keywords from Xwikis},
author = {Husein, Zolkepli},
title = {Malay-Dataset},
year = {2018},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/keyphrase/xwikis}}
}