summarization
Contents
summarization#
ChatGPT Bahasa News Summarization#
CNN News#
Original paper, https://arxiv.org/pdf/1704.04368.pdf
download#
Citation#
@article{DBLP:journals/corr/SeeLM17,
  author        = {Abigail See and Peter J. Liu and Christopher D. Manning},
  title         = {Get To The Point: Summarization with Pointer-Generator Networks},
  journal       = {CoRR},
  volume        = {abs/1704.04368},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1704.04368},
  url           = {http://arxiv.org/abs/1704.04368},
  timestamp     = {Mon, 13 Aug 2018 16:46:08 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SeeLM17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
CNN News#
Original paper, https://arxiv.org/pdf/1704.04368.pdf
download#
translated-cnn.json, https://f000.backblazeb2.com/file/malay-dataset/summarization/cnn-news/translated-cnn.json
train set, https://f000.backblazeb2.com/file/malay-dataset/summarization/cnn-news/translated-cnn-train.json
test set, https://f000.backblazeb2.com/file/malay-dataset/summarization/cnn-news/translated-cnn-test.json
Citation#
@article{DBLP:journals/corr/SeeLM17,
  author        = {Abigail See and Peter J. Liu and Christopher D. Manning},
  title         = {Get To The Point: Summarization with Pointer-Generator Networks},
  journal       = {CoRR},
  volume        = {abs/1704.04368},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1704.04368},
  url           = {http://arxiv.org/abs/1704.04368},
  timestamp     = {Mon, 13 Aug 2018 16:46:08 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SeeLM17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
Download#
part1, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/translated-0-5000.json
part2, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/cnn-news-translated-5000-10000.json
part3, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/translated-10000-20000.json
part4, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/translated-20000-30000.json
part5, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/cnn-news-translated-30000-40000.json
part6, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/cnn-news-translated-50000-60000.json
part7, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/cnn-news-translated-60000-70000.json
part8, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/cnn-news-translated-70000-80000.json
part9, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/cnn-news-translated-80000-90000.json
part10, https://f000.backblazeb2.com/file/malay-dataset/summary/cnn/cnn-news-translated-90000-100000.json
Dailymail#
Original paper, https://arxiv.org/pdf/1704.04368.pdf
download#
translated-dailymail.json, https://f000.backblazeb2.com/file/malay-dataset/summarization/dailymail/translated-dailymail.json
Citation#
@article{DBLP:journals/corr/SeeLM17,
  author        = {Abigail See and Peter J. Liu and Christopher D. Manning},
  title         = {Get To The Point: Summarization with Pointer-Generator Networks},
  journal       = {CoRR},
  volume        = {abs/1704.04368},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1704.04368},
  url           = {http://arxiv.org/abs/1704.04368},
  timestamp     = {Mon, 13 Aug 2018 16:46:08 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SeeLM17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
Gigawords#
download#
part1, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-0.json
part2, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-100000.json
part3, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-200000.json
part4, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-300000.json
part5, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-400000.json
part6, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-500000.json
part7, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-600000.json
part8, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-700000.json
part9, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-800000.json
part10, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-900000.json
part11, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1000000.json
part12, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1100000.json
part13, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1200000.json
part14, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1300000.json
part15, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1400000.json
part16, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1500000.json
part17, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1600000.json
part18, https://f000.backblazeb2.com/file/malay-dataset/summary/gigawords/translated-1700000.json
Citation#
@article{graff2003english,
  author  = {Graff, David and Kong, Junbo and Chen, Ke and Maeda, Kazuaki},
  title   = {{English} {Gigaword}},
  journal = {Linguistic Data Consortium, Philadelphia},
  volume  = {4},
  number  = {1},
  pages   = {34},
  year    = {2003},
}
@inproceedings{Rush_2015,
  author    = {Rush, Alexander M. and Chopra, Sumit and Weston, Jason},
  title     = {A Neural Attention Model for Abstractive Sentence Summarization},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  year      = {2015},
  doi       = {10.18653/v1/D15-1044},
  url       = {https://doi.org/10.18653/v1/D15-1044},
}
Multinews#
download#
prefix, https://f000.backblazeb2.com/file/malay-dataset/
summary/multinews/translated-multinews-0.json
summary/multinews/translated-multinews-10500.json
summary/multinews/translated-multinews-11200.json
summary/multinews/translated-multinews-11900.json
summary/multinews/translated-multinews-12600.json
summary/multinews/translated-multinews-13300.json
summary/multinews/translated-multinews-1400.json
summary/multinews/translated-multinews-14000.json
summary/multinews/translated-multinews-14700.json
summary/multinews/translated-multinews-15400.json
summary/multinews/translated-multinews-16100.json
summary/multinews/translated-multinews-16800.json
summary/multinews/translated-multinews-17500.json
summary/multinews/translated-multinews-18200.json
summary/multinews/translated-multinews-18900.json
summary/multinews/translated-multinews-19600.json
summary/multinews/translated-multinews-20300.json
summary/multinews/translated-multinews-2100.json
summary/multinews/translated-multinews-21000.json
summary/multinews/translated-multinews-21700.json
summary/multinews/translated-multinews-22400.json
summary/multinews/translated-multinews-23100.json
summary/multinews/translated-multinews-23800.json
summary/multinews/translated-multinews-24500.json
summary/multinews/translated-multinews-25200.json
summary/multinews/translated-multinews-25900.json
summary/multinews/translated-multinews-26600.json
summary/multinews/translated-multinews-27300.json
summary/multinews/translated-multinews-2800.json
summary/multinews/translated-multinews-28000.json
summary/multinews/translated-multinews-28700.json
summary/multinews/translated-multinews-29400.json
summary/multinews/translated-multinews-30100.json
summary/multinews/translated-multinews-30800.json
summary/multinews/translated-multinews-31500.json
summary/multinews/translated-multinews-32200.json
summary/multinews/translated-multinews-32900.json
summary/multinews/translated-multinews-33600.json
summary/multinews/translated-multinews-34300.json
summary/multinews/translated-multinews-3500.json
summary/multinews/translated-multinews-35000.json
summary/multinews/translated-multinews-35700.json
summary/multinews/translated-multinews-36400.json
summary/multinews/translated-multinews-37100.json
summary/multinews/translated-multinews-37800.json
summary/multinews/translated-multinews-38500.json
summary/multinews/translated-multinews-39200.json
summary/multinews/translated-multinews-39900.json
summary/multinews/translated-multinews-40600.json
summary/multinews/translated-multinews-41300.json
summary/multinews/translated-multinews-4200.json
summary/multinews/translated-multinews-42000.json
summary/multinews/translated-multinews-42700.json
summary/multinews/translated-multinews-43400.json
summary/multinews/translated-multinews-44100.json
summary/multinews/translated-multinews-44800.json
summary/multinews/translated-multinews-45500.json
summary/multinews/translated-multinews-46200.json
summary/multinews/translated-multinews-46900.json
summary/multinews/translated-multinews-47600.json
summary/multinews/translated-multinews-48300.json
summary/multinews/translated-multinews-4900.json
summary/multinews/translated-multinews-49000.json
summary/multinews/translated-multinews-49700.json
summary/multinews/translated-multinews-50400.json
summary/multinews/translated-multinews-51100.json
summary/multinews/translated-multinews-51800.json
summary/multinews/translated-multinews-52500.json
summary/multinews/translated-multinews-53200.json
summary/multinews/translated-multinews-53900.json
summary/multinews/translated-multinews-54600.json
summary/multinews/translated-multinews-55300.json
summary/multinews/translated-multinews-5600.json
summary/multinews/translated-multinews-56000.json
summary/multinews/translated-multinews-6300.json
summary/multinews/translated-multinews-700.json
summary/multinews/translated-multinews-7000.json
summary/multinews/translated-multinews-7700.json
summary/multinews/translated-multinews-8400.json
summary/multinews/translated-multinews-9100.json
summary/multinews/translated-multinews-9800.json
Citation#
@article{DBLP:journals/corr/abs-1906-01749,
  author        = {Alexander R. Fabbri and Irene Li and Tianwei She and Suyi Li and Dragomir R. Radev},
  title         = {{Multi-News}: a Large-Scale Multi-Document Summarization Dataset and Abstractive Hierarchical Model},
  journal       = {CoRR},
  volume        = {abs/1906.01749},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1906.01749},
  url           = {http://arxiv.org/abs/1906.01749},
  timestamp     = {Thu, 13 Jun 2019 13:36:00 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1906-01749.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
Semisupervised Bahasa News Summarization#
download#
how-to#
Sentiment labels for cnn-news, multinews, and semisupervised.
cnn#
multinews#
semisupervised#
news title#
part 1, https://f000.backblazeb2.com/file/malay-dataset/news/summary/sentiment/news-title-0.tsv.sentiment
part 2, https://f000.backblazeb2.com/file/malay-dataset/news/summary/sentiment/news-title-1.tsv.sentiment
part 3, https://f000.backblazeb2.com/file/malay-dataset/news/summary/sentiment/news-title-2.tsv.sentiment
part 4, https://f000.backblazeb2.com/file/malay-dataset/news/summary/sentiment/news-title-3.tsv.sentiment
Xwikis#
Original paper, https://arxiv.org/abs/2202.09583
Huggingface page, https://huggingface.co/datasets/GEM/xwikis
download#
Citation#
@inproceedings{perezbeltrachini2021models,
  author        = {Perez-Beltrachini, Laura and Lapata, Mirella},
  title         = {Models and Datasets for Cross-Lingual Summarisation},
  booktitle     = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  publisher     = {Association for Computational Linguistics},
  year          = {2021},
  archivePrefix = {arXiv},
  eprint        = {2202.09583},
  url           = {https://arxiv.org/abs/2202.09583},
}