dictionary
Contents
dictionary#
200k-English-Malay#
Gathered from Google Translate.
Citation#
@misc{Malay-Dataset-200k-English-Malay,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/200k-english-malay}},
  note         = {We gather Bahasa Malaysia corpus!, 200k-English-Malay},
}
Antonym#
Gathered from Google Translate.
Citation#
@misc{Malay-Dataset-Antonym,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/antonym}},
  note         = {We gather Bahasa Malaysia corpus!, Antonym},
}
Cambridge English-Malaysian#
Crawled from https://dictionary.cambridge.org/browse/english-malaysian/, 25,171 English-Malaysian words.
download#
Citation#
@misc{Malay-Dataset-Crawled-Cambridge-English-Malaysian,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/cambrige}},
  note         = {We gather Bahasa Malaysia corpus!, Crawled Cambridge English-Malaysian},
}
DBP#
Crawled from https://prpm.dbp.gov.my/Cari1?keyword=, 78670 words.
{
    "definisi": [
        "[me.nya.bit.kan] | \u0645\u06bd\u0627\u0628\u064a\u062a\u06a9\u0646Definisi : menyabit utk: petang itu dia ~ ayahnya rumput utk lembu ternakan mereka;\u00a0(Kamus Dewan Edisi Keempat)"
    ],
    "tesaurus": [
        "bersabit",
        "menyabitkan"
    ]
}
Citation#
@misc{Malay-Dataset-Crawled-DBP,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/dbp}},
  note         = {We gather Bahasa Malaysia corpus!, Crawled DBP},
}
Dialect#
Gathered from http://prpm.dbp.gov.my/Cari1?keyword=%3d&d=150348&
Citation#
@misc{Malay-Dataset-Dialect,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/dialect}},
  note         = {We gather Bahasa Malaysia corpus!, Dialect},
}
Translated Emoji#
Translated https://unicode.org/Public/emoji/15.0/emoji-test.txt into Malay.
Citation#
@misc{Malay-Dataset-Translated-Emoji,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/emoji}},
  note         = {We gather Bahasa Malaysia corpus!, Translated Emoji},
}
IPA#
Mirror of https://raw.githubusercontent.com/open-dict-data/ipa-dict/master/data/ma.txt, 28k samples.
Citation#
@misc{open-dict-data,
  author  = {{Open-Dict-Data}},
  title   = {{Open-dict-data/IPA-dict}: Monolingual wordlists with pronunciation information in {IPA}},
  journal = {GitHub},
  url     = {https://github.com/open-dict-data/ipa-dict},
}
Kamus Dewan#
Ngram#
Unigram and bigram dictionaries from Bahasa Wikipedia.
Citation#
@misc{Malay-Dataset-Ngram,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/ngram}},
  note         = {We gather Bahasa Malaysia corpus!, Ngram},
}
Synonym#
Gathered from Google Translate.
Citation#
@misc{Malay-Dataset-90k-Malay-Synonym,
  author       = {Husein, Zolkepli},
  title        = {Malay-Dataset},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/synonym}},
  note         = {We gather Bahasa Malaysia corpus!, 90k-Malay-Synonym},
}
Wiktionary Bahasa#
Filtered https://kaikki.org/dictionary/rawdata.html for Bahasa entries.