dictionary#

200k-English-Malay#

Gathered from Google Translate.

Citation#

% BibTeX entry for the 200k-English-Malay dictionary. Only the citation key may
% precede the first field, so the tagline and dataset name are stored in `note`.
@misc{Malay-Dataset-200k-English-Malay,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/200k-english-malay}},
  note = {We gather Bahasa Malaysia corpus! 200k-English-Malay}
}

Antonym#

Gathered from Google Translate.

Citation#

% BibTeX entry for the Antonym dictionary. Only the citation key may precede
% the first field, so the tagline and dataset name are stored in `note`.
@misc{Malay-Dataset-Antonym,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/antonym}},
  note = {We gather Bahasa Malaysia corpus! Antonym}
}

Cambridge English-Malaysian#

Crawled from https://dictionary.cambridge.org/browse/english-malaysian/; 25171 English-Malaysian words.

Citation#

% BibTeX entry for the crawled Cambridge English-Malaysian dictionary. Only the
% citation key may precede the first field, so the tagline and dataset name are
% stored in `note`.
% NOTE(review): the URL segment `cambrige` is kept exactly as published; confirm
% it matches the repository directory name before "correcting" the spelling.
@misc{Malay-Dataset-Cambridge-English-Malaysian,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/cambrige}},
  note = {We gather Bahasa Malaysia corpus! Crawled Cambridge English-Malaysian}
}

DBP#

Crawled from https://prpm.dbp.gov.my/Cari1?keyword=, 78670 words.

{
"definisi": [
"[me.nya.bit.kan]  |  \u0645\u06bd\u0627\u0628\u064a\u062a\u06a9\u0646Definisi : menyabit utk: petang itu dia ~ ayahnya rumput utk lembu ternakan mereka;\u00a0(Kamus Dewan Edisi Keempat)"
],
"tesaurus": [
"bersabit",
"menyabitkan"
]
}

Citation#

% BibTeX entry for the crawled DBP dictionary. Only the citation key may precede
% the first field, so the tagline and dataset name are stored in `note`.
% NOTE(review): the URL previously pointed at `dictionary/cambrige` (the
% Cambridge dataset); it now targets `dictionary/dbp` -- confirm that this is
% the actual directory name in the repository.
@misc{Malay-Dataset-DBP,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/dbp}},
  note = {We gather Bahasa Malaysia corpus! Crawled DBP}
}

Dialect#

Gathered from http://prpm.dbp.gov.my/Cari1?keyword=%3d&d=150348&

Citation#

% BibTeX entry for the Dialect dictionary. Only the citation key may precede
% the first field, so the tagline and dataset name are stored in `note`.
@misc{Malay-Dataset-Dialect,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/dialect}},
  note = {We gather Bahasa Malaysia corpus! Dialect}
}

Translated Emoji#

Translated from https://unicode.org/Public/emoji/15.0/emoji-test.txt into Malay.

Citation#

% BibTeX entry for the Translated Emoji dictionary. Only the citation key may
% precede the first field, so the tagline and dataset name are stored in `note`.
@misc{Malay-Dataset-Translated-Emoji,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/emoji}},
  note = {We gather Bahasa Malaysia corpus! Translated Emoji}
}

IPA#

Mirror of https://raw.githubusercontent.com/open-dict-data/ipa-dict/master/data/ma.txt, 28k samples.

Citation#

% BibTeX entry for the upstream open-dict-data IPA dictionary project.
% `{IPA-dict}` and `{IPA}` are brace-protected so sentence-casing bibliography
% styles do not lowercase the acronym.
@misc{open-dict-data,
  author = {Open-Dict-Data},
  title = {Open-dict-data/{IPA-dict}: Monolingual wordlists with pronunciation information in {IPA}},
  journal = {GitHub},
  url = {https://github.com/open-dict-data/ipa-dict}
}

Ngram#

Unigram and bigram dictionaries from Bahasa Wikipedia.

Citation#

% BibTeX entry for the Ngram dictionaries. Only the citation key may precede
% the first field, so the tagline and dataset name are stored in `note`.
@misc{Malay-Dataset-Ngram,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/ngram}},
  note = {We gather Bahasa Malaysia corpus! Ngram}
}

Synonym#

Gathered from Google Translate.

Citation#

% BibTeX entry for the 90k-Malay-Synonym dictionary. Only the citation key may
% precede the first field, so the tagline and dataset name are stored in `note`.
@misc{Malay-Dataset-90k-Malay-Synonym,
  author = {Husein, Zolkepli},
  title = {Malay-Dataset},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/huseinzol05/malay-dataset/tree/master/dictionary/synonym}},
  note = {We gather Bahasa Malaysia corpus! 90k-Malay-Synonym}
}