<style>/*<link href='https://www.blogger.com/dyn-css/authorization.css?targetBlogID=7611661725624235516&zx=ed3c376b-e114-421b-931d-d55f4c099386' media='none' onload='if(media!='all')media='all'' rel='stylesheet'/><noscript><link href='https://www.blogger.com/dyn-css/authorization.css?targetBlogID=7611661725624235516&zx=ed3c376b-e114-421b-931d-d55f4c099386' rel='stylesheet'/></noscript>
<meta name='google-adsense-platform-account' content='ca-host-pub-1556223355139109'/>
<meta name='google-adsense-platform-domain' content='blogspot.com'/>

</head><body>*/</style>

Corpus dalam Natural Language Processing

Root

Nov 30, 2014

Ketika kita berurusan dengan Natural Language Processing (NLP), kita sangat sering menemukan istilah Corpus. Sebenarnya, apa sih Corpus?

Corpus adalah kumpulan teks yang sangat besar yang digunakan untuk analisis linguistik, biasanya disimpan dalam database elektronik sehingga sekumpulan teks yang sangat besar dan banyak tersebut dapat diakses dengan mudah dari komputer. Teks Corpus biasanya terdiri dari ratusan ribu bahkan jutaan kata.

Bentuk jamak dari Corpus adalah Corpora. Beberapa corpora populer adalah International Corpus of English (ICE), the British National Corpus (BNC), COBUILD/Birmingham Corpus, dan IBM/Lancaster Spoken English Corpus.

Corpus bisa terdiri dari bahasa tulisan, bahasa lisan, atau keduanya. Analisis Corpus memberikan informasi leksikal, informasi morfosintaktis, informasi semantik, dan informasi pragmatis.

Kaitan Corpus dengan Natural Language Processing (NLP) adalah bahwa Corpus merupakan teks berukuran besar yang biasanya digunakan sebagai data training dalam bidang NLP.

Mudah-mudahan penjelasan di atas bisa jadi acuan atau pencerahan terhadap istilah Corpus atau Corpora. :)

<style>/*
<script type="text/javascript" src="https://www.blogger.com/static/v1/widgets/3432011497-widgets.js"></script>
<script type='text/javascript'>
window['__wavt'] = 'AEUoTZqqO_vBpqh0uai8UiGWeEPi:1785210764748';_WidgetManager._Init('//www.blogger.com/rearrange?blogID\x3d7611661725624235516','//www.nono.my.id/2014/11/corpus-dalam-natural-language-processing.html','7611661725624235516');
_WidgetManager._SetDataContext([{'name': 'blog', 'data': {'blogId': '7611661725624235516', 'title': 'Nono Heryana', 'url': 'https://www.nono.my.id/2014/11/corpus-dalam-natural-language-processing.html', 'canonicalUrl': 'https://www.nono.my.id/2014/11/corpus-dalam-natural-language-processing.html', 'homepageUrl': 'https://www.nono.my.id/', 'searchUrl': 'https://www.nono.my.id/search', 'canonicalHomepageUrl': 'https://www.nono.my.id/', 'blogspotFaviconUrl': 'https://www.nono.my.id/favicon.ico', 'bloggerUrl': 'https://www.blogger.com', 'hasCustomDomain': true, 'httpsEnabled': true, 'enabledCommentProfileImages': true, 'gPlusViewType': 'FILTERED_POSTMOD', 'adultContent': false, 'analyticsAccountNumber': 'G-38J5V42TT6', 'analytics4': true, 'encoding': 'UTF-8', 'locale': 'id', 'localeUnderscoreDelimited': 'id', 'languageDirection': 'ltr', 'isPrivate': false, 'isMobile': false, 'isMobileRequest': false, 'mobileClass': '', 'isPrivateBlog': false, 'isDynamicViewsAvailable': true, 'feedLinks': '\x3clink rel\x3d\x22alternate\x22 type\x3d\x22application/atom+xml\x22 title\x3d\x22Nono Heryana - Atom\x22 href\x3d\x22https://www.nono.my.id/feeds/posts/default\x22 /\x3e\n\x3clink rel\x3d\x22alternate\x22 type\x3d\x22application/rss+xml\x22 title\x3d\x22Nono Heryana - RSS\x22 href\x3d\x22https://www.nono.my.id/feeds/posts/default?alt\x3drss\x22 /\x3e\n\x3clink rel\x3d\x22service.post\x22 type\x3d\x22application/atom+xml\x22 title\x3d\x22Nono Heryana - Atom\x22 href\x3d\x22https://www.blogger.com/feeds/7611661725624235516/posts/default\x22 /\x3e\n\n\x3clink rel\x3d\x22alternate\x22 type\x3d\x22application/atom+xml\x22 title\x3d\x22Nono Heryana - Atom\x22 href\x3d\x22https://www.nono.my.id/feeds/6309040787292703493/comments/default\x22 /\x3e\n', 'meTag': '', 'adsenseHostId': 'ca-host-pub-1556223355139109', 'adsenseHasAds': false, 'adsenseAutoAds': false, 'boqCommentIframeForm': true, 'loginRedirectParam': '', 'view': '', 'dynamicViewsCommentsSrc': 
'//www.blogblog.com/dynamicviews/4224c15c4e7c9321/js/comments.js', 'dynamicViewsScriptSrc': '//www.blogblog.com/dynamicviews/9e57f68d7dc5b78f', 'plusOneApiSrc': 'https://apis.google.com/js/platform.js', 'disableGComments': true, 'interstitialAccepted': false, 'sharing': {'platforms': [{'name': 'Dapatkan link', 'key': 'link', 'shareMessage': 'Dapatkan link', 'target': ''}, {'name': 'Facebook', 'key': 'facebook', 'shareMessage': 'Bagikan ke Facebook', 'target': 'facebook'}, {'name': 'BlogThis!', 'key': 'blogThis', 'shareMessage': 'BlogThis!', 'target': 'blog'}, {'name': 'X', 'key': 'twitter', 'shareMessage': 'Bagikan ke X', 'target': 'twitter'}, {'name': 'Pinterest', 'key': 'pinterest', 'shareMessage': 'Bagikan ke Pinterest', 'target': 'pinterest'}, {'name': 'Email', 'key': 'email', 'shareMessage': 'Email', 'target': 'email'}], 'disableGooglePlus': true, 'googlePlusShareButtonWidth': 0, 'googlePlusBootstrap': '\x3cscript type\x3d\x22text/javascript\x22\x3ewindow.___gcfg \x3d {\x27lang\x27: \x27id\x27};\x3c/script\x3e'}, 'hasCustomJumpLinkMessage': true, 'jumpLinkMessage': 'Read more \xbb', 'pageType': 'item', 'postId': '6309040787292703493', 'postImageThumbnailUrl': 'https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjn4EfaAfnWTHchAUSvA3becMOplnsFmLNfb0-Im6HUOstIycH9l25fGMydO5_0XVuePrgdNpoy5PZKFWT4SX5QuWidRVKYDkNPP-I_AGpRsJ20KOGHBTMpIgafj2ZWppxrAJpAr5E1BVrt/s72-c/tree+corpus.gif', 'postImageUrl': 'https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjn4EfaAfnWTHchAUSvA3becMOplnsFmLNfb0-Im6HUOstIycH9l25fGMydO5_0XVuePrgdNpoy5PZKFWT4SX5QuWidRVKYDkNPP-I_AGpRsJ20KOGHBTMpIgafj2ZWppxrAJpAr5E1BVrt/s1600/tree+corpus.gif', 'pageName': 'Corpus dalam Natural Language Processing', 'pageTitle': 'Nono Heryana: Corpus dalam Natural Language Processing', 'metaDescription': ''}}, {'name': 'features', 'data': {}}, {'name': 'messages', 'data': {'edit': 'Edit', 'linkCopiedToClipboard': 'Tautan disalin ke papan klip!', 'ok': 'Oke', 'postLink': 'Tautan Pos'}}, {'name': 
'template', 'data': {'name': 'custom', 'localizedName': 'Khusus', 'isResponsive': true, 'isAlternateRendering': false, 'isCustom': true}}, {'name': 'view', 'data': {'classic': {'name': 'classic', 'url': '?view\x3dclassic'}, 'flipcard': {'name': 'flipcard', 'url': '?view\x3dflipcard'}, 'magazine': {'name': 'magazine', 'url': '?view\x3dmagazine'}, 'mosaic': {'name': 'mosaic', 'url': '?view\x3dmosaic'}, 'sidebar': {'name': 'sidebar', 'url': '?view\x3dsidebar'}, 'snapshot': {'name': 'snapshot', 'url': '?view\x3dsnapshot'}, 'timeslide': {'name': 'timeslide', 'url': '?view\x3dtimeslide'}, 'isMobile': false, 'title': 'Corpus dalam Natural Language Processing', 'description': 'Blog Sistem Informasi, Fintech, Manajemen, dan Ilmu Komputer', 'featuredImage': 'https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjn4EfaAfnWTHchAUSvA3becMOplnsFmLNfb0-Im6HUOstIycH9l25fGMydO5_0XVuePrgdNpoy5PZKFWT4SX5QuWidRVKYDkNPP-I_AGpRsJ20KOGHBTMpIgafj2ZWppxrAJpAr5E1BVrt/s1600/tree+corpus.gif', 'url': 'https://www.nono.my.id/2014/11/corpus-dalam-natural-language-processing.html', 'type': 'item', 'isSingleItem': true, 'isMultipleItems': false, 'isError': false, 'isPage': false, 'isPost': true, 'isHomepage': false, 'isArchive': false, 'isLabelSearch': false, 'postId': 6309040787292703493}}, {'name': 'widgets', 'data': [{'title': 'Upload Image', 'type': 'Image', 'sectionId': 'upload-image', 'id': 'Image10'}, {'title': 'Logo', 'type': 'HTML', 'sectionId': 'header-main', 'id': 'HTML10'}, {'title': 'Icons, Dark, Search', 'type': 'LinkList', 'sectionId': 'header-main', 'id': 'LinkList10'}, {'title': 'Menu', 'type': 'LinkList', 'sectionId': 'header-main', 'id': 'LinkList11'}, {'title': '#Popular Weeky', 'type': 'PopularPosts', 'sectionId': 'special-home', 'id': 'PopularPosts11', 'posts': [{'title': 'HODLer Bitcoin Raup $120 Juta Saat Harga Anjlok: Sebuah Analisis', 'id': 3244759862151503373}, {'title': 'Banreservas: Bank Pembiayaan Perdagangan Terbaik Karibia \x26 Pelajaran untuk Indonesia', 'id': 
1758928041508802257}, {'title': 'Tren Kripto Afrika: ABSA-Ripple, Regulasi Ghana, Blockchain.com Nigeria', 'id': 1241157731547840067}, {'title': 'Inovasi MIT: Chip Mikroelektronika Lebih Efisien \x26 Cepat', 'id': 2118531040391320849}, {'title': 'Stagnasi Regulasi Stablecoin AS: BlackRock Merambah DeFi Via Uniswap', 'id': 2880125485448988968}]}, {'title': '#Advertisement', 'type': 'HTML', 'sectionId': 'special-home', 'id': 'HTML25'}, {'title': '#CustomPost by Label', 'type': 'HTML', 'sectionId': 'special-home', 'id': 'HTML26'}, {'title': 'Featured Post', 'type': 'FeaturedPost', 'sectionId': 'before-blog', 'id': 'FeaturedPost1', 'postId': '6706452018783826676'}, {'title': '#Advertisement', 'type': 'HTML', 'sectionId': 'before-post', 'id': 'HTML12'}, {'title': 'Postingan Blog', 'type': 'Blog', 'sectionId': 'blog-post', 'id': 'Blog1', 'posts': [{'id': '6309040787292703493', 'title': 'Corpus dalam Natural Language Processing', 'featuredImage': 'https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjn4EfaAfnWTHchAUSvA3becMOplnsFmLNfb0-Im6HUOstIycH9l25fGMydO5_0XVuePrgdNpoy5PZKFWT4SX5QuWidRVKYDkNPP-I_AGpRsJ20KOGHBTMpIgafj2ZWppxrAJpAr5E1BVrt/s1600/tree+corpus.gif', 'showInlineAds': false}], 'footerBylines': [{'regionName': 'footer1', 'items': [{'name': 'author', 'label': 'by'}, {'name': 'timestamp', 'label': 'MMM d, yyyy'}, {'name': 'comments', 'label': 'Comment'}, {'name': 'icons', 'label': ''}, {'name': 'share', 'label': ''}]}, {'regionName': 'footer2', 'items': [{'name': 'labels', 'label': 'Tags:'}]}], 'allBylineItems': [{'name': 'author', 'label': 'by'}, {'name': 'timestamp', 'label': 'MMM d, yyyy'}, {'name': 'comments', 'label': 'Comment'}, {'name': 'icons', 'label': ''}, {'name': 'share', 'label': ''}, {'name': 'labels', 'label': 'Tags:'}]}, {'title': '#You may also like', 'type': 'HTML', 'sectionId': 'ads-post', 'id': 'HTML15'}, {'title': '#Advertisement', 'type': 'HTML', 'sectionId': 'ads-post', 'id': 'HTML16'}, {'title': 'Popular Posts', 'type': 
'PopularPosts', 'sectionId': 'sidebar-static', 'id': 'PopularPosts10', 'posts': [{'title': 'Installasi MPlayer dan Multimedia codecs di Ubuntu 10.04', 'id': 4774946676056164381}, {'title': 'Inovasi MIT: Chip Mikroelektronika Lebih Efisien \x26 Cepat', 'id': 2118531040391320849}, {'title': 'HODLer Bitcoin Raup $120 Juta Saat Harga Anjlok: Sebuah Analisis', 'id': 3244759862151503373}, {'title': 'Bagaimana cara melihat versi Kernel Linux yang kita gunakan??', 'id': 3785410444973380652}, {'title': 'PCLinuxOS 2010 Gnome Review', 'id': 3050489668607705357}]}, {'title': 'Categories', 'type': 'Label', 'sectionId': 'sidebar-static', 'id': 'Label10'}, {'title': '#Recent Post', 'type': 'HTML', 'sectionId': 'sidebar-static', 'id': 'HTML19'}, {'title': 'About Us', 'type': 'HTML', 'sectionId': 'footer-widget', 'id': 'HTML21'}, {'title': 'Learn More', 'type': 'LinkList', 'sectionId': 'footer-widget', 'id': 'LinkList13'}, {'title': 'Follow Us', 'type': 'LinkList', 'sectionId': 'footer-widget', 'id': 'LinkList14'}, {'title': 'Newsletter', 'type': 'HTML', 'sectionId': 'footer-widget', 'id': 'HTML22'}, {'title': 'Copyright', 'type': 'HTML', 'sectionId': 'copyright', 'id': 'HTML23'}, {'title': 'SVG Icons', 'type': 'HTML', 'sectionId': 'jet-options', 'id': 'HTML27'}, {'title': 'License Key', 'type': 'HTML', 'sectionId': 'jet-options', 'id': 'HTML28'}]}]);
_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image10', 'upload-image', document.getElementById('Image10'), {'resize': false}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML10', 'header-main', document.getElementById('HTML10'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_LinkListView', new _WidgetInfo('LinkList10', 'header-main', document.getElementById('LinkList10'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_LinkListView', new _WidgetInfo('LinkList11', 'header-main', document.getElementById('LinkList11'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_PopularPostsView', new _WidgetInfo('PopularPosts11', 'special-home', document.getElementById('PopularPosts11'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML25', 'special-home', document.getElementById('HTML25'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML26', 'special-home', document.getElementById('HTML26'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_FeaturedPostView', new _WidgetInfo('FeaturedPost1', 'before-blog', document.getElementById('FeaturedPost1'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML12', 'before-post', document.getElementById('HTML12'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_BlogView', new _WidgetInfo('Blog1', 'blog-post', document.getElementById('Blog1'), {'cmtInteractionsEnabled': false, 'lightboxEnabled': true, 'lightboxModuleUrl': 'https://www.blogger.com/static/v1/jsbin/3899201366-lbx.js', 'lightboxCssUrl': 'https://www.blogger.com/static/v1/v-css/828616780-lightbox_bundle.css'}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML15', 'ads-post', document.getElementById('HTML15'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML16', 'ads-post', document.getElementById('HTML16'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_PopularPostsView', new _WidgetInfo('PopularPosts10', 'sidebar-static', document.getElementById('PopularPosts10'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_LabelView', new _WidgetInfo('Label10', 'sidebar-static', document.getElementById('Label10'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML19', 'sidebar-static', document.getElementById('HTML19'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML21', 'footer-widget', document.getElementById('HTML21'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_LinkListView', new _WidgetInfo('LinkList13', 'footer-widget', document.getElementById('LinkList13'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_LinkListView', new _WidgetInfo('LinkList14', 'footer-widget', document.getElementById('LinkList14'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML22', 'footer-widget', document.getElementById('HTML22'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML23', 'copyright', document.getElementById('HTML23'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML27', 'jet-options', document.getElementById('HTML27'), {}, 'displayModeFull'));
_WidgetManager._RegisterWidget('_HTMLView', new _WidgetInfo('HTML28', 'jet-options', document.getElementById('HTML28'), {}, 'displayModeFull'));
</script>
</body>*/</style>