# 025. konlpy
# @
# Konlpy internally uses java libraries
# Install konlpy (run this in a shell, not inside Python):
#   pip install konlpy
# Refer to the KoNLPy documentation at konlpy-ko.readthedocs.io/ko/
# Click the "API" button there to see the API reference
# KoNLPy's tag package provides 5 tagger classes:
# Hannanum, Kkma, Komoran, Mecab, and Twitter
# These notes use the "Twitter morphological analyzer"
# because it is the easiest one for working with morphemes:
# its morpheme tag names are spelled out as readable words (e.g. "Noun", "Josa", "Eomi")
# You can choose any of the other morphological analyzers instead
# Morphological analysis is done with just one method call,
# so the process itself is not a big deal
# @
# How to use twitter morphological analyzer
# Newer KoNLPy releases renamed the Twitter tagger to Okt and keep "Twitter"
# only as a deprecated alias that emits a warning. Prefer Okt when it is
# available and fall back to Twitter on older installations, so the rest of
# these notes can keep using the same names.
try:
    from konlpy.tag import Okt as Twitter
except ImportError:
    from konlpy.tag import Twitter
twitter = Twitter()
# Python 3 (e.g. version 3.6) handles unicode strings natively,
# so you don't need to mark string literals with "u"
# like print(twitter.morphs(u'단독입찰보다 복수입찰의 경우'))
# The following are examples of morphological analysis
# with the Twitter morphological analyzer
# morphs() splits the text into morphemes
print(twitter.morphs('단독입찰보다 복수입찰의 경우'))
# < Output shows all morphemes
# < ['단독','입찰','보다','복수','입찰','의','경우']
# nouns() extracts the nouns from the text
print(twitter.nouns('유일하게 항공기 체계 종합개발 경험을 갖고 있는 KAI는'))
# < Output shows nouns
# < ['유일하','항공기','체계','종합','개발','경험']
# pos() returns (morpheme, tag name) pairs
print(twitter.pos('이것도 되나욬ㅋㅋ'))
# < Output shows word and name of morpheme
# < [('이', "Determiner"), ('것', "Noun"), ('도', "Josa"), ('되나욬', "Noun"), ('ㅋㅋ', "KoreanParticle")]
# norm=True normalizes the text before tagging
# (the informal '되나욬' is analyzed as '되' + '나요')
print(twitter.pos('이것도 되나욬ㅋㅋ', norm=True))
# < output:
# < [('이', "Determiner"), ('것', "Noun"), ('도', "Josa"), ('되', "Verb"), ('나요', "Eomi"), ('ㅋㅋ', "KoreanParticle")]
# stem=True additionally reduces words to their stem
# ('되' + '나요' becomes the dictionary form '되다')
print(twitter.pos('이것도 되나욬ㅋㅋ', norm=True, stem=True))
# < output:
# < [('이', "Determiner"), ('것', "Noun"), ('도', "Josa"), ('되다', "Verb"), ('ㅋㅋ', "KoreanParticle")]
# @
# If you run into issues related to jpype,
# you can run "pip install jpype1"
# @
# The form twitter.pos('이것도 되나욬ㅋㅋ', norm=True, stem=True) is the one you will use most often