script and nltk
조회수 606회
import nltk
nltk.download('all')
from urllib import request
url = "https://www.gutenberg.org/files/64317/64317-0.txt"
response = request.urlopen(url)
raw = response.read().decode('utf8')
print(type(raw))
print(len(raw))
from nltk import word_tokenize
tokens = word_tokenize(raw)
print(type(tokens))
print(len(tokens))
text = nltk.pos_tag(tokens)
#How many PRP tags are there in The Great Gatsby if you ignore stopwords?
#Use nltk's stopword list and count the words tagged as PRP if that word is not in the stopword list.
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.tokenize import word_tokenize
text = nltk.pos_tag(tokens)
text_tokens = word_tokenize(text)
tokens_without_sw = [word for word in text_tokens if not word in stopwords.words()]
print(tokens_without_sw)
주석으로 되어 있는 게 도저히 어떻게 푸는지 모르겠어요.
-
(•́ ✖ •̀)
알 수 없는 사용자 - 〉
댓글 입력