편집 기록

편집 기록
  • 프로필 nowp님의 편집
    날짜2021.05.31

    script and nltk


    import nltk
    nltk.download('all')
    
    from urllib import request
    url = "https://www.gutenberg.org/files/64317/64317-0.txt"
    response = request.urlopen(url)
    raw = response.read().decode('utf8')
    print(type(raw))
    
    print(len(raw))
    
    
    
    from nltk import word_tokenize
    tokens = word_tokenize(raw)
    print(type(tokens))
    print(len(tokens))
    
    text = nltk.pos_tag(tokens)
    
    
    
    
    #How many PRP tags are there in The Great Gatsby if you ignore stopwords?
    
    #Use nltk's stopword list and count the words tagged as PRP if that word is not in the stopword list.
    
    from nltk.corpus import stopwords
    nltk.download('stopwords')
    from nltk.tokenize import word_tokenize
    
    text = nltk.pos_tag(tokens)
    text_tokens = word_tokenize(text)
    
    tokens_without_sw = [word for word in text_tokens if not word in stopwords.words()]
    
    print(tokens_without_sw)
    

    주석으로 되어 있는 게 도저히 어떻게 푸는지 모르겠어요.

  • 프로필 ᅟᅟᅟᅟ님의 편집
    날짜2021.05.30

    script and nltk


    import nltk
    nltk.download('all')
    
    from urllib import request
    url = "https://www.gutenberg.org/files/64317/64317-0.txt"
    response = request.urlopen(url)
    raw = response.read().decode('utf8')
    print(type(raw))
    
    print(len(raw))
    
    
    
    from nltk import word_tokenize
    tokens = word_tokenize(raw)
    print(type(tokens))
    print(len(tokens))
    
    text = nltk.pos_tag(tokens)
    
    
    
    
    #How many PRP tags are there in The Great Gatsby if you ignore stopwords?
    
    #Use nltk's stopword list and count the words tagged as PRP if that word is not in the stopword list.
    
    from nltk.corpus import stopwords
    nltk.download('stopwords')
    from nltk.tokenize import word_tokenize
    
    text = nltk.pos_tag(tokens)
    text_tokens = word_tokenize(text)
    
    tokens_without_sw = [word for word in text_tokens if not word in stopwords.words()]
    
    print(tokens_without_sw)
    

    주석으로 되어 있는 게 도저히 어떻게 푸는지 모르겠어요 ㅜㅜ

  • 프로필 알 수 없는 사용자님의 편집
    날짜2021.05.29

    script and nltk


    import nltk nltk.download('all')

    from urllib import request url = "https://www.gutenberg.org/files/64317/64317-0.txt" response = request.urlopen(url) raw = response.read().decode('utf8') print(type(raw))

    print(len(raw))

    from nltk import word_tokenize tokens = word_tokenize(raw) print(type(tokens)) print(len(tokens))

    text = nltk.pos_tag(tokens)

    How many PRP tags are there in The Great Gatsby if you ignore stopwords?

    Use nltk's stopword list and count the words tagged as PRP if that word is not in the stopword list.

    from nltk.corpus import stopwords nltk.download('stopwords') from nltk.tokenize import word_tokenize

    text = nltk.pos_tag(tokens) text_tokens = word_tokenize(text)

    tokens_without_sw = [word for word in text_tokens if not word in stopwords.words()]

    print(tokens_without_sw)

    주석으로 되어 있는 게 도저히 어떻게 푸는지 모르겠어요 ㅜㅜ