Text embedding
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import defaultdict
import numpy as np
Docs embedding
TfidfEmbeddingVectorizer
class TfidfEmbeddingVectorizer(object):
    """Embed tokenized documents as IDF-weighted means of word vectors.

    Each document is an iterable of tokens. Every token found in
    ``word2vec`` contributes ``word2vec[token] * idf(token)``; the document
    embedding is the mean of those weighted vectors. Documents with no known
    token are embedded as a zero vector of length ``dim``.

    Args:
        word2vec: Mapping-like object supporting ``token in word2vec`` and
            ``word2vec[token]`` (e.g. a dict of arrays; presumably a trained
            word-embedding model in the calling notebook).
        dim: Length of the zero vector used for documents without any known
            token. When omitted it is inferred from ``word2vec`` and falls
            back to 100 (the value previously hard-coded) if it cannot be
            determined.
    """

    def __init__(self, word2vec, dim=None):
        self.word2vec = word2vec
        self.dim = self._infer_dim(word2vec) if dim is None else int(dim)
        # (weights, max_idf) learned by fit(); None until fit() is called.
        self._idf = None

    @staticmethod
    def _infer_dim(word2vec, default=100):
        """Return the embedding length of ``word2vec``, or ``default``."""
        size = getattr(word2vec, "vector_size", None)
        if size is not None:
            return int(size)
        values = getattr(word2vec, "values", None)
        if callable(values):
            for vector in values():
                try:
                    return len(vector)
                except TypeError:
                    # Scalar "vectors" carry no length information.
                    break
        return default

    @staticmethod
    def _idf_weights(X):
        """Fit TF-IDF on tokenized documents; return (token -> idf, max idf)."""
        # Documents are already tokenized, so the analyzer is the identity.
        tfidf = TfidfVectorizer(analyzer=lambda doc: doc)
        tfidf.fit(X)
        max_idf = max(tfidf.idf_)
        weights = {
            word: tfidf.idf_[index]
            for word, index in tfidf.vocabulary_.items()
        }
        return weights, max_idf

    def fit(self, X, y=None):
        """Learn IDF weights from ``X`` so later transforms reuse them.

        Call this on the training documents to weight test documents with the
        training IDF values instead of statistics refit on the test set.

        Returns:
            self, to allow ``TfidfEmbeddingVectorizer(m).fit(X).transform(X)``.
        """
        self._idf = self._idf_weights(X)
        return self

    def transform(self, X):
        """Return a list with one embedding (``numpy`` array) per document.

        If ``fit`` was never called, the IDF weights are fitted on ``X``
        itself, which is the original behaviour of this method.
        """
        weights, max_idf = self._idf or self._idf_weights(X)
        zero = np.zeros(self.dim)
        array_list = []
        for words in X:
            # Tokens unseen while fitting get the largest IDF: they are at
            # least as rare as the rarest known token.
            weighted = [
                self.word2vec[w] * weights.get(w, max_idf)
                for w in words
                if w in self.word2vec
            ]
            array_list.append(np.array(np.mean(weighted or [zero], axis=0)))
        return array_list
# Wrap each embedding model in a TF-IDF-weighted document embedder.
# NOTE: `w2v_skip_gram` and `w2v_CBOW` are not defined anywhere in this
# snippet; they must be created in an earlier cell (presumably trained
# word2vec models), otherwise this statement raises NameError.
vec_tf_skip_gram, vec_tf_CBOW = (
    TfidfEmbeddingVectorizer(w2v_skip_gram),
    TfidfEmbeddingVectorizer(w2v_CBOW),
)
skip-gram
# Embed the train and test documents with the skip-gram based vectorizer.
# `text_train` / `text_test` come from outside this snippet.
train_tf_s, test_tf_s = map(
    vec_tf_skip_gram.transform, (text_train, text_test)
)
CBOW
# Embed the train and test documents with the CBOW based vectorizer.
# `text_train` / `text_test` come from outside this snippet.
train_tf_c, test_tf_c = map(
    vec_tf_CBOW.transform, (text_train, text_test)
)
NameError Traceback (most recent call last)
<ipython-input-8-58d8af0cf2a6> in <module>
67 return(array_list)
68
---> 69 vec_tf_skip_gram = {TfidfEmbeddingVectorizer}'(w2v_skip_gram)
70 vec_tf_CBOW = 'TfidfEmbeddingVectorizer'(w2v_CBOW)
71 # skip-gram
NameError: name 'w2v_skip_gram' is not defined
The same NameError also occurs for w2v_CBOW. How can I fix this?
보시다시피
vec_tf_skip_gram = {TfidfEmbeddingVectorizer}'(w2v_skip_gram)에서 name error
가 나옵니다
물론 그 다음인
70 vec_tf_CBOW = 'TfidfEmbeddingVectorizer'(w2v_CBOW)
에서도 error가 나오는데 어떻게 해결해야 할까요?