我通过转换情绪分析脚本来使用它们来教我自己(可能是我的第一个错误)类和方法.
我以为我已经掌握了所有方法,但我一直在努力
未定义全局名称“get_bigram_word_feats”
我确定我也会收到get_word_feats的错误,如果它到那么远的话.
我正在撞击这个伟大的时间.我尝试删除staticmethod并添加self.我究竟做错了什么?
这是我的代码:
def word_feats(words):
return dict([(word,True) for word in words])
class SentClassifier:
def __init__(self,name,location):
self.name = name
self.location = location
self.fullpath = location + "/" + name
def doesexist(self):
return os.path.isfile(self.fullpath)
def save_classifier(self):
rf = open(self.fullpath,'wb')
pickle.dump(self.fullpath,rf)
rf.close()
def load_classifier(self):
sf = open(self.fullpath,'rb')
sclassifier = pickle.load(sf)
sf.close()
return sclassifier
class Training:
def __init__(self,neg,pos):
self.neg = neg
self.pos = pos
self.negids = open(self.neg,'rb').read().splitlines(True)
self.posids = open(self.pos,'rb').read().splitlines(True)
self.exclude = set(string.punctuation)
self.exclude = self.exclude,'...'
self.swords = stopwords.words('english')
def tokens(self,words):
words = [w for w in nltk.word_tokenize(words) if w not in self.exclude and len(w) > 1
and w not in self.swords and wordnet.synsets(w)]
return words
def idlist(self,words):
thisidlist = [self.tokens(tf) for tf in words]
return thisidlist
@staticmethod
def get_word_feats(words):
return dict([(word,True) for word in words])
@staticmethod
def get_bigram_word_feats(twords,score_fn=BigramAssocMeasures.chi_sq,tn=200):
words = [w for w in twords]
bigram_finder = BigramCollocationFinder.from_words(words)
bigrams = bigram_finder.nbest(score_fn,tn)
return dict([(ngram,True) for ngram in itertools.chain(words,bigrams)])
@staticmethod
def label_feats(thelist,label):
return [(get_word_feats(lf),label) for lf in thelist]
@staticmethod
def label_grams(thelist,label):
return [(get_bigram_word_feats(gf),label) for gf in thelist()]
@staticmethod
def combinegrams(grams,feats):
for g in grams():
feats.append(g)
return feats
def negidlist(self):
return self.idlist(self.negids)
def posidlist(self):
return self.idlist(self.posids)
def posgrams(self):
return self.label_grams(self.posidlist,'pos')
def neggrams(self):
return self.label_grams(self.negidlist,'neg')
def negwords(self):
return self.label_feats(self.negidlist,'neg')
def poswords(self):
return self.label_feats(self.posidlist,'pos')
def negfeats(self):
return self.combinegrams(self.neggrams,self.negwords)
def posfeats(self):
return self.combinegrams(self.posgrams,self.poswords)
starttime = time.time()
myclassifier = SentClassifier("sentanalyzer.pickle","classifiers")
if myclassifier.doesexist() is False:
print "training new classifier"
trainset = Training('data/neg.txt','data/pos.txt')
negfeats = trainset.negfeats()
posfeats = trainset.posfeats()
negcutoff = len(negfeats) * 8 / 10
poscutoff = len(posfeats) * 8 / 10
trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
print 'train on %d instances,test on %d instances' % (len(trainfeats),len(testfeats))
classifier = NaiveBayesClassifier.train(trainfeats)
print 'accuracy:',nltk.classify.util.accuracy(classifier,testfeats)
myclassifier.save_classifier()
else:
print "using existing classifier"
classifier = myclassifier.load_classifier()
classifier.show_most_informative_features(20)
mystr = "16 steps to an irresistible sales pitch,via @vladblagi: slidesha.re/1bVV7OS"
myfeat = word_feats(nltk.word_tokenize(mystr))
print classifier.classify(myfeat)
probd = classifier.prob_classify(myfeat)
print probd.prob('neg')
print probd.prob('pos')
donetime = time.time() - starttime
print donetime
最佳答案
您需要的所有信息都在异常消息中:
global name ‘get_bigram_word_feats’ is not defined
(我的重点)
Python不理解您要从类中访问该方法,因为您没有将类名指定为方法调用的一部分.因此,它正在全局命名空间中查找该函数,但未能找到它.
如果从调用实例方法中回忆起来,则需要在方法前加上self.使Python解释器看起来正确,虽然你没有指定self,但这也适用于静态方法,而是指定类名.
return [(Training.get_bigram_word_feats(gf),label) for gf in thelist()]
^---+---^
|
+-- you need this part