Hi guys,
Pretty straightforward and most likely easy question for you guys here:
I'm trying to create and use my own corpora saved as a .txt file, however, it is not being found
There are two files and their directory is as follows:
/jordanxxx/nltk_data/corpora/short_reviews/neg/neg.txt
/jordanxxx/nltk_data/corpora/short_reviews/pos/pos.txt
any input would be great as I'd really like to use my own bodies of text in the future with something as simple as converting it to a .txt file and copy+pasting into an appropriate spot.
EDIT: I am using Homebrew if that is of any significance
Pretty straightforward and most likely easy question for you guys here:
I'm trying to create and use my own corpora saved as a .txt file, however, it is not being found
There are two files and their directory is as follows:
/jordanxxx/nltk_data/corpora/short_reviews/neg/neg.txt
/jordanxxx/nltk_data/corpora/short_reviews/pos/pos.txt
import nltk
import random
from nltk.corpus import movie_reviews
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from nltk.classify import ClassifierI
from statistics import mode
from nltk import word_tokenize
class VoteClassifier(ClassifierI):
def __init__(self, *classifiers):
self._classifiers = classifiers
def classify(self, features):
votes = []
for c in self._classifiers:
v = c.classify(features)
votes.append(v)
return mode(votes)
def confidence(self, features):
votes = []
for c in self._classifiers:
v = c.classify(features)
votes.append(v)
choice_votes = votes.count(mode(votes))
conf = choice_votes / len(votes)
return conf
short_pos = open("short_reviews/pos.txt", "r").read
short_neg = open("short_reviews/neg.txt", "r").read
documents = []
for r in short_pos.split('\n'):
documents.append((r, "pos"))
for r in short_neg.split('\n'):
documents.append((r, "neg"))
all_words = []
short_pos_words = word.tokenize(short_pos)
short_neg_words = word.tokenize(short_neg)
for w in short_pos_words:
all_words.append(w. lower())
for w in short_neg_words:
all_words.append(w. lower())
all_words = nltk.FreqDist(all_words)Error:Traceback (most recent call last):
File "/Users/jordanXXX/Documents/NLP/bettertrainingdata", line 37, in <module>
short_pos = open("short_reviews/pos.txt", "r").read
IOError: [Errno 2] No such file or directory: 'short_reviews/pos.txt'I have already tried:f=open('neg.txt', 'rU')Error:>>> f=open('neg.txt','rU')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
IOError: [Errno 2] No such file or directory: 'neg.txt'and i'm not really trying to add a whole lot of code to append paths etc unless i have to.any input would be great as I'd really like to use my own bodies of text in the future with something as simple as converting it to a .txt file and copy+pasting into an appropriate spot.
EDIT: I am using Homebrew if that is of any significance
