Hello dear forum members,
I currently have a task of running about 9,000 files through IBM Watson Personality Insight api. To do this, together with my colleague we created the following code (see below). I realize it's not perfect in any way, but it does the job. However, with small files of <100 words we get an error from the api (also see below). Could you please help me address this issue by improving the code, as my programming skills are insufficient :(( Basically, I'd like the code to skip any file that is <100 words and move on with the rest of the batch. Thank you in advance for your help :)
We made some improvements to pass on any file that is <100 words; however, it still gives the same error.
I currently have a task of running about 9,000 files through IBM Watson Personality Insight api. To do this, together with my colleague we created the following code (see below). I realize it's not perfect in any way, but it does the job. However, with small files of <100 words we get an error from the api (also see below). Could you please help me address this issue by improving the code, as my programming skills are insufficient :(( Basically, I'd like the code to skip any file that is <100 words and move on with the rest of the batch. Thank you in advance for your help :)
import re
import json
from os.path import join, dirname
from watson_developer_cloud import PersonalityInsightsV2
import csv
import sys
import glob
digits=(glob.glob('/Users/.../Desktop/Watson Analysis/2013/*.txt'))
personality_insights = PersonalityInsightsV2(
username='......',
password='......')
with open('/Users/.../Desktop/Watson Analysis/2013/watson_twitter_2013.csv', 'wt') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=',',
quotechar='|', quoting=csv.QUOTE_MINIMAL)
with open(digits[0]) as \
personality_text:
parse=json.dumps(personality_insights.profile(
text=personality_text.read()), indent=2)
newp=re.split("}|{", parse)
v=[ x for x in newp if "id" in x and "percentage" in x ]
d=(re.findall(r'id": (.*?),', str(v)))
w=(re.findall(r'percentage": (.*?),', str(v)))
len(d)
len(w)
print(d)
print(w)
spamwriter.writerow(['doc']+['word_count']+d)
for number in digits:
with open(str(number)) as \
personality_text:
parse=json.dumps(personality_insights.profile(
text=personality_text.read()), indent=2)
newp=re.split("}|{", parse)
v=[ x for x in newp if "id" in x and "percentage" in x ]
d=(re.findall(r'id": (.*?),', str(v)))
w=(re.findall(r'percentage": (.*?),', str(v)))
len(d)
len(w)
print(d)
print(w)
wordcount=(re.findall(r'word_count": (.*?),', parse))
spamwriter.writerow([number]+wordcount+w) Error:---------------------------------------------------------------------------
WatsonApiException Traceback (most recent call last)
<ipython-input-3-26e015583735> in <module>()
33 with open(str(number)) as personality_text:
34 parse=json.dumps(personality_insights.profile(
---> 35 text=personality_text.read()), indent=2)
36 newp=re.split("}|{", parse)
37 v=[ x for x in newp if "id" in x and "percentage" in x ]
/anaconda3/lib/python3.6/site-packages/watson_developer_cloud/personality_insights_v2.py in profile(self, text, content_type, accept, language, csv_headers)
59 response = self.request(
60 method='POST', url='/v2/profile', data=text, params=params,
---> 61 headers=headers)
62 if accept == 'application/json':
63 return response.json()
/anaconda3/lib/python3.6/site-packages/watson_developer_cloud/watson_service.py in request(self, method, url, accept_json, headers, params, json, data, files, **kwargs)
446 error_info = self._get_error_info(response)
447 raise WatsonApiException(response.status_code, error_message,
--> 448 info=error_info, httpResponse=response)
WatsonApiException: Error: The number of words 94 is less than the minimum number of words required for analysis: 100, Code: 400 , X-dp-watson-tran-id: gateway01-2094016729 , X-global-transaction-id: 7ecac92c5ae406a47cd028d9We made some improvements to pass on any file that is <100 words; however, it still gives the same error.
import json
from os.path import join, dirname
from watson_developer_cloud import PersonalityInsightsV2
import csv
import re
import json
from os.path import join, dirname
from watson_developer_cloud import PersonalityInsightsV2
import csv
import sys
import glob
digits=(glob.glob('/Users/.../Desktop/Test/*.txt'))
personality_insights = PersonalityInsightsV2(
username='...',
password='...')
with open('/Users/.../Desktop/Test/Test.csv', 'wt') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=',',
quotechar='|', quoting=csv.QUOTE_MINIMAL)
with open(digits[0]) as \
personality_text:
parse=json.dumps(personality_insights.profile(
text=personality_text.read()), indent=2)
newp=re.split("}|{", parse)
v=[ x for x in newp if "id" in x and "percentage" in x ]
d=(re.findall(r'id": (.*?),', str(v)))
w=(re.findall(r'percentage": (.*?),', str(v)))
len(d)
len(w)
print(d)
print(w)
spamwriter.writerow(['doc']+['word_count']+d)
for number in digits:
with open(str(number)) as \
personality_text:
f = open(str(number),"r")
string = f.read()
s=string.split(" ")
if len(s)<100:
pass
else:
parse=json.dumps(personality_insights.profile(
text=personality_text.read()), indent=2)
newp=re.split("}|{", parse)
v=[ x for x in newp if "id" in x and "percentage" in x ]
d=(re.findall(r'id": (.*?),', str(v)))
w=(re.findall(r'percentage": (.*?),', str(v)))
len(d)
len(w)
print(d)
print(w)
wordcount=(re.findall(r'word_count": (.*?),', parse))
spamwriter.writerow([number]+wordcount+w)
