Jan-25-2020, 11:34 AM
Hello,
i have difficulties with my homework (Task 4).
I don't know how to do this.
I have already an attempt but I think it is wrong and I don't know how to go on.
The task gives me pseudocode as a hint but I can't make code from it.
I know it's not that hard and ist only a few lines, but I have no Idea what to do.
my code for task 1, 2, 3 and my attempt for task 4:
![[Image: c8kWdT3]](https://ibb.co/c8kWdT3)
Task 3 and 4: (Here I get stucked)
![[Image: nnd3pD4]](https://ibb.co/nnd3pD4)
Can someone help me or give me a hint?
Thank you in advance
i have difficulties with my homework (Task 4).
I don't know how to do this.
I have already an attempt but I think it is wrong and I don't know how to go on.
The task gives me pseudocode as a hint but I can't make code from it.
I know it's not that hard and ist only a few lines, but I have no Idea what to do.
my code for task 1, 2, 3 and my attempt for task 4:
import re
class Ngram:
filename = ""
n = 0
raw_counts = {}
prob = {}
cond_prob = {}
# Task 1
def __init__(self, filename="", n=0):
self.filename = filename
self.n = n
# Task 2
def extract_raw_counts(self):
fp = open(self.filename, 'r')
allLines = fp.readlines()
for line in allLines:
tokenLst = tokenize_smart(line.rstrip("\r\n"))
for i in range(0,self.n-1):
tokenLst.insert(0,"BOS")
tokenLst.append("EOS")
for i in range(len(tokenLst)-self.n):
newTuple = tuple(tokenLst[i:i+self.n])
if newTuple in self.raw_counts:
self.raw_counts[newTuple] += 1
else:
self.raw_counts[newTuple] = 1
# Task 3
def extract_probabilities(self):
sumRawCounts = sum(self.raw_counts.values()) + len(self.raw_counts)
for key in self.raw_counts:
self.prob[key] = self.raw_counts[key] / sumRawCounts
# Task 4
def extract_conditional_probabilities(self):
#my attemt for task 4
for key in self.prob:
mgram = key[0:self.n-1]
unigram = key[self.n]
if not mgram in self.prob:
self.prob[mgram] = {}
else:
self.cond_prob[mgram] = unigram
pass
# Task 5
def generate_random_token(self, mgram):
"""
Generate a random next token based on an n-1 gram,
taking into account the probability distribution over the possible next tokens for that n-1-gram.
:param mgram: the n-1 gram to generate the next token for.
:type mgram: a tuple (of length n-1) of strings.
:return a random next token for the n-1-gram.
:rtype str
"""
pass
# Task 6
def generate_random_sentence(self):
"""
Generate a random sentence.
:return a random sentence
:rtype list[str]
"""
pass
def tokenize_smart(sentence):
"""
Tokenize the sentence into tokens (words, punctuation).
:param sentence: the sentence to be tokenized
:type sentence: str
:return: list of tokens in the sentence
:rtype: list[str]
"""
tokens = []
for word in re.sub(r" +", " ", sentence).split():
word = re.sub(r"[\"„”“»«`\(\)]", "", word)
if word != "":
if word[-1] in ".,!?;:":
if len(word) == 1:
tokens += [word]
else:
tokens += [word[:-1], word[-1]]
else:
tokens.append(word)
return tokens
def list2str(sentence):
"""
Convert a sentence given as a list of strings to the sentence as a string separated by whitespace.
:param sentence: the string list to be joined
:type sentence: list[str]
:return: sentence as a string, separated by whitespace
:rtype: str
"""
sentence = " ".join(sentence)
sentence = re.sub(r" ([\.,!\?;:])", r"\1", sentence)
return sentence
if __name__ == '__main__':
# Task 1
print("Task 1:")
ngram_model = Ngram("de-sentences-tatoeba.txt", 2)
print(ngram_model.n, ngram_model.filename)
print(ngram_model.raw_counts, ngram_model.prob, ngram_model.cond_prob)
# Task 2
print("\nTask 2:")
ngram_model.extract_raw_counts()
print(ngram_model.raw_counts[("kaltes", "Land")])
print(ngram_model.raw_counts[("schönes", "Land")])
# Task 3
print("\nTask 3:")
ngram_model.extract_probabilities()
print(ngram_model.prob[("kaltes", "Land")])
print(ngram_model.prob[("schönes", "Land")])
'''
# Task 4
ngram_model.extract_conditional_probabilities()
print(ngram_model.cond_prob[(" beobachteten ",)])
print(ngram_model.cond_prob[("schönes",)][("Land")])
# Task 5
print(ngram_model.generate_random_token(("den",)))
print(ngram_model.generate_random_token(("den",)))
print(ngram_model.generate_random_token(("den",)))
# Task 6
print(list2str(ngram_model.generate_random_sentence()))
print(list2str(ngram_model.generate_random_sentence()))
'''Task 1 and 2: Task 3 and 4: (Here I get stucked)
Can someone help me or give me a hint?
Thank you in advance
