From 1b953d51428c66e65364eb7652f7516352118190 Mon Sep 17 00:00:00 2001
From: Matt Hanlon
Date: Fri, 20 Aug 2021 22:44:05 -0700
Subject: [PATCH 1/6] style fixes

---
 markov.py | 45 +++++++++++++--------------------------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/markov.py b/markov.py
index 23e41bc..7317699 100755
--- a/markov.py
+++ b/markov.py
@@ -1,6 +1,5 @@
 import yaml
 import random
-from secrets import SystemRandom
 from multiprocess import Lock, Manager, Process
 
 BACKUP_FILE = "codebro.yaml"
@@ -19,13 +18,11 @@ def load_corpus(self, source_file: str):
         return yaml.load(infile.read(), Loader=yaml.Loader)
 
     def generate_markov_text(self, words: list, cache: dict, seed_phrase=None):
-        w1, w2 = "", ""
         if seed_phrase:
-            w1, w2 = seed_phrase[0], seed_phrase[1]
+            w1, w2 = seed_phrase[:2]
         else:
-            urandom = SystemRandom()
             valid_starts = [(x[0], x[1]) for x in cache.keys() if x[0] == "<start>"]
-            w1, w2 = valid_starts[urandom.randint(0, len(valid_starts) - 1)]
+            w1, w2 = random.choice(valid_starts)
 
         gen_words = []
         while True:
@@ -59,11 +56,11 @@ def learn(self, sentence: str):
         # strip, uppercase, and check for inclusion in IGNORE_WORDS list
         is_ignored = lambda x: x.strip("\'\"!@#$%^&*().,/\\+=<>?:;").upper() in IGNORE_WORDS
         tokens = [x for x in tokens if not is_ignored(x)]
-        if len(tokens) == 0:
+        if not tokens:
             return # nothing to learn here!
 
-        tokens[len(tokens) - 1] = tokens[len(tokens) - 1].strip(".?!")
-        tokens = [u"<start>"] + tokens + [u"<stop>"]
+        tokens[-1] = tokens[-1].strip(".?!")
+        tokens = [u"<start>", *tokens, u"<stop>"]
         indexes_with_stops = [tokens.index(x) for x in tokens if x.strip(".?!") != x]
         for i in indexes_with_stops[::-1]:
             tokens[i] = tokens[i].strip(".?!")
@@ -76,41 +73,25 @@ def learn(self, sentence: str):
         # there must be a better way to serialize from the proxy ..
         local_words = [word for word in self.words]
         with open('codebro.yaml', 'w') as outfile:
-            lk.acquire()
-            outfile.write(yaml.dump(local_words, default_flow_style=True))
-            lk.release()
+            with lk:
+                outfile.write(yaml.dump(local_words, default_flow_style=True))
 
     def create_response(self, prompt="", learn=False):
-        prompt_tokens = prompt.split()
-
         # set seedword from somewhere in words if there's no prompt
-        if len(prompt_tokens) < 1:
-            seed = random.randint(0, len(self.words)-1)
-            prompt_tokens.append(self.words[seed])
+        prompt_tokens = prompt.split() or [random.choice(self.words)]
 
         # create a set of lookups for phrases that start with words
         # contained in prompt phrase
-        seed_tuples = []
-        for i in range(0, len(prompt_tokens)-2):
-            seed_phrase = ("<start>", prompt_tokens[i])
-            seed_tuples.append(seed_phrase)
+        seed_tuples = [("<start>", tok) for tok in prompt_tokens[:-2]]
 
         # lookup seeds in cache; compile a list of 'hits'
-        seed_phrase = None
-        valid_seeds = []
-        for seed in seed_tuples:
-            if seed in self.cache:
-                valid_seeds.append(seed)
+        valid_seeds = [seed for seed in seed_tuples if seed in self.cache]
 
         # either seed the lookup with a randomly selected valid seed,
         # or if there were no 'hits' generate with no seedphrase
-        if len(valid_seeds) > 0:
-            seed_phrase = valid_seeds[random.randrange(0, len(valid_seeds), 1)]
-            response = self.generate_markov_text(self.words, self.cache, seed_phrase)
-        else:
-            response = self.generate_markov_text(self.words, self.cache)
+        seed_phrase = random.choice(valid_seeds) if valid_seeds else None
+        response = self.generate_markov_text(self.words, self.cache, seed_phrase)
 
         if learn:
-            p = Process(target=self.learn, args=(prompt,))
-            p.start()
+            Process(target=self.learn, args=(prompt,)).start()
         return response

From 1891aeb47a3bbcce6dba97ffb86d8fb1b7dd57fd Mon Sep 17 00:00:00 2001
From: Matt Hanlon
Date: Wed, 25 Aug 2021 02:13:09 -0700
Subject: [PATCH 2/6] some optimizations for markov generation

cache start words separately to avoid scans.
avoid storing useless grams
---
 markov.py | 107 ++++++++++++++++++++++++++----------------------
 1 file changed, 51 insertions(+), 56 deletions(-)

diff --git a/markov.py b/markov.py
index f1aa75e..26ffce4 100755
--- a/markov.py
+++ b/markov.py
@@ -2,6 +2,8 @@
 import random
 from multiprocess import Lock, Manager, Process
 
+START = "<start>"
+STOP = "<stop>"
 
 # instantiate a Markov object with the source file
 class Markov:
@@ -9,32 +11,30 @@ def __init__(self, brain_file: str, ignore_words, skip_mp=False):
         self.brain_file = brain_file
         self.ignore_words = ignore_words
         self.skip_mp = skip_mp
-        if not self.skip_mp:
+        if self.skip_mp:
+            self.words = list(self.load_corpus(brain_file))
+        else:
             self.manager = Manager()
             self.words = self.manager.list(self.load_corpus(brain_file))
-            self.cache = self.manager.dict(self.database(self.words, {}))
-        else:
-            self.words = list(self.load_corpus(brain_file))
-            self.cache = dict(self.database(self.words, {}))
+        self.update_cache()
 
     @classmethod
     def load_corpus(cls, source_file: str):
         with open(source_file, 'r') as infile:
             return yaml.load(infile.read(), Loader=yaml.Loader)
 
-    @classmethod
-    def generate_markov_text(cls, words: list, cache: dict, seed_phrase=None):
-        if seed_phrase:
-            w1, w2 = seed_phrase[:2]
+    def generate_markov_text(self, seed=None):
+        if seed:
+            w1 = seed
         else:
-            valid_starts = [(x[0], x[1]) for x in cache.keys() if x[0] == "<start>"]
-            w1, w2 = random.choice(valid_starts)
+            w1 = random.choice(self.cache[START])
+        w2 = random.choice(self.cache[w1])
 
-        gen_words = []
+        gen_words = [w1]
         while True:
-            if w2 == "<stop>":
+            if w2 == STOP:
                 break
-            w1, w2 = w2, random.choice(cache[(w1, w2)])
+            w1, w2 = w2, random.choice(self.cache[(w1, w2)])
             gen_words.append(w1)
 
         message = ' '.join(gen_words)
@@ -47,67 +47,62 @@ def triples(cls, words):
         for i in range(len(words) - 2):
             yield (words[i], words[i+1], words[i+2])
 
-    def database(self, words: list, cache: dict):
-        for w1, w2, w3 in self.triples(words):
-            key = (w1, w2)
-            if key in cache:
-                if not (w3 in cache[key]):
-                    cache[key].append(w3)
+    def update_cache(self):
+        db = {START: set()}
+        next_word_is_start = True
+        for w1, w2, w3 in self.triples(self.words):
+            if w1 in (START, STOP) or w2 in (START, STOP):
+                next_word_is_start = True
+            else:
+                if next_word_is_start:
+                    db[START].add(w1)
+                    db.setdefault(w1, set()).add(w2)
+                    next_word_is_start = False
+                db.setdefault((w1, w2), set()).add(w3)
+        self.cache = {key: list(val) for key, val in db.items()}
+
+    @classmethod
+    def tokenize(cls, words: list):
+        yield START
+        for w in words:
+            if any(c in w for c in ('.', '?', '!')):
+                yield STOP
+                yield w.strip(".?!")
+                yield START
             else:
-                cache[key] = [w3]
-        return cache
+                yield w
+        yield STOP
 
     def learn(self, sentence: str):
-        tokens = sentence.split()
+        words = sentence.split()
 
         # strip, uppercase, and check for inclusion in IGNORE_WORDS list
         is_ignored = lambda x: x.strip("\'\"!@#$%^&*().,/\\+=<>?:;").upper() in self.ignore_words
-        tokens = [x for x in tokens if not is_ignored(x)]
-        if not tokens:
+        words = [x for x in words if not is_ignored(x)]
+        if not words:
             return # nothing to learn here!
 
-        tokens[-1] = tokens[-1].strip(".?!")
-        tokens = [u"<start>", *tokens, u"<stop>"]
-        indexes_with_stops = [tokens.index(x) for x in tokens if x.strip(".?!") != x]
-        for i in indexes_with_stops[::-1]:
-            tokens[i] = tokens[i].strip(".?!")
-            tokens.insert(i + 1, u"<stop>")
-            tokens.insert(i + 2, u"<start>")
-
-        self.words += tokens
-        self.cache = self.database(self.words, {})
+        self.words += list(self.tokenize(words))
+        self.update_cache()
         lk = None
         if not self.skip_mp:
             lk = Lock()
-        # there must be a better way to serialize from the proxy ..
-        local_words = [word for word in self.words]
         with open(self.brain_file, 'w') as outfile:
             if not self.skip_mp:
                 lk.acquire()
-            outfile.write(yaml.dump(local_words, default_flow_style=True))
+            outfile.write(yaml.dump(list(self.words), default_flow_style=True))
             if not self.skip_mp:
                 lk.release()
 
     def create_response(self, prompt="", learn=False):
         # set seedword from somewhere in words if there's no prompt
-        prompt_tokens = prompt.split() or [random.choice(self.words)]
-
-        # create a set of lookups for phrases that start with words
-        # contained in prompt phrase
-        seed_tuples = [("<start>", tok) for tok in prompt_tokens[:-2]]
-
-        # lookup seeds in cache; compile a list of 'hits'
-        valid_seeds = [seed for seed in seed_tuples if seed in self.cache]
-
-        # either seed the lookup with a randomly selected valid seed,
-        # or if there were no 'hits' generate with no seedphrase
-        seed_phrase = random.choice(valid_seeds) if valid_seeds else None
-        response = self.generate_markov_text(self.words, self.cache, seed_phrase)
-
+        prompt_tokens = prompt.split()
+        valid_seeds = [tok for tok in prompt_tokens[:-2] if tok in self.cache[START]]
+        seed_word = random.choice(valid_seeds) if valid_seeds else None
+        response = self.generate_markov_text(seed_word)
         if learn:
-            if not self.skip_mp:
-                p = Process(target=self.learn, args=(prompt,))
-                p.start()
-            else:
+            if self.skip_mp:
                 self.learn(prompt)
+            else:
+                Process(target=self.learn, args=(prompt,)).start()
         return response

From 606baa634d8b6c01877e1e84a4ed2d430e584cd2 Mon Sep 17 00:00:00 2001
From: Matt Hanlon
Date: Wed, 25 Aug 2021 02:33:54 -0700
Subject: [PATCH 3/6] avoid linear operation when choosing seed

---
 markov.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/markov.py b/markov.py
index 26ffce4..8ecbcb5 100755
--- a/markov.py
+++ b/markov.py
@@ -51,7 +51,7 @@ def update_cache(self):
         db = {START: set()}
         next_word_is_start = True
         for w1, w2, w3 in self.triples(self.words):
-            if w1 in (START, STOP) or w2 in (START, STOP):
+            if w1 in (START, STOP) or w2 in (START, STOP) or w3 == START:
                 next_word_is_start = True
             else:
                 if next_word_is_start:
                     db[START].add(w1)
                     db.setdefault(w1, set()).add(w2)
                     next_word_is_start = False
                 db.setdefault((w1, w2), set()).add(w3)
         self.cache = {key: list(val) for key, val in db.items()}
@@ -97,7 +97,7 @@ def learn(self, sentence: str):
     def create_response(self, prompt="", learn=False):
         # set seedword from somewhere in words if there's no prompt
         prompt_tokens = prompt.split()
-        valid_seeds = [tok for tok in prompt_tokens[:-2] if tok in self.cache[START]]
+        valid_seeds = [tok for tok in prompt_tokens[:-2] if tok in self.cache and tok != START]
         seed_word = random.choice(valid_seeds) if valid_seeds else None
         response = self.generate_markov_text(seed_word)
         if learn:

From 227d123e910d9625ed0f6068e9602b3e1b0e65a8 Mon Sep 17 00:00:00 2001
From: Matt Hanlon
Date: Wed, 25 Aug 2021 02:57:15 -0700
Subject: [PATCH 4/6] reorder methods

---
 markov.py | 58 +++++++++++++++++++++++++++----------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/markov.py b/markov.py
index 8ecbcb5..f610f2f 100755
--- a/markov.py
+++ b/markov.py
@@ -23,23 +23,6 @@ def load_corpus(cls, source_file: str):
         with open(source_file, 'r') as infile:
             return yaml.load(infile.read(), Loader=yaml.Loader)
 
-    def generate_markov_text(self, seed=None):
-        if seed:
-            w1 = seed
-        else:
-            w1 = random.choice(self.cache[START])
-        w2 = random.choice(self.cache[w1])
-
-        gen_words = [w1]
-        while True:
-            if w2 == STOP:
-                break
-            w1, w2 = w2, random.choice(self.cache[(w1, w2)])
-            gen_words.append(w1)
-
-        message = ' '.join(gen_words)
-        return message
-
     @classmethod
     def triples(cls, words):
         if len(words) < 3:
@@ -47,6 +30,18 @@ def triples(cls, words):
         for i in range(len(words) - 2):
             yield (words[i], words[i+1], words[i+2])
 
+    @classmethod
+    def tokenize(cls, words: list):
+        yield START
+        for w in words:
+            if any(c in w for c in ('.', '?', '!')):
+                yield STOP
+                yield w.strip(".?!")
+                yield START
+            else:
+                yield w
+        yield STOP
+
     def update_cache(self):
         db = {START: set()}
         next_word_is_start = True
@@ -61,18 +56,6 @@ def update_cache(self):
                 db.setdefault((w1, w2), set()).add(w3)
         self.cache = {key: list(val) for key, val in db.items()}
 
-    @classmethod
-    def tokenize(cls, words: list):
-        yield START
-        for w in words:
-            if any(c in w for c in ('.', '?', '!')):
-                yield STOP
-                yield w.strip(".?!")
-                yield START
-            else:
-                yield w
-        yield STOP
-
     def learn(self, sentence: str):
         words = sentence.split()
 
@@ -94,6 +77,23 @@ def learn(self, sentence: str):
             if not self.skip_mp:
                 lk.release()
 
+    def generate_markov_text(self, seed=None):
+        if seed:
+            w1 = seed
+        else:
+            w1 = random.choice(self.cache[START])
+        w2 = random.choice(self.cache[w1])
+
+        gen_words = [w1]
+        while True:
+            if w2 == STOP:
+                break
+            w1, w2 = w2, random.choice(self.cache[(w1, w2)])
+            gen_words.append(w1)
+
+        message = ' '.join(gen_words)
+        return message
+
     def create_response(self, prompt="", learn=False):
         # set seedword from somewhere in words if there's no prompt
         prompt_tokens = prompt.split()

From b64d4d99086436616ecb8c89ae82ac55c92d7146 Mon Sep 17 00:00:00 2001
From: Matt Hanlon
Date: Wed, 25 Aug 2021 10:34:06 -0700
Subject: [PATCH 5/6] incremental updates for in-memory markov db

---
 markov.py | 64 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/markov.py b/markov.py
index f610f2f..475dff9 100755
--- a/markov.py
+++ b/markov.py
@@ -30,8 +30,14 @@ def triples(cls, words):
         for i in range(len(words) - 2):
             yield (words[i], words[i+1], words[i+2])
 
-    @classmethod
-    def tokenize(cls, words: list):
+    def _ignore(self, word: str):
+        word.strip("\'\"!@#$%^&*().,/\\+=<>?:;").upper() in self.ignore_words
+
+    def tokenize(self, sentence: str):
+        words = [w for w in sentence.split() if not self._ignore(w)]
+        if not words:
+            return
+
         yield START
         for w in words:
             if any(c in w for c in ('.', '?', '!')):
@@ -42,31 +48,36 @@ def tokenize(cls, words: list):
                 yield w
         yield STOP
 
-    def update_cache(self):
-        db = {START: set()}
-        next_word_is_start = True
-        for w1, w2, w3 in self.triples(self.words):
+    def update_cache(self, new_sentence = None):
+        if new_sentence:
+            db = self.cache
+            words = list(self.tokenize(new_sentence))
+        else:
+            db = {START: []}
+            words = self.words
+
+        start_of_chain = True
+        for w1, w2, w3 in self.triples(words):
             if w1 in (START, STOP) or w2 in (START, STOP) or w3 == START:
-                next_word_is_start = True
+                start_of_chain = True
             else:
-                if next_word_is_start:
-                    db[START].add(w1)
-                    db.setdefault(w1, set()).add(w2)
-                    next_word_is_start = False
-                db.setdefault((w1, w2), set()).add(w3)
-        self.cache = {key: list(val) for key, val in db.items()}
-
-    def learn(self, sentence: str):
-        words = sentence.split()
-
-        # strip, uppercase, and check for inclusion in IGNORE_WORDS list
-        is_ignored = lambda x: x.strip("\'\"!@#$%^&*().,/\\+=<>?:;").upper() in self.ignore_words
-        words = [x for x in words if not is_ignored(x)]
-        if not words:
-            return # nothing to learn here!
+                if start_of_chain:
+                    if w1 not in db[START]:
+                        db[START].append(w1)
+                    next_words = db.setdefault(w1, [])
+                    if w2 not in next_words:
+                        next_words.append(w2)
+                    start_of_chain = False
+                next_words = db.setdefault((w1, w2), [])
+                if w3 not in next_words:
+                    next_words.append(w3)
+        self.cache = db
 
-        self.words += list(self.tokenize(words))
-        self.update_cache()
+    def update_corpus(self, sentence: str):
+        new_words = list(self.tokenize(sentence))
+        if not new_words:
+            return
+        self.words += new_words
         lk = None
         if not self.skip_mp:
             lk = Lock()
@@ -102,7 +113,8 @@ def create_response(self, prompt="", learn=False):
         response = self.generate_markov_text(seed_word)
         if learn:
             if self.skip_mp:
-                self.learn(prompt)
+                self.update_corpus(prompt)
             else:
-                Process(target=self.learn, args=(prompt,)).start()
+                Process(target=self.update_corpus, args=(prompt,)).start()
+            self.update_cache(prompt)
         return response

From a973d80db9acc6c093d6626b7323f2b44d4fe620 Mon Sep 17 00:00:00 2001
From: Matt Hanlon
Date: Wed, 25 Aug 2021 14:30:34 -0700
Subject: [PATCH 6/6] eliminate multiprocessing and improve efficiency

- read brain from either yaml or plain text
- write learned corpus by appending to a plain text file
- use generators where possible to avoid big reads
- don't write new phrases if we don't learn from them
- get rid of <start> and <stop> tokens, they can be implicit in the corpus

This should make us performant enough we can forgo multiprocessing and
do everything in-process
---
 main.py   |  12 ++--
 markov.py | 183 +++++++++++++++++++++++++++++++-----------------------
 2 files changed, 113 insertions(+), 82 deletions(-)

diff --git a/main.py b/main.py
index 4238f7c..d083b96 100755
--- a/main.py
+++ b/main.py
@@ -32,15 +32,15 @@ parser.add_argument('-b', '--brain',
                     env_var="CB_BRAIN",
                     required=True,
-                    help="This bot's brain as a YAML file.")
+                    help="This bot's input brain as a YAML or newline-delimited text file.")
+parser.add_argument('-o', '--output',
+                    env_var="CB_OUTPUT",
+                    required=True,
+                    help="File for writing the updated corpus")
 parser.add_argument('-n', '--name',
                     env_var="CB_NAME",
                     required=True,
                     help="The name this bot will respond to in chats.")
-parser.add_argument('--skip_mp',
-                    env_var="CB_SKIP_MP",
-                    action="store_true",
-                    help="Skip the multiprocess stuff that can hinder debugging.")
 
 args = parser.parse_args()
 
 discord_token = args.discord_token
@@ -48,7 +48,7 @@ slack_app_token = args.slack_app_token
 bot_name = args.name
 
-brain = Markov(args.brain, bot_name.upper(), args.skip_mp)
+brain = Markov(args.brain, args.output, [bot_name])
 
 
 discord_client = discord.Client()

diff --git a/markov.py b/markov.py
index 475dff9..edb782c 100755
--- a/markov.py
+++ b/markov.py
@@ -1,105 +1,140 @@
-import yaml
 import random
-from multiprocess import Lock, Manager, Process
+import yaml
+from itertools import chain, groupby
+
+START_TOK = "<start>"
+STOP_TOK = "<stop>"
 
-START = "<start>"
-STOP = "<stop>"
+STOP = object()
+START = object()
 
 # instantiate a Markov object with the source file
 class Markov:
-    def __init__(self, brain_file: str, ignore_words, skip_mp=False):
-        self.brain_file = brain_file
-        self.ignore_words = ignore_words
-        self.skip_mp = skip_mp
-        if self.skip_mp:
-            self.words = list(self.load_corpus(brain_file))
-        else:
-            self.manager = Manager()
-            self.words = self.manager.list(self.load_corpus(brain_file))
-        self.update_cache()
+    def __init__(self, input_file: str, output_file: str, ignore_words):
+        if input_file == output_file:
+            raise ValueError("input and output files must be different")
 
-    @classmethod
-    def load_corpus(cls, source_file: str):
+        self.ignore_words = set(w.upper() for w in ignore_words)
+        self.output_file = output_file
+        self.update_graph_and_corpus(self.corpus_iter(input_file), init=True)
+
+    def corpus_iter(self, source_file: str):
+        """
+        Emit the contents of the source_file as an iterable of token sequences
+        """
         with open(source_file, 'r') as infile:
-            return yaml.load(infile.read(), Loader=yaml.Loader)
+            # this is dumb
+            if source_file.endswith(".yml") or source_file.endswith(".yaml"):
+                words = yaml.load(infile.read(), Loader=yaml.Loader)
+                for is_delim, phrase in groupby(words, lambda w: w in (START_TOK, STOP_TOK)):
+                    if not is_delim:
+                        yield list(phrase)
+            else:
+                for line in infile:
+                    yield from self.tokenize(line)
+
     @classmethod
-    def triples(cls, words):
-        if len(words) < 3:
+    def triples_and_stop(cls, words):
+        """
+        Emit 3-grams from the sequence of words, the last one ending with the
+        special STOP token
+        """
+        words = chain(words, [STOP])
+        try:
+            w1 = next(words)
+            w2 = next(words)
+            w3 = next(words)
+            while True:
+                yield (w1, w2, w3)
+                w1, w2, w3 = w2, w3, next(words)
+        except StopIteration:
             return
-        for i in range(len(words) - 2):
-            yield (words[i], words[i+1], words[i+2])
 
     def _ignore(self, word: str):
-        word.strip("\'\"!@#$%^&*().,/\\+=<>?:;").upper() in self.ignore_words
+        return word.strip("\'\"!@#$%^&*().,/\\+=<>?:;").upper() in self.ignore_words
 
     def tokenize(self, sentence: str):
-        words = [w for w in sentence.split() if not self._ignore(w)]
-        if not words:
-            return
+        """
+        Emit a sequence of token lists from the string, ignoring ignore_words.
+        A word ending in certain punctuation ends a given token sequence.
+        """
+        cur = []
+        for w in sentence.split():
+            if self._ignore(w):
+                pass
 
-        yield START
-        for w in words:
-            if any(c in w for c in ('.', '?', '!')):
-                yield STOP
-                yield w.strip(".?!")
-                yield START
+            elif any(w.endswith(c) for c in ('.', '?', '!')):
+                w = w.strip(".?!")
+                if w:
+                    cur.append(w)
+                yield(cur)
+                cur = []
             else:
-                yield w
-        yield STOP
+                cur.append(w)
+        if cur:
+            yield cur
 
-    def update_cache(self, new_sentence = None):
-        if new_sentence:
-            db = self.cache
-            words = list(self.tokenize(new_sentence))
-        else:
-            db = {START: []}
-            words = self.words
+    def _update_graph_and_emit_changes(self, token_seqs, init=False):
+        """
+        self.graph stores the graph of n-gram transitions.
+        The keys are single tokens or pairs and the values possible next words in the n-gram.
+        Initial tokens are also specially added to the list at the key START.
 
-        start_of_chain = True
-        for w1, w2, w3 in self.triples(words):
-            if w1 in (START, STOP) or w2 in (START, STOP) or w3 == START:
-                start_of_chain = True
-            else:
+        _update_graph_and_emit_changes returns a generator that when run will
+        update the graph with the ngrams taken from each element of token_seqs.
+
+        Yields the token sequences that result in updates so they can be further
+        acted on.
+
+        if init is True reinitialize from an empty graph
+        """
+        if init:
+            self.graph = {START: []}
+
+        for seq in token_seqs:
+            first = True
+            learned = False
+            for w1, w2, w3 in self.triples_and_stop(seq):
+                if first:
+                    if w1 not in self.graph[START]:
+                        self.graph[START].append(w1)
+                        learned = True
+                    next_words = self.graph.setdefault(w1, [])
                     if w2 not in next_words:
                         next_words.append(w2)
+                        learned = True
+                    first = False
+                next_words = self.graph.setdefault((w1, w2), [])
                 if w3 not in next_words:
                     next_words.append(w3)
+                    learned = True
+            if learned:
+                yield seq
 
-    def update_corpus(self, sentence: str):
-        new_words = list(self.tokenize(sentence))
-        if not new_words:
-            return
-        self.words += new_words
-        lk = None
-        if not self.skip_mp:
-            lk = Lock()
-        with open(self.brain_file, 'w') as outfile:
-            if not self.skip_mp:
-                lk.acquire()
-            outfile.write(yaml.dump(list(self.words), default_flow_style=True))
-            if not self.skip_mp:
-                lk.release()
+    def update_graph_and_corpus(self, token_seqs, init=False):
+        changes = self._update_graph_and_emit_changes(token_seqs, init=init)
+        self.update_corpus(changes, init=init)
+
+    def update_corpus(self, token_seqs, init=False):
+        mode = 'w' if init else 'a'
+        with open(self.output_file, mode) as f:
+            for seq in token_seqs:
+                f.write(" ".join(seq))
+                f.write("\n")
 
     def generate_markov_text(self, seed=None):
-        if seed:
+        if seed and seed in self.graph:
             w1 = seed
         else:
-            w1 = random.choice(self.cache[START])
-        w2 = random.choice(self.cache[w1])
+            w1 = random.choice(self.graph[START])
+        w2 = random.choice(self.graph[w1])
 
         gen_words = [w1]
         while True:
             if w2 == STOP:
                 break
-            w1, w2 = w2, random.choice(self.cache[(w1, w2)])
+            w1, w2 = w2, random.choice(self.graph[(w1, w2)])
             gen_words.append(w1)
 
         message = ' '.join(gen_words)
@@ -108,13 +143,9 @@ def generate_markov_text(self, seed=None):
     def create_response(self, prompt="", learn=False):
         # set seedword from somewhere in words if there's no prompt
         prompt_tokens = prompt.split()
-        valid_seeds = [tok for tok in prompt_tokens[:-2] if tok in self.cache and tok != START]
+        valid_seeds = [tok for tok in prompt_tokens[:-2] if tok in self.graph]
        seed_word = random.choice(valid_seeds) if valid_seeds else None
         response = self.generate_markov_text(seed_word)
         if learn:
-            if self.skip_mp:
-                self.update_corpus(prompt)
-            else:
-                Process(target=self.update_corpus, args=(prompt,)).start()
-            self.update_cache(prompt)
+            self.update_graph_and_corpus(self.tokenize(prompt))
         return response
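
For reference, a minimal sketch of how the reworked Markov class can be driven after PATCH 6/6. This is not part of the patch series: the file names and the ignore list below are illustrative, and the input brain file is assumed to already exist as a newline-delimited text corpus with at least a few sentences in it.

    # illustrative usage only, not from the patches above
    from markov import Markov

    # input and output corpora must be different files; the list holds words
    # (here, the bot's own name) to ignore while learning
    brain = Markov("brain.txt", "corpus_out.txt", ["codebro"])

    # reply seeded from the prompt when a prompt word exists in the graph,
    # otherwise start from a random start word; learn=True also appends any
    # newly learned phrases to corpus_out.txt
    print(brain.create_response("tell me about markov chains", learn=True))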