veriscore 0.0.2 → 0.0.3

Comparing tmp/veriscore-0.0.2.tar.gz & tmp/veriscore-0.0.3.tar.gz

filetype from file(1)

@@ -1 +1 @@
-gzip compressed data, was "veriscore-0.0.2.tar", last modified: Fri May 31 18:43:36 2024, max compression
+gzip compressed data, was "veriscore-0.0.3.tar", last modified: Mon Jun  3 06:40:13 2024, max compression

Comparing veriscore-0.0.2.tar & veriscore-0.0.3.tar

file list

@@ -1,20 +1,22 @@
-drwxrwxr-x   0 yekyungkim (32790) yekyungkim (32790)        0 2024-05-31 18:43:36.448060 veriscore-0.0.2/
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)     1068 2024-05-21 17:48:27.000000 veriscore-0.0.2/LICENSE
--rw-r--r--   0 yekyungkim (32790) yekyungkim (32790)      522 2024-05-31 18:43:36.448060 veriscore-0.0.2/PKG-INFO
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)       11 2024-05-21 17:48:27.000000 veriscore-0.0.2/README.md
-drwxrwxr-x   0 yekyungkim (32790) yekyungkim (32790)        0 2024-05-31 18:43:36.448060 veriscore-0.0.2/VeriScore.egg-info/
--rw-r--r--   0 yekyungkim (32790) yekyungkim (32790)      522 2024-05-31 18:43:36.000000 veriscore-0.0.2/VeriScore.egg-info/PKG-INFO
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)      391 2024-05-31 18:43:36.000000 veriscore-0.0.2/VeriScore.egg-info/SOURCES.txt
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)        1 2024-05-31 18:43:36.000000 veriscore-0.0.2/VeriScore.egg-info/dependency_links.txt
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)       31 2024-05-31 18:43:36.000000 veriscore-0.0.2/VeriScore.egg-info/requires.txt
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)       10 2024-05-31 18:43:36.000000 veriscore-0.0.2/VeriScore.egg-info/top_level.txt
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)       38 2024-05-31 18:43:36.448060 veriscore-0.0.2/setup.cfg
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)      685 2024-05-31 18:42:29.000000 veriscore-0.0.2/setup.py
-drwxrwxr-x   0 yekyungkim (32790) yekyungkim (32790)        0 2024-05-31 18:43:36.448060 veriscore-0.0.2/veriscore/
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)     6857 2024-05-22 19:10:56.000000 veriscore-0.0.2/veriscore/ClaimExtractor.py
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)     4003 2024-05-22 19:10:56.000000 veriscore-0.0.2/veriscore/GetResponse.py
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)     3315 2024-05-22 19:10:56.000000 veriscore-0.0.2/veriscore/SearchAPI.py
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)       22 2024-05-31 18:42:25.000000 veriscore-0.0.2/veriscore/__init__.py
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)    26609 2024-05-22 19:10:56.000000 veriscore-0.0.2/veriscore/claim_extraction_prompt_utils.py
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)     2006 2024-05-22 19:10:56.000000 veriscore-0.0.2/veriscore/evidence_retrieval.py
--rw-rw-r--   0 yekyungkim (32790) yekyungkim (32790)     7023 2024-05-22 19:10:56.000000 veriscore-0.0.2/veriscore/extract_claims.py
+drwxrwsr-x   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)        0 2024-06-03 06:40:13.056623 veriscore-0.0.3/
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     1068 2024-05-21 17:48:27.000000 veriscore-0.0.3/LICENSE
+-rw-r--r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)      543 2024-06-03 06:40:13.051867 veriscore-0.0.3/PKG-INFO
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)       11 2024-05-21 17:48:27.000000 veriscore-0.0.3/README.md
+drwxrwsr-x   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)        0 2024-06-03 06:40:13.046189 veriscore-0.0.3/VeriScore.egg-info/
+-rw-r--r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)      543 2024-06-03 06:40:12.000000 veriscore-0.0.3/VeriScore.egg-info/PKG-INFO
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)      429 2024-06-03 06:40:12.000000 veriscore-0.0.3/VeriScore.egg-info/SOURCES.txt
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)        1 2024-06-03 06:40:12.000000 veriscore-0.0.3/VeriScore.egg-info/dependency_links.txt
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)       37 2024-06-03 06:40:12.000000 veriscore-0.0.3/VeriScore.egg-info/requires.txt
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)       10 2024-06-03 06:40:12.000000 veriscore-0.0.3/VeriScore.egg-info/top_level.txt
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)       38 2024-06-03 06:40:13.057968 veriscore-0.0.3/setup.cfg
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)      700 2024-06-03 06:35:16.000000 veriscore-0.0.3/setup.py
+drwxrwsr-x   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)        0 2024-06-03 06:40:13.041023 veriscore-0.0.3/veriscore/
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)       22 2024-06-03 06:35:16.000000 veriscore-0.0.3/veriscore/__init__.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     7042 2024-06-03 06:11:04.000000 veriscore-0.0.3/veriscore/claim_extractor.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     5002 2024-06-03 06:11:04.000000 veriscore-0.0.3/veriscore/claim_verifier.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     3757 2024-06-03 06:11:04.000000 veriscore-0.0.3/veriscore/extract_claims.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     3839 2024-06-03 06:36:16.000000 veriscore-0.0.3/veriscore/get_response.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     2036 2024-06-03 06:11:04.000000 veriscore-0.0.3/veriscore/retrieval_evidence.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     3123 2024-06-03 06:11:04.000000 veriscore-0.0.3/veriscore/search_API.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     2136 2024-06-03 06:11:04.000000 veriscore-0.0.3/veriscore/verify_claims.py
+-rw-rw-r--   0 yekyungkim_umass_edu  (2662) pi_miyyer_umass_edu (10067)     8287 2024-06-03 06:33:47.000000 veriscore-0.0.3/veriscore/veriscore.py
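
The 0.0.3 file list renames the package modules from CamelCase to snake_case (ClaimExtractor.py → claim_extractor.py, GetResponse.py → get_response.py, SearchAPI.py → search_API.py, evidence_retrieval.py → retrieval_evidence.py) and adds claim_verifier.py, verify_claims.py, and veriscore.py. A minimal sketch of how downstream imports would change, assuming the class names stay as shown in the per-file diffs below:

    # veriscore 0.0.2
    # from veriscore.ClaimExtractor import ClaimExtractor
    # from veriscore.GetResponse import GetResponse
    # from veriscore.SearchAPI import SearchAPI

    # veriscore 0.0.3 (renamed modules, same class names)
    from veriscore.claim_extractor import ClaimExtractor
    from veriscore.get_response import GetResponse
    from veriscore.search_API import SearchAPI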

Comparing veriscore-0.0.2/LICENSE & veriscore-0.0.3/LICENSE

  • Files identical despite different names

Comparing veriscore-0.0.2/PKG-INFO & veriscore-0.0.3/PKG-INFO

  • Files 24% similar despite different names
@@ -1,19 +1,20 @@
 Metadata-Version: 2.1
 Name: VeriScore
-Version: 0.0.2
+Version: 0.0.3
 Summary: Pip package for Verifact
 Home-page: https://github.com/mungg/VeriScore
 Author: Yixio Song
 Author-email: [email protected]
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: spacy
 Requires-Dist: openai
 Requires-Dist: anthropic
 Requires-Dist: tiktoken
 Requires-Dist: tqdm
 
 # VeriScore

Comparing veriscore-0.0.2/VeriScore.egg-info/PKG-INFO & veriscore-0.0.3/VeriScore.egg-info/PKG-INFO

  • Files 24% similar despite different names
@@ -1,19 +1,20 @@
 Metadata-Version: 2.1
 Name: VeriScore
-Version: 0.0.2
+Version: 0.0.3
 Summary: Pip package for Verifact
 Home-page: https://github.com/mungg/VeriScore
 Author: Yixio Song
 Author-email: [email protected]
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: spacy
 Requires-Dist: openai
 Requires-Dist: anthropic
 Requires-Dist: tiktoken
 Requires-Dist: tqdm
 
 # VeriScore

Comparing veriscore-0.0.2/setup.py & veriscore-0.0.3/setup.py

  • Files 9% similar despite different names
@@ -1,15 +1,16 @@
 from setuptools import setup, find_packages
 
 setup(
    name="VeriScore",
-   version="0.0.2",
+   version="0.0.3",
    packages=find_packages(),
    install_requires=[
        #'requests', 'numpy'
+      'spacy',
       'openai',
       'anthropic',
       'tiktoken',
       'tqdm'
    ],
    author="Yixio Song",
    author_email="[email protected]",
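
Both PKG-INFO files and setup.py add spacy to the install requirements in 0.0.3. Since claim_extractor.py calls spacy.load('en_core_web_sm') (see the diff below), a fresh install of 0.0.3 presumably also needs that spaCy model available; a hedged sketch of the check, assuming the model name is unchanged:

    import spacy

    try:
        spacy.load("en_core_web_sm")
    except OSError:
        # the model is not installed with the spacy package itself; fetch it once
        from spacy.cli import download
        download("en_core_web_sm")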

Comparing veriscore-0.0.2/veriscore/ClaimExtractor.py & veriscore-0.0.3/veriscore/claim_extractor.py

  • Files 7% similar despite different names
@@ -1,21 +1,21 @@
 import os
 import regex
 import pdb
 import json
 import spacy
 from tqdm import tqdm
-from .GetResponse import GetResponse
-from .claim_extraction_prompt_utils import non_qa_prompt_temp, qa_prompt_temp
+from .get_response import GetResponse
+
 
 class ClaimExtractor():
-    def __init__(self, model_name, input_file_name, cache_dir):
+    def __init__(self, model_name, cache_dir="./data/cache/"):
         cache_dir = os.path.join(cache_dir, model_name)
         os.makedirs(cache_dir, exist_ok=True)
-        self.cache_file = os.path.join(cache_dir, f"response_decompose_cache_{input_file_name}.json")
+        self.cache_file = os.path.join(cache_dir, f"claim_extraction_cache.json")
         self.spacy_nlp = spacy.load('en_core_web_sm')
         self.get_model_response = GetResponse(cache_file=self.cache_file,
                                               model_name=model_name,
                                               max_tokens=1000,
                                               temperature=0)
         self.system_message = "You are a helpful assistant who can extract verifiable atomic claims from a piece of text. Each atomic fact should be verifiable against reliable external world knowledge (e.g., via Wikipedia)"
 
@@ -34,28 +34,29 @@
         all_facts_lst = []
         # keep track of token counts
         prompt_tok_cnt, response_tok_cnt = 0, 0
         for para in paragraph_lst:
             # split the text into sentences using spaCy
             sentences = self.get_sentence(para)
             for i in range(len(sentences)):
-                lead_sent = sentences[0] # 1st sentence of the para
+                lead_sent = sentences[0]  # 1st sentence of the para
                 context1 = " ".join(sentences[max(0, i - 3):i])
                 sentence = f"<SOS>{sentences[i].strip()}<EOS>"
-                context2 = " ".join(sentences[i+1:i+2])
+                context2 = " ".join(sentences[i + 1:i + 2])
 
                 # if the para is not long
                 if len(sentences) <= 5:
                     snippet = f"{context1.strip()} {sentence.strip()} {context2.strip()}".strip()
                 # if the para is long, add lead sentence to context1
                 else:
                     snippet = f"{lead_sent.strip()} {context1.strip()} {sentence.strip()} {context2.strip()}".strip()
 
                 # call fact_extractor on each snippet
-                facts, prompt_tok_num, response_tok_num = self.fact_extractor(snippet, sentences[i].strip(), qa_input = False)
+                facts, prompt_tok_num, response_tok_num = self.fact_extractor(snippet, sentences[i].strip(),
+                                                                              qa_input=False)
 
                 # update token counts
                 prompt_tok_cnt += prompt_tok_num
                 response_tok_cnt += response_tok_num
 
                 if facts == None:
                     continue
@@ -65,21 +66,21 @@
                     if fact.strip() == "":
                         continue
                     # cases where GPT returns its justification
                     elif fact.startswith("Note:"):
                         continue
                     elif fact not in all_facts_lst:
                         all_facts_lst.append(fact)
-        
+
         print(f"Returning facts and token counts for the whole response ...")
         if all_facts_lst == None:
             return None, prompt_tok_cnt, response_tok_cnt
         else:
             return all_facts_lst, prompt_tok_cnt, response_tok_cnt
-        
+
     def qa_scanner_extractor(self, question, response):
         """
         Given a model output to a question
         - split the response into sentences using spaCy
         - snippet = question (context1 = 0-3 sentence) <SOS>Sent<EOS> (context2 = 0-1 sentence)
         - call fact_extractor on each snippet
         """
@@ -97,16 +98,16 @@
             context2 = " ".join(sentences[i + 1:i + 2])
 
             snippet = f"Question: {question.strip()}\nResponse: {context1.strip()} {sentence.strip()} {context2.strip()}".strip()
             # new return value
             snippet_lst.append(snippet)
 
             # call fact_extractor on each tesnippetxt
-            facts, prompt_tok_num, response_tok_num = self.fact_extractor(snippet, sentences[i].strip(), qa_input = True)
-            
+            facts, prompt_tok_num, response_tok_num = self.fact_extractor(snippet, sentences[i].strip(), qa_input=True)
+
             # update token counts
             prompt_tok_cnt += prompt_tok_num
             response_tok_cnt += response_tok_num
 
             if facts == None:
                 fact_lst_lst.append([None])
                 continue
@@ -120,34 +121,39 @@
                 elif fact.startswith("Note:"):
                     continue
                 elif fact not in all_facts_lst:
                     all_facts_lst.append(fact.strip())
                 fact_lst.append(fact.strip())
             fact_lst_lst.append(fact_lst)
         print(f"Returning facts and token counts for the whole response ...")
-        # if all_facts_lst == None:
-        #     return None, prompt_tok_cnt, response_tok_cnt
-        # else:
-        #     return all_facts_lst, prompt_tok_cnt, response_tok_cnt
+
         return snippet_lst, fact_lst_lst, all_facts_lst, prompt_tok_cnt, response_tok_cnt
 
     def get_sentence(self, text):
         # use spaCy to split the text into sentences
         return [x.text.strip() for x in self.spacy_nlp(text).sents]
-    
-    def fact_extractor(self, snippet, sentence, qa_input = False):
+
+    def get_prompt_template(self, qa_input):
+        if qa_input:
+            prompt_template = open("./prompt/extraction_qa_template.txt", "r").read()
+        else:
+            prompt_template = open("./prompt/extraction_non_qa_template.txt", "r").read()
+        return prompt_template
+
+    def fact_extractor(self, snippet, sentence, qa_input=False):
         """
         snippet = (context1) <SOS>sentence<EOS> (context2)
         sentence = the sentence to be focused on
         """
-        prompt_template = qa_prompt_temp if qa_input else non_qa_prompt_temp
+        prompt_template = self.get_prompt_template(qa_input)  # qa_prompt_temp if qa_input else non_qa_prompt_temp
         prompt_text = prompt_template.format(snippet=snippet, sentence=sentence)
-        response, prompt_tok_cnt, response_tok_cnt = self.get_model_response.get_response(self.system_message, prompt_text)
+        response, prompt_tok_cnt, response_tok_cnt = self.get_model_response.get_response(self.system_message,
+                                                                                          prompt_text)
 
         if "No verifiable claim." in response:
             return None, prompt_tok_cnt, response_tok_cnt
         else:
             # remove itemized list
             facts = [x.strip().replace("- ", "") for x in response.split("\n")]
             # remove numbers in the beginning
             facts = [regex.sub(r"^\d+\.?\s", "", x) for x in facts]
-            return facts, prompt_tok_cnt, response_tok_cnt
+            return facts, prompt_tok_cnt, response_tok_cnt
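
In 0.0.3, ClaimExtractor no longer takes input_file_name and no longer imports claim_extraction_prompt_utils; prompt templates are read at call time from ./prompt/extraction_qa_template.txt and ./prompt/extraction_non_qa_template.txt relative to the working directory. A minimal usage sketch based on the signatures above (the question/response strings and the prompt files are placeholders, and an OpenAI or Anthropic API key is assumed to be configured):

    from veriscore.claim_extractor import ClaimExtractor

    # cache_dir defaults to ./data/cache/; ./prompt/ must contain the two template files
    extractor = ClaimExtractor(model_name="gpt-4-0125-preview")
    snippets, claims_per_sentence, all_claims, prompt_toks, response_toks = \
        extractor.qa_scanner_extractor(
            "Who wrote Hamlet?",
            "Hamlet was written by William Shakespeare in the early 1600s.")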

Comparing veriscore-0.0.2/veriscore/GetResponse.py & veriscore-0.0.3/veriscore/get_response.py

  • Files 2% similar despite different names
@@ -1,14 +1,15 @@
 import os
 import pdb
 import json
 import tiktoken
 from openai import OpenAI
 from anthropic import Anthropic
 
+
 class GetResponse():
     # OpenAI model list: https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
     # Claude3 model list: https://www.anthropic.com/claude
     # "claude-3-opus-20240229", gpt-4-0125-preview
     def __init__(self, cache_file, model_name="gpt-4-0125-preview", max_tokens=1000, temperature=0):
         self.model_name = model_name
         self.max_tokens = max_tokens
@@ -31,56 +32,55 @@
         self.save_interval = 20
 
     # Returns the response from the model given a system message and a prompt text.
     def get_response(self, system_message, prompt_text):
         # check if prompt is in cache; if so, return from cache
         cache_key = prompt_text.strip()
         if cache_key in self.cache_dict:
-            print("Getting response from cache ...")
             return self.cache_dict[cache_key], 0, 0
-        
+
         # Example message: "You are a helpful assistant who can extract verifiable atomic claims from a piece of text."
         if "gpt" in self.model_name:
             message = [{"role": "system", "content": system_message},
                        {"role": "user", "content": prompt_text}]
             response = self.client.chat.completions.create(
-                        model=self.model_name,
-                        messages=message,
-                        max_tokens=self.max_tokens,
-                        temperature=self.temperature,
-                        seed=self.seed,
-                        )
+                model=self.model_name,
+                messages=message,
+                max_tokens=self.max_tokens,
+                temperature=self.temperature,
+                seed=self.seed,
+            )
             response_content = response.choices[0].message.content.strip()
         elif "claude" in self.model_name:
             response = self.client.messages.create(
-                        model=self.model_name,
-                        messages=[{"role": "user", "content": prompt_text}],
-                        temperature=self.temperature,
-                        max_tokens=self.max_tokens
-                        )
+                model=self.model_name,
+                messages=[{"role": "user", "content": prompt_text}],
+                temperature=self.temperature,
+                max_tokens=self.max_tokens
+            )
             response_content = response.content[0].text.strip()
 
         # update cache
         self.cache_dict[cache_key] = response_content.strip()
         self.add_n += 1
 
         # save cache every save_interval times
         if self.add_n % self.save_interval == 0:
             self.save_cache()
 
         # count tokens in prompt and response
         prompt_tokens = len(self.tokenizer.encode(prompt_text))
         response_tokens = len(self.tokenizer.encode(response_content))
         return response_content, prompt_tokens, response_tokens
-    
+
     # Returns the number of tokens in a text string.
     def tok_count(self, text: str) -> int:
         num_tokens = len(self.tokenizer.encode(text))
         return num_tokens
-    
+
     def save_cache(self):
         # load the latest cache first, since if there were other processes running in parallel, cache might have been updated
         for k, v in self.load_cache().items():
             self.cache_dict[k] = v
         print(f"Saving cache to {self.cache_file}...")
         with open(self.cache_file, "w") as f:
             json.dump(self.cache_dict, f, indent=4)
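
GetResponse keeps the same interface in 0.0.3; the change is mostly reformatting of the OpenAI/Anthropic call sites plus removal of the cache-hit print. A sketch of the call pattern shown above, assuming the relevant API key is set in the environment:

    from veriscore.get_response import GetResponse

    gr = GetResponse(cache_file="./data/cache/demo_cache.json",
                     model_name="gpt-4-0125-preview",
                     max_tokens=1000,
                     temperature=0)
    content, prompt_toks, response_toks = gr.get_response(
        "You are a helpful assistant.",                 # system message
        "List three verifiable facts about the Moon.")  # prompt text
    # a repeated prompt is served from the JSON cache and returns (content, 0, 0)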

Comparing veriscore-0.0.2/veriscore/SearchAPI.py & veriscore-0.0.3/veriscore/search_API.py

  • Files 13% similar despite different names
@@ -1,89 +1,87 @@
 import os
 from ast import literal_eval
 import pdb
 import json
 import requests
 from tqdm import tqdm
 
+
 class SearchAPI():
     def __init__(self):
         # invariant variables
         self.serper_key = os.getenv("SERPER_KEY_PRIVATE")
-        self.serper_key = os.getenv("SERPER_KEY")
        self.url = "https://google.serper.dev/search"
         self.headers = {'X-API-KEY': self.serper_key,
                         'Content-Type': 'application/json'}
-        # # cache related
-        # self.cache_file = "fine_tune_code_data/data/run_ft-ed_models_on_110_datapoints/cache"
-        # self.cache_dict = self.load_cache()
-        # self.add_n = 0
-        # self.save_interval = 10
+        # cache related
+        self.cache_file = "data/cache/search_cache.json"
+        self.cache_dict = self.load_cache()
+        self.add_n = 0
+        self.save_interval = 10
 
     def get_snippets(self, claim_lst):
         text_claim_snippets_dict = {}
         for query in claim_lst:
             search_result = self.get_search_res(query)
+            if "statusCode" in search_result:  # and search_result['statusCode'] == 403:
+                print(search_result['message'])
+                exit()
             if "organic" in search_result:
                 organic_res = search_result["organic"]
             else:
                 organic_res = []
 
             search_res_lst = []
             for item in organic_res:
                 title = item["title"] if "title" in item else ""
                 snippet = item["snippet"] if "snippet" in item else ""
                 link = item["link"] if "link" in item else ""
 
-                search_res_lst.append({"title": title, 
-                                       "snippet": snippet, 
+                search_res_lst.append({"title": title,
+                                       "snippet": snippet,
                                        "link": link})
             text_claim_snippets_dict[query] = search_res_lst
         return text_claim_snippets_dict
 
     def get_search_res(self, query):
-        # # check if prompt is in cache; if so, return from cache
-        # cache_key = query.strip()
-        # if cache_key in self.cache_dict:
-        #     print("Getting search results from cache ...")
-        #     return self.cache_dict[cache_key]
+        # check if prompt is in cache; if so, return from cache
+        cache_key = query.strip()
+        if cache_key in self.cache_dict:
+            # print("Getting search results from cache ...")
+            return self.cache_dict[cache_key]
 
-        payload = json.dumps({"q": query, "autocorrect": False})
+        payload = json.dumps({"q": query})
         response = requests.request("POST",
                                     self.url,
                                     headers=self.headers,
                                     data=payload)
-        # pdb.set_trace()
-        # response_json = literal_eval(response.text)
-        try:
-            response_json = json.loads(response.text)
-        except json.JSONDecodeError as e:
-            print(f"Error: {e}")
-
-        # # update cache
-        # self.cache_dict[query.strip()] = response_json
-        # self.add_n += 1
-
-        # # save cache every save_interval times
-        # if self.add_n % self.save_interval == 0:
-        #     self.save_cache()
-        
+        response_json = literal_eval(response.text)
+
+        # update cache
+        self.cache_dict[query.strip()] = response_json
+        self.add_n += 1
+
+        # save cache every save_interval times
+        if self.add_n % self.save_interval == 0:
+            self.save_cache()
+
         return response_json
 
-    # def save_cache(self):
-    #     # load the latest cache first, since if there were other processes running in parallel, cache might have been updated
-    #     cache = self.load_cache().items()
-    #     for k, v in cache:
-    #         self.cache_dict[k] = v
-    #     print(f"Saving search cache ...")
-    #     with open(self.cache_file, "w") as f:
-    #         json.dump(self.cache_dict, f, indent=4)
-
-    # def load_cache(self):
-    #     if os.path.exists(self.cache_file):
-    #         with open(self.cache_file, "r") as f:
-    #             # load a json file
-    #             cache = json.load(f)
-    #             print(f"Loading cache ...")
-    #     else:
-    #         cache = {}
-    #     return cache
+    def save_cache(self):
+        # load the latest cache first, since if there were other processes running in parallel, cache might have been updated
+        cache = self.load_cache().items()
+        for k, v in cache:
+            self.cache_dict[k] = v
+        print(f"Saving search cache ...")
+        with open(self.cache_file, "w") as f:
+            json.dump(self.cache_dict, f, indent=4)
+
+    def load_cache(self):
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "r") as f:
+                # load a json file
+                cache = json.load(f)
+                print(f"Loading cache ...")
+        else:
+            cache = {}
+        return cache
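
In 0.0.3, SearchAPI re-enables the on-disk query cache at data/cache/search_cache.json, keeps only the SERPER_KEY_PRIVATE environment variable, and exits when Serper returns a statusCode error; the response is also parsed with literal_eval again instead of json.loads. A usage sketch based on the code above (the claim string is a placeholder):

    import os
    from veriscore.search_API import SearchAPI

    os.environ["SERPER_KEY_PRIVATE"] = "<serper-api-key>"   # read in SearchAPI.__init__
    search = SearchAPI()
    snippets = search.get_snippets(["The Eiffel Tower is located in Paris."])
    # snippets maps each claim to a list of {"title", "snippet", "link"} dicts;
    # data/cache/ should exist so the periodic cache saves can write the JSON file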

Comparing veriscore-0.0.2/veriscore/evidence_retrieval.py & veriscore-0.0.3/veriscore/retrieval_evidence.py

  • Files 23% similar despite different names
@@ -1,64 +1,62 @@
 import os
 import pdb
 import json
 import random
 import argparse
 from tqdm import tqdm
-from SearchAPI import SearchAPI
+from .search_API import SearchAPI
 
-# add argparse argument
-parser = argparse.ArgumentParser()
-parser.add_argument("--multiple", type=str, default="5")
-args = parser.parse_args()
-
-"""
-Read in extracted claim file
-"""
-begin = 400 * (int(args.multiple) - 1)
-end = 400 * int(args.multiple)
-
-file_dir = f"fine_tune_code_data/data/run_ft-ed_models_on_110_datapoints/4800_response_claim_extracted/merged_sampled_data_4800_extracted_{begin}_{end}.jsonl"
-
-"""
-Get search results for each claim
-Store as a dictionary {claim: {"search_results": [search_results]}}
-"""
-# initialize search api
-fetch_search = SearchAPI()
-
-# read in extracted claim data
-with open(file_dir, "r") as f:
-    data = [json.loads(x) for x in f.readlines()]
-
-# set up output directory
-out_dir = f"fine_tune_code_data/data/run_ft-ed_models_on_110_datapoints/4800_response_claim_extracted_w_evidence/merged_sampled_data_4800_extracted_{begin}_{end}_searched.jsonl"
-os.makedirs(os.path.dirname(out_dir), exist_ok=True)
-
-# get the pick up point
-try:
-    with open(out_dir, "r") as f:
-        pick_up_point = len(f.readlines())
-        print(f"Pick up point: {pick_up_point}")
-except:
-    pick_up_point = 0
-    print("No pick up point")
-    pass
-
-# iterate through each data point and get search results
-print(f"Begin{begin} End{end} Multiple {args.multiple}")
-with open(out_dir, "a+") as f:
-    for dict_item in tqdm(data[pick_up_point:]):
-        if dict_item['abstained'] == True:
-            f.write(json.dumps(dict_item) + "\n")
-            continue
-        
-        claim_lst = dict_item["all_claim_lst"]
-        if claim_lst == ["No verifiable claim."]:
-            dict_item["claim_srch_res_lst"] = []
-            f.write(json.dumps(dict_item) + "\n")
-            continue
-        claim_snippets = fetch_search.get_snippets(claim_lst)
-        dict_item["claim_srch_res_lst"] = claim_snippets
+if __name__ == '__main__':
 
-        f.write(json.dumps(dict_item) + "\n")
-        f.flush()
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data_dir", type=str, default='./data')
+    parser.add_argument("--input_file", type=str, required=True)
+    parser.add_argument("--output_dir", type=str, default='./data')
+    args = parser.parse_args()
+
+    """
+    Get search results for each claim
+    Store as a dictionary {claim: {"search_results": [search_results]}}
+    """
+    # initialize search api
+    fetch_search = SearchAPI()
+
+    input_file_name = "".join(args.input_file.split('.')[:-1])
+    input_path = os.path.join(args.data_dir, args.input_file)
+
+    # read in extracted claim data
+    with open(input_path, "r") as f:
+        data = [json.loads(x) for x in f.readlines()]
+
+    output_dir = args.output_dir
+    output_file = f"evidence_{input_file_name}.jsonl"
+    output_path = os.path.join(output_dir, output_file)
+    os.makedirs(os.path.dirname(output_dir), exist_ok=True)
+
+    # get the pick up point
+    # try:
+    #     with open(output_path, "r") as f:
+    #         pick_up_point = len(f.readlines())
+    #         print(f"Pick up point: {pick_up_point}")
+    # except:
+    #     pick_up_point = 0
+    #     print("No pick up point")
+    #     pass
+
+    # iterate through each data point and get search results
+    with open(output_path, "w") as f:
+        for dict_item in tqdm(data):
+            if dict_item['abstained']:
+                f.write(json.dumps(dict_item) + "\n")
+                continue
+
+            claim_lst = dict_item["all_claim_lst"]
+            if claim_lst == ["No verifiable claim."]:
+                dict_item["claim_snippets_dict"] = []
+                f.write(json.dumps(dict_item) + "\n")
+                continue
+            claim_snippets = fetch_search.get_snippets(claim_lst)
+            dict_item["claim_snippets_dict"] = claim_snippets
+
+            f.write(json.dumps(dict_item) + "\n")
+            f.flush()
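
The evidence-retrieval script is now argparse-driven (--data_dir, --input_file, --output_dir) instead of hard-coding internal paths, and the output field is renamed from claim_srch_res_lst to claim_snippets_dict. A hedged sketch of an input record with the fields the loop above reads:

    import json

    record = {
        "question": "Who wrote Hamlet?",
        "response": "Hamlet was written by William Shakespeare.",
        "abstained": False,                   # abstained records are passed through unchanged
        "all_claim_lst": ["William Shakespeare wrote Hamlet."],
    }
    with open("./data/claims_example.jsonl", "w") as f:
        f.write(json.dumps(record) + "\n")
    # then, e.g.: python -m veriscore.retrieval_evidence --data_dir ./data --input_file claims_example.jsonl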

Comparing veriscore-0.0.2/veriscore/extract_claims.py & veriscore-0.0.3/veriscore/veriscore.py

  • Files 25% similar despite different names
@@ -1,88 +1,21 @@
 """
 This script is written to extract claims from the model responses.
 model generations: /data/yixiao/atomic_claims/data/model_generation_decomposition/model_generations
 """
 
 import os
-import pdb
 import json
 import argparse
+import spacy
 from tqdm import tqdm
-from .ClaimExtractor import ClaimExtractor
-#
-# args = argparse.ArgumentParser()
-# args.add_argument("--input_file", type=str, required=True)
-# args = args.parse_args()
-#
-# input_file_names = ['Mistral-7B-Instruct-v0.1',
-#                     'Mistral-7B-Instruct-v0.2',
-#                     'Mixtral-8x7B-Instruct-v0.1',
-#                     'Mixtral-8x22B-Instruct-v0.1',
-#                     'gpt-4-0125-preview',
-#                     'gpt-3.5-turbo-1106',
-#                     'gpt-3.5-turbo-0613',
-#                     'claude-3-opus-20240229',
-#                     'claude-3-sonnet-20240229',
-#                     'claude-3-haiku-20240307',
-#                     'dbrx-instruct',
-#                     'OLMo-7B-Instruct', ]
-#
-# abstain_responses = ["I'm sorry, I cannot fulfill that request.",
-#                      "I'm sorry, I can't fulfill that request.",
-#                      "I'm sorry, but I cannot fulfill that request.",
-#                      "I'm sorry, but I can't fulfill that request.",
-#                      "Sorry, but I can't fulfill that request.",
-#                      "Sorry, I can't do that."]
-#
-# # read data
-# input_file_name = args.input_file
-# assert input_file_name in input_file_names
-# input_file_dir = f"data/model_generation_decomposition/model_generations/data_{input_file_name}.jsonl"
-# with open(input_file_dir, "r") as f:
-#     data = [json.loads(x) for x in f.readlines() if x.strip()]
-#
-# # initialize objects
-# model_name = "gpt-4-0125-preview"
-# claim_extractor = ClaimExtractor(model_name, input_file_name)
-#
-# output_dir = f"data/model_generation_decomposition/decomposed_responses/claims_{input_file_name}.jsonl"
-# with open(output_dir, "a+") as f:
-#     for dict_item in tqdm(data[1558:]):
-#         # get necessary info
-#         question = dict_item["question"]
-#         response = dict_item["response"]
-#         prompt_source = dict_item["prompt_source"]
-#         model = dict_item["model"]
-#
-#         # skip abstained responses
-#         if response.strip() in abstain_responses:
-#             output_dict = {"question": question.strip(),
-#                            "response": response.strip(),
-#                            "abstained": True,
-#                            "prompt_source": prompt_source,
-#                            "model": model,}
-#             f.write(json.dumps(output_dict) + "\n")
-#             continue
-#
-#         # extract claims
-#         snippet_lst, fact_lst_lst, all_facts_lst, prompt_tok_cnt, response_tok_cnt = claim_extractor.qa_scanner_extractor(question, response)
-#
-#         # write output
-#         output_dict = {"question": question.strip(),
-#                        "response": response.strip(),
-#                        "abstained": False, # "abstained": False, "abstained": True
-#                        "snippet_lst": snippet_lst,
-#                        "fact_lst_lst": fact_lst_lst,
-#                        "all_facts_lst": all_facts_lst,
-#                        "prompt_tok_cnt": prompt_tok_cnt,
-#                        "response_tok_cnt": response_tok_cnt,
-#                        "prompt_source": prompt_source,
-#                        "model": model,}
-#         f.write(json.dumps(output_dict) + "\n")
+from .claim_extractor import ClaimExtractor
+from .search_API import SearchAPI
+from .claim_verifier import ClaimVerifier
+
 
 input_file_names = ['Mistral-7B-Instruct-v0.1',
                     'Mistral-7B-Instruct-v0.2',
                     'Mixtral-8x7B-Instruct-v0.1',
                     'Mixtral-8x22B-Instruct-v0.1',
                     'gpt-4-0125-preview',
                     'gpt-3.5-turbo-1106',
@@ -96,67 +29,153 @@
 abstain_responses = ["I'm sorry, I cannot fulfill that request.",
                      "I'm sorry, I can't fulfill that request.",
                      "I'm sorry, but I cannot fulfill that request.",
                      "I'm sorry, but I can't fulfill that request.",
                      "Sorry, but I can't fulfill that request.",
                      "Sorry, I can't do that."]
 
+class VeriScorer(object):
+    def __init__(self,
+                 model_name_extraction='gpt-4-0125-preview',
+                 model_name_verification='gpt-4o',
+                 data_dir='./data',
+                 cache_dir='./data/cache',
+                 output_dir='./data_cache',
+                 label_n=3,
+                 search_res_num=5):
+        self.data_dir = data_dir
+        self.output_dir = output_dir
+        self.cache_dir = cache_dir
+
+        os.makedirs(output_dir, exist_ok=True)
+        os.makedirs(self.cache_dir, exist_ok=True)
+
+        self.spacy_nlp = spacy.load('en_core_web_sm')
+
+        self.system_message_extraction = "You are a helpful assistant who can extract verifiable atomic claims from a piece of text. Each atomic fact should be verifiable against reliable external world knowledge (e.g., via Wikipedia)"
+
+        self.claim_extractor = ClaimExtractor(model_name_extraction, cache_dir=self.cache_dir)
+
+        self.fetch_search = SearchAPI()
+
+        demon_dir = os.path.join(self.data_dir, 'demos')
+        self.model_name_verification = model_name_verification
+        self.claim_verifier = ClaimVerifier(model_name=model_name_verification, label_n=label_n,
+                                         cache_dir=self.cache_dir, demon_dir=demon_dir)
+        self.label_n = label_n
+        self.search_res_num = search_res_num
+
+    def get_veriscore(self, data, input_file_name):
+
+        ### extract claims ###
+        output_file = f"claims_{input_file_name}.jsonl"
+        output_path = os.path.join(self.output_dir, output_file)
+
+        extracted_claims = []
+        with open(output_path, "w") as f:
+            for dict_item in tqdm(data):
+                question = dict_item["question"]
+                response = dict_item["response"]
+                prompt_source = dict_item["prompt_source"]
+                model = dict_item["model"]
+
+                # skip abstained responses
+                if response.strip() in abstain_responses:
+                    output_dict = {"question": question.strip(),
+                                   "response": response.strip(),
+                                   "abstained": True,
+                                   "prompt_source": prompt_source,
+                                   "model": model, }
+                    f.write(json.dumps(output_dict) + "\n")
+                    extracted_claims.append(output_dict)
+                    continue
+
+                # extract claims
+                snippet_lst, claim_lst_lst, all_claim_lst, prompt_tok_cnt, response_tok_cnt = self.claim_extractor.qa_scanner_extractor(
+                    question, response)
+
+                # write output
+                output_dict = {"question": question.strip(),
+                               "prompt_source": prompt_source,
+                               "response": response.strip(),
+                               "prompt_tok_cnt": prompt_tok_cnt,
+                               "response_tok_cnt": response_tok_cnt,
+                               "model": model,
+                               "abstained": False,  # "abstained": False, "abstained": True
+                               "claim_lst_lst": claim_lst_lst,
+                               "all_claim_lst": all_claim_lst
+                               }
+                f.write(json.dumps(output_dict) + "\n")
+                extracted_claims.append(output_dict)
+
+        print(f"claim extraction is done! saved to {output_path}")
+
+        output_file = f"evidence_{input_file_name}.jsonl"
+        output_path = os.path.join(self.output_dir, output_file)
+        searched_evidence_dict = []
+        with open(output_path, "w") as f:
+            for dict_item in tqdm(extracted_claims):
+                if dict_item['abstained']:
+                    f.write(json.dumps(dict_item) + "\n")
+                    searched_evidence_dict.append(dict_item)
+                    continue
+
+                claim_lst = dict_item["all_claim_lst"]
+                if claim_lst == ["No verifiable claim."]:
+                    dict_item["claim_snippets_dict"] = []
+                    f.write(json.dumps(dict_item) + "\n")
+                    searched_evidence_dict.append(dict_item)
+                    continue
+                claim_snippets = self.fetch_search.get_snippets(claim_lst)
+                dict_item["claim_snippets_dict"] = claim_snippets
+                searched_evidence_dict.append(dict_item)
+                f.write(json.dumps(dict_item) + "\n")
+                f.flush()
+        print(f"evidence searching is done! saved to {output_path}")
+
+        output_dir = os.path.join(args.output_dir, 'model_output')
+        os.makedirs(output_dir, exist_ok=True)
+        output_file = f'verification_{input_file_name}_{self.model_name_verification}_{self.label_n}.jsonl'
+        output_path = os.path.join(output_dir, output_file)
+
+        total_prompt_tok_cnt = 0
+        total_resp_tok_cnt = 0
+        with open(output_path, "w") as f:
+            for dict_item in tqdm(searched_evidence_dict):
+                claim_snippets_dict = dict_item["claim_snippets_dict"]
+                claim_verify_res_dict, prompt_tok_cnt, response_tok_cnt = self.claim_verifier.verifying_claim(
+                    claim_snippets_dict, search_res_num=args.search_res_num)
+                json.dump(claim_verify_res_dict, f)
+                f.write("\n")
+                total_prompt_tok_cnt += prompt_tok_cnt
+                total_resp_tok_cnt += response_tok_cnt
+
+        print(f"claim verification is done! saved to {output_path}")
+        print(f"Total cost: {total_prompt_tok_cnt * 10 / 1e6 + total_resp_tok_cnt * 30 / 1e6}")
+
 if __name__ == '__main__':
-    args = argparse.ArgumentParser()
-    args.add_argument("--data_dir", type=str, default='data')
-    args.add_argument("--input_file", type=str, required=True)
-    args.add_argument("--output_dir", type=str, default='data')
-    args.add_argument("--cache_dir", type=str, default='data/cache')
-    args.add_argument("--model_name", type=str, default="gpt-4-0125-preview")
-    args = args.parse_args()
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data_dir", type=str, default='./data')
+    parser.add_argument("--input_file", type=str, required=True)
+    parser.add_argument("--output_dir", type=str, default='./data')
+    parser.add_argument("--cache_dir", type=str, default='./data/cache')
+    parser.add_argument("--model_name_extraction", type=str, default="gpt-4-0125-preview")
+    parser.add_argument("--model_name_verification", type=str, default="gpt-4o")
+    parser.add_argument("--label_n", type=int, default=3, choices=[2, 3])
+    parser.add_argument("--search_res_num", type=int, default=5)
+    args = parser.parse_args()
+
+
+    vs = VeriScorer(model_name_extraction=args.model_name_extraction,
+                    model_name_verification=args.model_name_verification,
+                    data_dir=args.data_dir,
+                    output_dir=args.output_dir,
+                    cache_dir=args.cache_dir,
+                    label_n=args.label_n,
+                    search_res_num=args.search_res_num)
 
     input_file_name = "".join(args.input_file.split('.')[:-1])
-    # assert input_file_name in input_file_names
-
     input_path = os.path.join(args.data_dir, args.input_file)
     with open(input_path, "r") as f:
         data = [json.loads(x) for x in f.readlines() if x.strip()]
 
-    # initialize objects
-    model_name = args.model_name
-    claim_extractor = ClaimExtractor(model_name, input_file_name, args.cache_dir)
-
-    output_dir = args.output_dir
-    output_file = f"claims_{input_file_name}.jsonl"
-    output_path = os.path.join(output_dir, output_file)
-
-    print(os.path.abspath(output_path))
-
-    with open(output_path, "a+") as f:
-
-        for dict_item in tqdm(data):
-            # get necessary info
-            question = dict_item["question"]
-            response = dict_item["response"]
-            prompt_source = dict_item["prompt_source"]
-            model = dict_item["model"]
-
-            # skip abstained responses
-            if response.strip() in abstain_responses:
-                output_dict = {"question": question.strip(),
-                               "response": response.strip(),
-                               "abstained": True,
-                               "prompt_source": prompt_source,
-                               "model": model,}
-                f.write(json.dumps(output_dict) + "\n")
-                continue
-
-            # extract claims
-            snippet_lst, claim_lst_lst, all_claim_lst, prompt_tok_cnt, response_tok_cnt = claim_extractor.qa_scanner_extractor(question, response)
-
-            # write output
-            output_dict = {"question": question.strip(),
-                           "prompt_source": prompt_source,
-                           "response": response.strip(),
-                           "prompt_tok_cnt": prompt_tok_cnt,
-                           "response_tok_cnt": response_tok_cnt,
-                           "model": model,
-                           "abstained": False, # "abstained": False, "abstained": True
-                           "claim_lst_lst": claim_lst_lst,
-                           "all_claim_lst": all_claim_lst
-                           }
-            f.write(json.dumps(output_dict) + "\n")
+    vs.get_veriscore(data, input_file_name)
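
veriscore.py replaces the old standalone extract_claims.py flow with a VeriScorer class that chains claim extraction, Serper evidence retrieval, and claim verification, writing claims_*, evidence_*, and verification_* JSONL files under the configured directories. A minimal end-to-end sketch following the constructor and get_veriscore signatures above; note that the verification step as written still references args.output_dir and args.search_res_num, so it may only run cleanly when the module is executed as a script:

    from veriscore.veriscore import VeriScorer

    data = [{"question": "Who wrote Hamlet?",
             "response": "Hamlet was written by William Shakespeare.",
             "prompt_source": "demo",
             "model": "example-model"}]

    vs = VeriScorer(model_name_extraction="gpt-4-0125-preview",
                    model_name_verification="gpt-4o",
                    data_dir="./data",
                    output_dir="./data",
                    cache_dir="./data/cache",
                    label_n=3,
                    search_res_num=5)
    vs.get_veriscore(data, input_file_name="demo")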