Refactoring the project to be modular
- prompts/question_answering.txt +1 -1
- requirements.txt +2 -1
- test/test.py → src/__init__.py +0 -0
- src/gpt_3_manager.py +5 -5
- src/index.py +7 -9
- src/prompt.py +31 -9
- src/tests/__init__.py +1 -0
- src/tests/chat_test.py +28 -0
- src/tests/gpt_3_manager_test.py +21 -0
- src/tests/index_test.py +30 -0
- src/tests/prompt_test.py +62 -0
- src/tests/utils_test.py +14 -0
prompts/question_answering.txt CHANGED
@@ -4,4 +4,4 @@ passage: <<PASSAGE>>
 
 question: <<QUESTION>>
 
-answer:
+answer:
requirements.txt CHANGED
@@ -2,4 +2,5 @@ pdfplumber
 textwrap3
 openai
 python-dotenv
-jsonlines
+jsonlines
+pytest
test/test.py → src/__init__.py RENAMED
File without changes
src/gpt_3_manager.py CHANGED
@@ -5,13 +5,13 @@ class Gpt3Manager:
     def __init__(self, api_key):
         openai.api_key = api_key
 
-    def get_completion(prompt, max_tokens=128, model="text-davinci-003"):
+    def get_completion(self, prompt, max_tokens=128, model="text-davinci-003"):
         response = None
         try:
             response = openai.Completion.create(
-                model=model,
                 prompt=prompt,
                 max_tokens=max_tokens,
+                model=model,
             )["choices"][0]["text"]
 
         except Exception as err:
@@ -19,11 +19,11 @@ class Gpt3Manager:
 
         return response
 
-    def get_embedding(
-
+    def get_embedding(self, prompt, model="text-similarity-ada-001"):
+        prompt = prompt.replace("\n", " ")
         embedding = None
         try:
-            embedding = openai.Embedding.create(input=[
+            embedding = openai.Embedding.create(input=[prompt], model=model)["data"][0][
                 "embedding"
             ]
         except Exception as err:
src/index.py CHANGED
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 import jsonlines
 from gpt_3_manager import Gpt3Manager
-from
+from utils import dot_similarity
 
 
 class Index(ABC):
@@ -11,9 +11,6 @@ class Index(ABC):
 
 
 class JsonLinesIndex(Index):
-    def __init__(self):
-        pass
-
     def load(self, path):
         with jsonlines.open(path) as passages:
             indexes = list(passages)
@@ -21,14 +18,15 @@ class JsonLinesIndex(Index):
 
 
 class IndexSearchEngine:
-    def __init__(self,
-
+    def __init__(self, indexes, gpt_manager):
+        self.indexes = indexes
+        self.gpt_manager = gpt_manager
 
-    def search(self, question,
-        question_embedding =
+    def search(self, question, count=4):
+        question_embedding = self.gpt_manager.get_embedding(prompt=question)
 
         simmilarities = []
-        for index in indexes:
+        for index in self.indexes:
             embedding = index["embedding"]
             score = dot_similarity(question_embedding, embedding)
             simmilarities.append({"index": index, "score": score})
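
The new import `from utils import dot_similarity` points at a helper that is not touched by this commit, so its implementation is not shown here. A minimal sketch of what such a function could look like, assuming plain Python lists of floats (the real utils.py may use numpy instead):

# Hypothetical dot_similarity; utils.py is not part of this diff, so treat
# this as an illustration of the expected contract only.
def dot_similarity(a, b):
    # Dot product of two equal-length embedding vectors.
    return sum(x * y for x, y in zip(a, b))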
src/prompt.py CHANGED
@@ -1,35 +1,57 @@
 from abc import ABC, abstractmethod
 
-
-
+# Prompt Loaders
+class PromptLoader(ABC):
+    @abstractmethod
+    def load_prompt():
+        pass
+
+
+class TextPromptLoader(PromptLoader):
+    def load_prompt(self, path):
         with open(path) as f:
             lines = f.readlines()
         return "".join(lines)
 
+
+# Prompts
+class Prompt(ABC):
+    def __init__(self, prompt_loader: PromptLoader):
+        self.prompt_loader = prompt_loader
+
+    def load_prompt(self, path):
+        return self.prompt_loader.load_prompt(path)
+
     @abstractmethod
     def load(self, path):
         pass
 
 
 class QuestionAnsweringPrompt(Prompt):
-    def __init__(self,
-
-
+    def __init__(self, passage, question, prompt_loader):
+        super().__init__(prompt_loader=prompt_loader)
+        self.passage = passage
+        self.question = question
+
+    # trust me, you'll need this later
+    # .replace("<<PASSAGE>>", self.result["index"]["content"])
 
     def load(self, path):
         prompt = (
             self.load_prompt(path)
-            .replace("<<PASSAGE>>", self.
+            .replace("<<PASSAGE>>", self.passage)
             .replace("<<QUESTION>>", self.question)
         )
         return prompt
 
 
 class PassageSummarizationPrompt(Prompt):
-    def __init__(self,
-
+    def __init__(self, passage, prompt_loader):
+        super().__init__(prompt_loader=prompt_loader)
+        self.passage = passage
+
+    # prompt = self.load_prompt(path).replace("<<PASSAGE>>", "\n".join(self.answers))
 
     def load(self, path):
-        prompt = self.load_prompt(path).replace("<<PASSAGE>>",
+        prompt = self.load_prompt(path).replace("<<PASSAGE>>", self.passage)
         return prompt
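
With the loader/prompt split, callers now compose a PromptLoader with a Prompt subclass instead of having the prompt read files directly. A short usage sketch based on the tests added below; the passage and question strings are just the placeholder values used there:

from pathlib import Path
from prompt import TextPromptLoader, QuestionAnsweringPrompt

loader = TextPromptLoader()
prompt = QuestionAnsweringPrompt(
    passage="Hi, I'm foo and I love cycling and programming",  # placeholder passage
    question="What is foo's hobby",  # placeholder question
    prompt_loader=loader,
)

# Fills <<PASSAGE>> and <<QUESTION>> in the template file.
text = prompt.load(Path("prompts") / "question_answering.txt")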
src/tests/__init__.py ADDED
@@ -0,0 +1 @@
+
src/tests/chat_test.py ADDED
@@ -0,0 +1,28 @@
+import os
+from pathlib import Path
+from index import IndexSearchEngine
+from gpt_3_manager import Gpt3Manager
+from dotenv import load_dotenv
+from chat import ChatBot
+from index import JsonLinesIndex
+
+# load_dotenv()
+
+# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+# def test_chatbot():
+#     path = Path("index") / "index.jsonl"
+
+#     index = JsonLinesIndex()
+#     loaded = index.load(path)
+#     gpt_manager = Gpt3Manager(api_key=OPENAI_API_KEY)
+#     engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)
+
+#     chatbot = ChatBot(engine)
+#     answer = chatbot.ask("What does the twitter terms of service does")
+#     print(answer)
+#     # assert 0 == 0
+
+
+# test_chatbot()
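
chat.py itself is not changed in this commit; the commented-out test only shows that ChatBot wraps an IndexSearchEngine and exposes ask(). A purely hypothetical sketch of that shape, with the result handling and prompt wiring guessed from the rest of the diff rather than taken from the project's actual chat.py:

from prompt import TextPromptLoader, QuestionAnsweringPrompt


class ChatBot:
    def __init__(self, engine):
        self.engine = engine  # IndexSearchEngine from src/index.py

    def ask(self, question):
        # Guessed flow: take the best-scoring passage, build the QA prompt,
        # and call the completion endpoint through the engine's Gpt3Manager.
        results = self.engine.search(question=question)
        passage = results[0]["index"]["content"]  # assumed result shape
        prompt = QuestionAnsweringPrompt(passage, question, TextPromptLoader())
        return self.engine.gpt_manager.get_completion(
            prompt=prompt.load("prompts/question_answering.txt")
        )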
src/tests/gpt_3_manager_test.py ADDED
@@ -0,0 +1,21 @@
+import os
+from dotenv import load_dotenv
+from gpt_3_manager import Gpt3Manager
+
+load_dotenv()
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+def test_gpt3_completion():
+    manager = Gpt3Manager(api_key=OPENAI_API_KEY)
+    request = manager.get_completion(
+        prompt="This is a testing prompt", max_tokens=10, model="text-ada-001"
+    )
+    assert request != None
+
+
+def test_gpt3_embedding():
+    manager = Gpt3Manager(api_key=OPENAI_API_KEY)
+    request = manager.get_embedding(prompt="This is a testing prompt")
+    assert request != None
src/tests/index_test.py ADDED
@@ -0,0 +1,30 @@
+import os
+from index import JsonLinesIndex, IndexSearchEngine
+from gpt_3_manager import Gpt3Manager
+from pathlib import Path
+from dotenv import load_dotenv
+
+load_dotenv()
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+def test_jsonlines_index():
+    path = Path("index") / "index.jsonl"
+
+    index = JsonLinesIndex()
+    result = index.load(path)
+
+    assert result != None
+
+
+def test_index_serach_engine():
+    path = Path("index") / "index.jsonl"
+    gpt_manager = Gpt3Manager(OPENAI_API_KEY)
+    index = JsonLinesIndex()
+    loaded = index.load(path)
+    engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)
+
+    results = engine.search(question="What does the twitter tos does")
+
+    assert results != None
src/tests/prompt_test.py ADDED
@@ -0,0 +1,62 @@
+from pathlib import Path
+from prompt import QuestionAnsweringPrompt, PassageSummarizationPrompt, TextPromptLoader
+
+
+def test_text_prompt_loader():
+    path = Path("prompts") / "question_answering.txt"
+    prompt_loader = TextPromptLoader()
+
+    prompt = prompt_loader.load_prompt(path)
+    testing_prompt = (
+        "Use the passage to write a detailed answer to the following question\n"
+        "\n"
+        "passage: <<PASSAGE>>\n"
+        "\n"
+        "question: <<QUESTION>>\n"
+        "\n"
+        "answer:"
+    )
+
+    assert prompt == testing_prompt
+
+
+def test_question_answering_prompt():
+    path = Path("prompts") / "question_answering.txt"
+
+    passage = "Hi, I'm foo and I love cycling and programming"
+    question = "What is foo's hobby"
+
+    prompt_loader = TextPromptLoader()
+    prompt = QuestionAnsweringPrompt(passage, question, prompt_loader)
+    loaded_prompt = prompt.load(path)
+
+    testing_prompt = (
+        "Use the passage to write a detailed answer to the following question\n"
+        "\n"
+        "passage: Hi, I'm foo and I love cycling and programming\n"
+        "\n"
+        "question: What is foo's hobby\n"
+        "\n"
+        "answer:"
+    )
+
+    assert loaded_prompt == testing_prompt
+
+
+def test_passage_summarization_prompt():
+    path = Path("prompts") / "passage_summarization.txt"
+
+    passage = "Hi, I'm foo and I love cycling and programming"
+
+    prompt_loader = TextPromptLoader()
+    prompt = PassageSummarizationPrompt(passage, prompt_loader)
+    loaded_prompt = prompt.load(path)
+
+    testing_prompt = (
+        "Summarize the following passage in detail\n"
+        "passage: Hi, I'm foo and I love cycling and programming\n"
+        "\n"
+        "summary:"
+    )
+
+    assert loaded_prompt == testing_prompt
src/tests/utils_test.py ADDED
@@ -0,0 +1,14 @@
+from pathlib import Path
+from utils import load_prompt
+
+
+def test_load_prompt_default():
+    path = Path("prompts") / "question_answering.txt"
+
+    with open(path) as f:
+        lines = f.readlines()
+    testing_prompt = "".join(lines)
+
+    prompt = load_prompt(path)
+
+    assert prompt == testing_prompt
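
utils.py itself is not part of this commit, but the test above pins down load_prompt's contract: it must return the file's contents exactly as read. A minimal sketch consistent with that assertion; the real helper may differ:

# Illustration only: a load_prompt that satisfies utils_test.py.
def load_prompt(path):
    with open(path) as f:
        return "".join(f.readlines())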