Spaces: prototype

- chat.py +66 -0
- index/build_index.py +37 -0
- index/index.jsonl +0 -0
- prompts/passage_summarization.txt +4 -0
- prompts/question_answering.txt +7 -0
- requirements.txt +5 -0
- utils.py +46 -0
chat.py
ADDED
@@ -0,0 +1,66 @@
import os
import openai
from dotenv import load_dotenv
import jsonlines
from pathlib import Path
from utils import (
    gpt3_embeddings,
    gpt3_completion,
    dot_similarity,
    load_prompt,
)

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

openai.api_key = OPENAI_API_KEY


def search_index(question, indexes, count=4):
    """Rank indexed passages by dot-product similarity to the question."""
    question_embedding = gpt3_embeddings(question)

    similarities = []
    for index in indexes:
        embedding = index["embedding"]
        score = dot_similarity(question_embedding, embedding)
        similarities.append({"index": index, "score": score})

    sorted_similarities = sorted(
        similarities, key=lambda x: x["score"], reverse=True
    )

    return sorted_similarities[:count]


if __name__ == "__main__":
    # Load the prebuilt passage index: one {"content", "embedding"} record per line.
    with jsonlines.open(Path("./index") / "index.jsonl") as passages:
        indexes = list(passages)

    while True:
        question = input("User > ")

        search_results = search_index(question=question, indexes=indexes, count=2)

        # Answer the question against each retrieved passage separately.
        answers = []
        for result in search_results:
            prompt = (
                load_prompt("prompts/question_answering.txt")
                .replace("<<PASSAGE>>", result["index"]["content"])
                .replace("<<QUESTION>>", question)
            )

            answer = gpt3_completion(
                prompt=prompt, max_tokens=80, model="text-curie-001"
            )
            if answer is not None:  # gpt3_completion returns None on API errors
                answers.append(answer)

        # Condense the per-passage answers into a single reply.
        prompt = load_prompt("prompts/passage_summarization.txt").replace(
            "<<PASSAGE>>", "\n".join(answers)
        )

        final_answer = gpt3_completion(prompt=prompt)

        print(f"Bot: {final_answer}")
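To try the prototype (assuming a valid OpenAI key): put OPENAI_API_KEY=... in a .env file at the repository root, run build_index.py from the root (it imports utils and reads ./documents/result.txt relative to the working directory), then start the chat loop with python chat.py.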
index/build_index.py
ADDED
@@ -0,0 +1,37 @@
import os
import re
from pathlib import Path
from dotenv import load_dotenv

import openai
import textwrap
import jsonlines

from utils import gpt3_embeddings

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

openai.api_key = OPENAI_API_KEY

path = Path("./documents")


with open(path / "result.txt", "r") as f:
    lines = f.readlines()
    text = "".join(lines)
    text = re.sub(r"\s+", " ", text)  # whitespace normalization

result = []

# Split the document into ~4000-character chunks and embed each one.
chunks = textwrap.wrap(text, 4000)
for chunk in chunks:
    embedding = gpt3_embeddings(chunk)
    info = {"content": chunk, "embedding": embedding}
    result.append(info)

result_path = Path("./index")

with jsonlines.open(result_path / "index.jsonl", "w") as writer:
    writer.write_all(result)
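requirements.txt pins pdfplumber, but no extraction script ships in this commit, so documents/result.txt was presumably produced separately. A minimal, hypothetical sketch of that step (the input name source.pdf is an assumption, not part of the repo):

# Hypothetical helper, not part of this commit: extract a PDF's text into
# documents/result.txt, the file build_index.py expects to find.
from pathlib import Path

import pdfplumber

docs = Path("./documents")

with pdfplumber.open(docs / "source.pdf") as pdf:  # "source.pdf" is assumed
    # Pages with no extractable text return None; substitute an empty string.
    text = "\n".join(page.extract_text() or "" for page in pdf.pages)

(docs / "result.txt").write_text(text)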
index/index.jsonl
ADDED
The diff for this file is too large to render.
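Each line of index.jsonl is one record written by build_index.py: a text chunk paired with its embedding vector. Schematically (content and vector values abbreviated, not actual data):

{"content": "first ~4000-character chunk of documents/result.txt ...", "embedding": [0.0123, -0.0045, ...]}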
prompts/passage_summarization.txt
ADDED
@@ -0,0 +1,4 @@
Summarize the following passage in detail
passage: <<PASSAGE>>

summary:
prompts/question_answering.txt
ADDED
@@ -0,0 +1,7 @@
Use the passage to write a detailed answer to the following question

passage: <<PASSAGE>>

question: <<QUESTION>>

answer:
requirements.txt
ADDED
@@ -0,0 +1,5 @@
pdfplumber
textwrap3
openai
python-dotenv
jsonlines
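Note that the scripts import the standard-library textwrap, not the textwrap3 backport pinned here, and pdfplumber is only needed for whatever step produced documents/result.txt; neither pin is exercised by the code in this commit.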
utils.py
ADDED
@@ -0,0 +1,46 @@
import openai
import numpy as np


def gpt3_embeddings(text, model="text-similarity-ada-001"):
    """Return the embedding vector for `text`, or None if the API call fails."""
    text = text.replace("\n", " ")
    embedding = None
    try:
        embedding = openai.Embedding.create(input=[text], model=model)["data"][0][
            "embedding"
        ]
    except Exception as err:
        print(f"Sorry, there was a problem: {err}")

    return embedding


def gpt3_completion(prompt, max_tokens=128, model="text-davinci-003"):
    """Return the completion text for `prompt`, or None if the API call fails."""
    response = None
    try:
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
        )["choices"][0]["text"]

    except Exception as err:
        print(f"Sorry, there was a problem:\n\n{err}")

    return response


def load_prompt(path):
    with open(path) as f:
        return f.read()


def cosine_similarity(emb1, emb2):
    # cos(a, b) = a . b / (|a| |b|)
    return np.dot(emb1, emb2) / (
        (np.dot(emb1, emb1) ** 0.5) * (np.dot(emb2, emb2) ** 0.5)
    )


def dot_similarity(emb1, emb2):
    return np.dot(emb1, emb2)
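utils.py defines both cosine_similarity and dot_similarity, and chat.py ranks passages with the plain dot product. OpenAI's embedding vectors are, per OpenAI's documentation, normalized to unit length, in which case the two measures give the same ranking. A quick sanity check of that equivalence on synthetic unit vectors:

# Sanity check: on unit-length vectors, dot product equals cosine similarity.
import numpy as np

from utils import cosine_similarity, dot_similarity

rng = np.random.default_rng(0)
a = rng.normal(size=1024)
b = rng.normal(size=1024)
a /= np.linalg.norm(a)  # normalize to unit length, mirroring API embeddings
b /= np.linalg.norm(b)

assert np.isclose(dot_similarity(a, b), cosine_similarity(a, b))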