initial
- .gitattributes +1 -0
- Answering_Agent.py +63 -0
- Head_Agent.py +97 -0
- Obnoxious_Agent.py +47 -0
- Query_Agent.py +57 -0
- Relevant_Documents_Agent.py +114 -0
- app.py +100 -0
- machine-learning.pdf +3 -0
- requirements.txt +16 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+machine-learning.pdf filter=lfs diff=lfs merge=lfs -text
Answering_Agent.py
ADDED
@@ -0,0 +1,63 @@
import openai
from openai import OpenAI

from Query_Agent import extract_action


class Answering_Agent:
    def __init__(self, openai_api_key) -> None:
        # TODO: Initialize the Answering_Agent
        # openai_api_key = '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'

        self.openai_client = openai
        openai.api_key = openai_api_key

    def get_document_content(self, doc_id):
        # This is a placeholder function; implement the logic to retrieve the actual content,
        # for example by querying a database or an API with the document ID.
        return "Document content for ID " + doc_id

    def generate_response(self, query, docs, conv_history, k=5, mode="chatty"):
        # TODO: Generate a response to the user's query
        # Concatenate the contents of the top k relevant documents
        # top_docs = docs[2][:k]  # Get the top k documents based on the score

        # Retrieve the content for each of the top documents
        context_texts = "\n\n".join(
            [f"Context {idx + 1}: {result[2]}" for idx, result in enumerate(docs)])  # result[2] is the text
        print(f"context_texts is : {context_texts} \n\n\n")
        # docs_content = "\n".join([self.get_document_content(doc["id"]) for doc in docs])

        # Optional: Include conversation history in the prompt if provided
        if conv_history:
            history_str = "\n".join([f"{turn['role']}: {turn['content']}" for turn in conv_history])
            prompt = f"""Based on the following documents and conversation history, answer the query:
Documents:
{context_texts}
Conversation:
{history_str}
Query: {query}
ONLY if neither Documents nor Conversation has anything to do with the query, you must reply directly: No relevant documents found in the documents. Please ask a relevant question to the book on Machine Learning.
Answer: """

        else:
            prompt = f"Based on the following documents, answer the query:\nDocuments:\n{context_texts}\nQuery: {query}\n ONLY if Documents has nothing to do with the query, you must reply directly: No relevant documents found in the documents. Please ask a relevant question to the book on Machine Learning. \nAnswer: "

        # Adjust the prompt, max_tokens, and temperature based on the mode
        max_tokens = 4000 if mode == "chatty" else 100
        temperature = 0.9 if mode == "chatty" else 0.5

        # Chatty mode: ask for a more detailed answer
        if mode == "chatty":
            prompt = prompt + "Please provide a detailed and comprehensive response that includes background information, relevant examples, and any important distinctions or perspectives related to the topic. Where possible, include step-by-step explanations or descriptions to ensure clarity and depth in your answer."

        client = OpenAI(api_key=openai.api_key)
        message = {"role": "user", "content": prompt}
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message],
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["\n", "Query:"]
        )
        return response.choices[0].message.content
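A minimal usage sketch, not part of the committed file: it assumes a valid OpenAI API key (placeholder below) and docs shaped as (id, score, text) tuples, matching what Query_Agent.query_vector_store returns.

# Hypothetical usage sketch; key and document text are placeholders.
from Answering_Agent import Answering_Agent

agent = Answering_Agent(openai_api_key="YOUR_OPENAI_API_KEY")
docs = [("chunk-1", 0.87, "Overfitting occurs when a model memorizes noise in the training data...")]
history = [{"role": "user", "content": "Hi"},
           {"role": "assistant", "content": "How can I help you today?"}]
reply = agent.generate_response("What is overfitting?", docs, conv_history=history, mode="concise")
print(reply)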
Head_Agent.py
ADDED
@@ -0,0 +1,97 @@
import pinecone
import openai
from openai import OpenAI
from Answering_Agent import Answering_Agent
from Obnoxious_Agent import Obnoxious_Agent
from Query_Agent import Query_Agent
from Relevant_Documents_Agent import Relevant_Documents_Agent


class Head_Agent:
    def __init__(self, openai_key, pinecone_key, pinecone_index_name) -> None:
        # TODO: Initialize the Head_Agent
        # Store API keys and Pinecone index name
        openai_key = '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'

        self.openai_client = OpenAI(api_key=openai_key)

        self.openai_key = openai_key
        self.pinecone_key = pinecone_key
        self.pinecone_index_name = pinecone_index_name

        # Initialize Pinecone

        # Placeholders for sub-agents, to be initialized in setup_sub_agents
        self.obnoxious_agent = None
        self.query_agent = None
        self.answering_agent = None
        self.relevant_documents_agent = None

        # Set up the sub-agents
        self.setup_sub_agents()

    def get_completion(self, prompt, model="gpt-4"):
        message = {"role": "user", "content": prompt}
        response = self.openai_client.chat.completions.create(
            model=model,
            messages=[message]
        )
        return response.choices[0].message.content

    def setup_sub_agents(self):
        # TODO: Setup the sub-agents
        # Set up the Obnoxious_Agent
        self.obnoxious_agent = Obnoxious_Agent()  # Assuming no external API client is required

        # Set up the Query_Agent with the Pinecone index
        self.query_agent = Query_Agent(pinecone_index=self.pinecone_index_name, openai_client=openai,
                                       embeddings="text-embedding-ada-002")

        # Set up the Answering_Agent and Relevant_Documents_Agent
        self.answering_agent = Answering_Agent(openai_api_key=self.openai_key)
        openai_client = OpenAI(api_key=self.openai_key)
        self.relevant_documents_agent = Relevant_Documents_Agent(openai_client)

    def process_query(self, user_input, conversation_history):
        # Check if the query is obnoxious
        # conversation_history = get_conversation()

        if self.obnoxious_agent.check_query(user_input) == "Yes":
            return "Sorry, I cannot respond to this query."

        greetings = [
            "hi", "hello", "how are you", "hey", "good morning",
            "good afternoon", "good evening", "greetings", "what's up",
            "howdy", "hi there", "hello there", "hey there"
        ]
        if user_input.lower() in greetings:
            return "How can I help you today?"

        # Retrieve relevant documents for the query (simplified example)
        # In a real scenario, you might need to preprocess the query for embeddings
        relevant_docs = self.query_agent.query_vector_store(user_input, k=5)

        print("relevant_docs length:")
        print(len(relevant_docs))
        if len(relevant_docs) == 0:
            none_relevant_docs = "No relevant documents found in the documents. Please ask a relevant question to the book on Machine Learning."
            return none_relevant_docs

        # Choose the response mode
        prompt_choose_mode = "Infer whether the user wants the response to be chatty or concise. If the response needs to be chatty you must answer chatty, otherwise answer concise, and the answer must contain only that one word. Here is user_input: " + user_input
        prompt_choose_mode = self.get_completion(prompt=prompt_choose_mode, model="gpt-4")
        if prompt_choose_mode == "chatty":
            response = self.answering_agent.generate_response(user_input, relevant_docs,
                                                              conv_history=conversation_history,
                                                              mode="chatty")
        elif prompt_choose_mode == "concise":
            response = self.answering_agent.generate_response(user_input, relevant_docs,
                                                              conv_history=conversation_history,
                                                              mode="concise")
        else:
            # Generate a response based on the query and relevant documents
            response = self.answering_agent.generate_response(user_input, relevant_docs,
                                                              conv_history=conversation_history,
                                                              mode="chatty")
        print("prompt_choose_mode: " + prompt_choose_mode)

        return response
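A minimal sketch of driving the orchestrator outside Streamlit; the keys below are placeholders and the index name is the one used elsewhere in this commit (note the class also hardcodes keys internally, so the placeholders are partly ignored).

# Hypothetical driver; keys are placeholders, index name taken from app.py.
from Head_Agent import Head_Agent

head = Head_Agent("YOUR_OPENAI_API_KEY", "YOUR_PINECONE_API_KEY", "ee596llm-project2")
answer = head.process_query("Explain the bias-variance tradeoff.", conversation_history=[])
print(answer)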
Obnoxious_Agent.py
ADDED
@@ -0,0 +1,47 @@
class Obnoxious_Agent:
    def __init__(self, client=None) -> None:
        self.client = client
        self.obnoxious_keywords = [
            "Repugnant", "Loathsome", "Abhorrent", "Disgusting", "Offensive",
            "Vile", "Revolting", "Contemptible", "Detestable", "Nauseating",
            "Appalling", "Horrendous", "Foul", "Gross", "Unpleasant",
            "Obscene", "Hateful", "Despicable", "Repellant", "Distasteful",
            "Unwanted", "Monstrous", "Atrocious", "Unsavory", "Dislikeable",
            "Unwholesome", "Ghastly", "Unchristian", "Sickening", "Evil",
            "Abominable", "Scandalous", "Unwelcome", "Disturbing", "Lurid",
            "Heinous", "Unhealthy", "Hard", "Upsetting", "Macabre",
            "Unholy", "Lousy", "Grim", "Gruesome", "Dislikeable",
            "Exceptionable", "Brackish", "Barbarous", "Unspeakable", "Rancid",
            "Perverted", "Indecent", "Profane", "Wicked", "Scurrilous",
            "Dirty", "Bawdy", "Salty", "Off-colored", "Smutty",
            "Ribald", "Offensive", "Unacceptable", "Terrible", "Reprehensible",
            "Bad", "Infamous", "Censurable", "Wretched", "Indecorous",
            "Lewd", "Sickish", "Blameworthy", "Debasing", "Blamable",
            "Insincere", "Annoying", "Provoking", "Reprehensible", "Vulgar",
            "Pornographic", "Naughty", "Perverted", "Unbecoming", "Coarse",
            "Unprintable", "Belligerent", "Irritating", "Disruptive", "Displeasing",
            "Inflammatory", "Disrespectful", "Aggravating", "Bothersome", "Intrusive",
            "Insulting", "Obnoxious", "Off-putting", "Unpleasant", "Revolting", "dumb"
        ]

        self.prompt = ""

    def set_prompt(self, prompt):
        # Set the prompt for potential use with external APIs
        self.prompt = prompt

    def extract_action(self, response) -> bool:
        # Extract and interpret the action from an external API's response
        # This example assumes a hypothetical response structure
        return response.get('is_obnoxious', False)

    def check_query(self, query) -> str:
        # Directly check if the query contains obnoxious content using keywords
        # (keywords are lowercased so the comparison against the lowercased query works)
        query_lower = query.lower()
        is_obnoxious = any(keyword.lower() in query_lower for keyword in self.obnoxious_keywords)

        # If integrating with an external API:
        # response = self.client.some_api_method(self.prompt.format(query=query))
        # is_obnoxious = self.extract_action(response)

        return "Yes" if is_obnoxious else "No"
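The check is case-insensitive substring matching against the keyword list, so a quick sanity check might look like this (a sketch, not part of the commit):

# Sketch only: exercises the keyword-based check.
from Obnoxious_Agent import Obnoxious_Agent

agent = Obnoxious_Agent()
print(agent.check_query("That was a dumb idea"))        # "Yes" -- contains a listed keyword
print(agent.check_query("What is gradient descent?"))   # "No"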
Query_Agent.py
ADDED
@@ -0,0 +1,57 @@
from pinecone import Pinecone
import openai
from openai import OpenAI


def extract_action(response, query=None):
    # TODO: Extract the action from the response
    # For example, retrieve document IDs and their similarity scores
    relevant_docs = [{"id": match["id"], "score": match["score"]} for match in response["matches"]]

    return relevant_docs


class Query_Agent:

    def __init__(self, pinecone_index, openai_client, embeddings) -> None:
        # TODO: Initialize the Query_Agent agent
        # Initialize the OpenAI client with the provided API key
        openai_api_key = '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'
        self.client = OpenAI(api_key=openai_api_key)

        # Specify the embeddings model to use for generating query embeddings
        self.embeddings_model = embeddings

        pc = Pinecone(api_key="52ef9136-6188-4e51-af13-9639bf95c163")

        # Initialize the Pinecone client and connect to the specified index
        self.pinecone_index = pc.Index(pinecone_index)

    def query_vector_store(self, query, k=5):
        # TODO: Query the Pinecone vector store
        # Generate an embedding for the query

        query = query.replace("\n", " ")

        query_embedding = self.client.embeddings.create(
            input=[query],
            model=self.embeddings_model
        ).data[0].embedding

        # Query the Pinecone index using the generated embedding
        query_results = self.pinecone_index.query(
            vector=query_embedding,
            top_k=k,
            include_metadata=True)

        # Extract and return the most relevant documents along with their scores
        relevant_docs = [
            (result['id'], result['score'], result['metadata']['text'])
            for result in query_results['matches']
        ]

        return relevant_docs

    def set_prompt(self, prompt):
        # TODO: Set the prompt for the Query_Agent agent
        self.prompt = prompt
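A short usage sketch (assuming the hardcoded OpenAI and Pinecone keys and the index name are valid; in this version the openai_client argument is stored only implicitly, since the class builds its own client):

# Hypothetical usage; index name taken from app.py.
import openai
from Query_Agent import Query_Agent

qa = Query_Agent(pinecone_index="ee596llm-project2", openai_client=openai,
                 embeddings="text-embedding-ada-002")
for doc_id, score, text in qa.query_vector_store("What is a support vector machine?", k=3):
    print(doc_id, round(score, 3), text[:80])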
Relevant_Documents_Agent.py
ADDED
@@ -0,0 +1,114 @@
import numpy as np
from langchain_community.embeddings import OpenAIEmbeddings

from pinecone import Pinecone, ServerlessSpec
from tqdm.notebook import tqdm
import langchain
import openai
from openai import OpenAI
import string
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader
import os


def get_text_from_document(document):
    # Assuming 'document' is a list of Document objects, each with a 'page_content' attribute,
    # concatenate the page_content of each Document into a single text string
    # text = "".join([doc.page_content for doc in document])
    text = "".join(document).replace('\n\n', '\n')

    # Now 'text' should contain the actual text extracted from the PDF
    print(f"Total length of text: {len(text)} characters")

    # If you want to see a part of the extracted text
    print(text[:1000])  # Adjust the number as necessary to inspect more of the text

    return text


# Function to get the embeddings of the text using the OpenAI text-embedding-ada-002 model
def get_embedding(text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    openai_key = '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'
    client = OpenAI(api_key=openai_key)
    return client.embeddings.create(input=[text], model=model).data[0].embedding


## TODO: Function to query the Pinecone vector store and return the top-k results
def query_pinecone_vector_store(query, top_k=5):
    # Generate an embedding for the query
    query_embedding = get_embedding(query)

    # pc = Pinecone(api_key="c25f9e89-fc9e-4d21-b3eb-057dbc21c17c")
    pc = Pinecone(api_key="52ef9136-6188-4e51-af13-9639bf95c163")
    pinecone_index_name = "ee596llm-project2"
    index = pc.Index(pinecone_index_name)

    # Query the Pinecone index with the generated embedding
    query_results = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True
    )

    # Extract and return the most relevant documents along with their scores
    relevant_docs = [
        (result['id'], result['score'], result['metadata']['text'])
        for result in query_results['matches']
    ]
    return relevant_docs


def get_completion(prompt, model="gpt-3.5-turbo"):
    message = {"role": "user", "content": prompt}
    client = OpenAI(api_key='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')
    response = client.chat.completions.create(
        model=model,
        messages=[message]
    )
    return response.choices[0].message.content


def generate_answer_with_context(query, results):
    # Construct the prompt with the top-k results as context
    context_texts = "\n\n".join(
        [f"Context {idx + 1}: {result[2]}" for idx, result in enumerate(results)])  # result[2] is the text
    print(f"context_texts is : {context_texts} \n\n\n")
    prompt = f"Given the following contexts related to the query '{query}', provide a detailed answer:\n\n{context_texts}\n\nAnswer the query:"

    # Generate the answer using the GPT-3.5 Turbo model with the constructed prompt
    answer = get_completion(prompt, model="gpt-3.5-turbo")

    return answer


class Relevant_Documents_Agent:
    def __init__(self, openai_client) -> None:
        # TODO: Initialize the Relevant_Documents_Agent
        self.openai_client = openai_client

    def get_relevance(self, conversation) -> str:
        # TODO: Determine whether the returned documents are relevant
        # Generate embeddings for the query and all documents

        top_k_results = query_pinecone_vector_store(conversation, top_k=4)

        answer = generate_answer_with_context(conversation, top_k_results)

        most_relevant_document = answer

        return most_relevant_document

    def compute_cosine_similarity(self, vec1, vec2):
        # Ensure the vectors are numpy arrays for mathematical operations
        vec1 = np.array(vec1)
        vec2 = np.array(vec2)

        # Compute the cosine similarity
        dot_product = np.dot(vec1, vec2)
        norm_vec1 = np.linalg.norm(vec1)
        norm_vec2 = np.linalg.norm(vec2)
        cosine_similarity = dot_product / (norm_vec1 * norm_vec2)

        return cosine_similarity
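compute_cosine_similarity is pure NumPy, so it can be checked in isolation; a small sketch (not part of the commit):

# Sketch: identical vectors give 1.0, orthogonal vectors give 0.0.
from Relevant_Documents_Agent import Relevant_Documents_Agent

agent = Relevant_Documents_Agent(openai_client=None)
print(agent.compute_cosine_similarity([1.0, 0.0], [1.0, 0.0]))  # 1.0
print(agent.compute_cosine_similarity([1.0, 0.0], [0.0, 1.0]))  # 0.0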
app.py
ADDED
@@ -0,0 +1,100 @@
from datetime import datetime
from Obnoxious_Agent import Obnoxious_Agent
from Relevant_Documents_Agent import Relevant_Documents_Agent
from Query_Agent import Query_Agent
from Answering_Agent import Answering_Agent
from datetime import datetime
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader
import streamlit as st
from openai import OpenAI
from Head_Agent import Head_Agent

st.title("Mini Project 2: Streamlit Chatbot")

# TODO: Replace with your actual OpenAI API key
client = OpenAI(api_key='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')


# Define a function to get the conversation history (Not required for Part-2, will be useful in Part-3)
def get_conversation():
    # ... (code for getting conversation history)
    history_conversation = []
    for message in st.session_state.messages:
        if message["sender"] == "user":
            cur_map = dict()
            cur_map['role'] = "user"
            cur_map['content'] = message['content']
            history_conversation.append(cur_map)
        elif message["sender"] == "assistant":
            cur_map = dict()
            cur_map['role'] = "assistant"
            cur_map['content'] = message['content']
            history_conversation.append(cur_map)
    return history_conversation


def display_all_chat_messages():
    for message in st.session_state.messages:
        # st.text_area("", value=message["content"], key=message["sender"] + str(message["id"]))
        if message["sender"] == "user":
            with st.chat_message("user"):  # show the user avatar
                st.container().markdown(f"**You [{message['timestamp']}]:** {message['content']}")
        elif message["sender"] == "assistant":
            with st.chat_message("assistant"):  # show the assistant avatar
                st.container().markdown(f"**Assistant [{message['timestamp']}]:** {message['content']}")


# Initialize the Head Agent with the necessary parameters
if 'head_agent' not in st.session_state:
    openai_key = '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'
    pinecone_key = "52ef9136-6188-4e51-af13-9639bf95c163"
    pinecone_index_name = "ee596llm-project2"
    st.session_state.head_agent = Head_Agent(openai_key, pinecone_key, pinecone_index_name)

# Your existing code for handling user input and displaying messages
# Replace the direct call to `get_completion` with `st.session_state.head_agent.process_query(prompt)`

# Example:
if prompt := st.chat_input("What would you like to chat about?"):
    try:
        if "messages" not in st.session_state:
            st.session_state.messages = []
        message_id = len(st.session_state.messages)
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        user_message = {"id": message_id, "sender": "user", "content": prompt, "timestamp": current_time}
        st.session_state.messages.append(user_message)

        # Instantiate the Obnoxious Agent
        obnoxious_agent = Obnoxious_Agent()
        is_obnoxious = obnoxious_agent.check_query(prompt)  # returns the string "Yes" or "No"
        # Respond based on the check
        if is_obnoxious == "Yes":
            response = "Yes"
        else:
            response = "No"
        # You can then display this response to the user or use it as part of your application logic
        is_obnoxious_response = "Is the query obnoxious? " + response
        # st.write("Is the query obnoxious? " + response)

        # display_message(user_message)

    except Exception as e:
        st.error("Failed to process your message. Please try again.")

    # ... (display user message in the chat interface)
    # display_message(user_message)  # Use the display_message function to show the user's message

    # Generate AI response
    # with st.chat_message("assistant"):  # removed: chat message containers cannot be nested here

    # ... (send request to OpenAI API)
    # ... (get AI response and display it)
    ai_response = st.session_state.head_agent.process_query(prompt, get_conversation())

    # ... (append AI response to messages)
    ai_message = {"id": len(st.session_state.messages), "sender": "assistant", "content": ai_response,
                  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
    st.session_state.messages.append(ai_message)
    print(ai_message)
    # display_message(ai_message)
    display_all_chat_messages()
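For reference, each entry in st.session_state.messages follows the dictionary shape used above, and get_conversation() reduces it to OpenAI-style role/content pairs. A standalone sketch of that mapping, without Streamlit state:

# Sketch of the mapping performed by get_conversation(); data here is illustrative only.
messages = [
    {"id": 0, "sender": "user", "content": "What is regularization?", "timestamp": "2024-03-01 10:00:00"},
    {"id": 1, "sender": "assistant", "content": "Regularization penalizes model complexity...", "timestamp": "2024-03-01 10:00:05"},
]
history = [{"role": m["sender"], "content": m["content"]}
           for m in messages if m["sender"] in ("user", "assistant")]
print(history)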
machine-learning.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:32251b6d23adf23e887ba1dcb6fbd91c5369a5c6c99df4b16dd2da56bf0fdad5
size 3921784
requirements.txt
ADDED
@@ -0,0 +1,16 @@
openai
langchain
langchain_openai
unstructured
datetime
unstructured_inference
pikepdf
pypdf
numpy
pinecone-client
tiktoken
pandas
pillow_heif
sentence_transformers
streamlit
IPython