import os
import sqlite3

import gradio as gr
import numpy as np
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity

# The OpenAI API key is read from the environment variable named "Secret"
client = OpenAI(api_key=os.environ["Secret"])
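
# Retrieval setup: user queries are embedded with text-embedding-ada-002 and
# compared by cosine similarity against chunk embeddings stored in SQLite.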
def find_closest_neighbors(vector1, dictionary_of_vectors):
    """Embed the query text and return the four closest (text, similarity) pairs
    from the dictionary of stored chunk embeddings."""
    vector = client.embeddings.create(
        input=vector1,
        model="text-embedding-ada-002"
    ).data[0].embedding
    vector = np.array(vector)

    # Cosine similarity between the query embedding and every stored chunk
    cosine_similarities = {}
    for key, value in dictionary_of_vectors.items():
        cosine_similarities[key] = cosine_similarity(
            vector.reshape(1, -1), value.reshape(1, -1)
        )[0][0]

    sorted_cosine_similarities = sorted(
        cosine_similarities.items(), key=lambda x: x[1], reverse=True
    )
    match_list = sorted_cosine_similarities[0:4]
    return match_list
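
# Chat handler for Gradio: rebuilds the chunk-embedding dictionary from the
# database on every call, retrieves the closest chunks as context, and streams
# a gpt-3.5-turbo completion back to the interface.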
def predict(message, history):
    # Connect to the database
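    # The SQLite file is expected to contain a `chunks` table with a `text`
    # column and an `embedding` column holding space-separated floats. The
    # exact DDL is an assumption inferred from the SELECT below and the
    # np.fromstring parsing:
    #
    #   CREATE TABLE chunks (
    #       text TEXT,
    #       embedding TEXT
    #   );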
    conn = sqlite3.connect('QRIdatabase7.db')
    cursor = conn.cursor()
    cursor.execute('''SELECT text, embedding FROM chunks''')
    rows = cursor.fetchall()

    dictionary_of_vectors = {}
    for row in rows:
        text = row[0]
        embedding_str = row[1]
        # Embeddings are stored as space-separated floats in a text column
        embedding = np.fromstring(embedding_str, sep=' ')
        dictionary_of_vectors[text] = embedding
    conn.close()

    # Find the closest neighbors to the user's message
    match_list = find_closest_neighbors(message, dictionary_of_vectors)
    context = ''
    for match in match_list:
        context += str(match[0])
    context = context[:1500]  # Limit context length

    prep = f"This is an OpenAI model tuned to answer questions specific to the Qualia Research Institute, a research institute that focuses on consciousness. Here are some passages that may or may not be useful in answering the user's query, which is potentially related to consciousness, the human experience, and phenomenology: {context}. Here is a question specific to QRI and consciousness in general. Q: {message} A: "
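
    # Build the OpenAI message list: prior Gradio (user, assistant) turns first,
    # then the retrieval-augmented prompt as the final user message.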
    messages = []
    # Convert history to the expected format
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": prep})

    # Stream the completion so the UI can update incrementally
    stream = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=1.0,
        stream=True
    )

    partial_message = ""
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            partial_message += chunk.choices[0].delta.content
            yield partial_message
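
# UI: a ChatInterface wired to predict(); because predict() is a generator,
# responses stream into the chat window as they are produced.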
with gr.Blocks(title="QRI Research Assistant") as demo:
    chatbot = gr.ChatInterface(
        predict,
        title="QRI Research Assistant",
        description="Ask questions about consciousness, human experience, and phenomenology based on QRI research.",
        examples=[
            "What is consciousness?",
            "How does QRI approach the study of phenomenology?",
            "What are the key theories about qualia?"
        ]
    )
if __name__ == "__main__":
    demo.launch()