File size: 3,206 Bytes
a957f06
1a7f63c
 
 
21e989d
082f753
10c8907
 
21e989d
9611b40
1a7f63c
21e989d
 
1a7f63c
21e989d
 
 
1a7f63c
 
 
 
 
 
e9fbc48
1a7f63c
e9fbc48
 
 
 
 
 
 
 
 
 
 
 
 
21e989d
1a7f63c
e9fbc48
1a7f63c
 
e9fbc48
21e989d
 
529ef05
21e989d
 
 
e9fbc48
21e989d
 
 
 
 
 
 
e9fbc48
 
 
 
 
21e989d
 
 
 
1a7f63c
21e989d
 
 
 
 
 
 
 
 
 
 
4da74af
 
21e989d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import sklearn
import sqlite3
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
import os
import gradio as gr

# Module-level OpenAI client; the API key is read from the "Secret" environment
# variable (raises KeyError at import time if it is unset).
client = OpenAI(api_key=os.environ["Secret"])

def find_closest_neighbors(vector1, dictionary_of_vectors):
    """Embed a query string and return its closest matches from the corpus.

    Args:
        vector1: Query text to embed (sent to the OpenAI embeddings API
            with the text-embedding-ada-002 model).
        dictionary_of_vectors: Mapping of chunk text -> 1-D numpy embedding
            vector (same embedding model as the query).

    Returns:
        List of up to four ``(text, similarity)`` tuples, sorted by cosine
        similarity in descending order.

    NOTE(review): the original docstring said "three closest neighbors",
    but the slice below has always kept the top FOUR matches; the docstring
    is corrected here rather than the behavior, so downstream context
    building is unchanged.
    """
    # Embed the incoming query with the same model used for the stored chunks.
    vector = client.embeddings.create(
        input=vector1,
        model="text-embedding-ada-002"
    ).data[0].embedding
    vector = np.array(vector)

    # Cosine similarity of the query against every stored chunk embedding.
    cosine_similarities = {
        key: cosine_similarity(vector.reshape(1, -1), value.reshape(1, -1))[0][0]
        for key, value in dictionary_of_vectors.items()
    }

    # Highest-similarity chunks first; keep the top four.
    sorted_cosine_similarities = sorted(
        cosine_similarities.items(), key=lambda item: item[1], reverse=True
    )
    return sorted_cosine_similarities[:4]

def predict(message, history):
    """Stream a chat-completion answer grounded in QRI document chunks.

    Args:
        message: The user's current question (plain text).
        history: List of ``(user, assistant)`` message pairs from prior turns.

    Yields:
        Progressively longer partial responses as tokens stream in, so the
        Gradio UI can render the reply incrementally.
    """
    # Load every chunk's text and embedding from the local SQLite database.
    # try/finally guarantees the connection is closed even if parsing fails.
    conn = sqlite3.connect('QRIdatabase7.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''SELECT text, embedding FROM chunks''')
        rows = cursor.fetchall()
        dictionary_of_vectors = {}
        for text, embedding_str in rows:
            # Embeddings appear to be stored as space-separated floats
            # (the old np.fromstring(..., sep=' ') call implies this) —
            # np.fromstring is deprecated, so parse via split() instead.
            dictionary_of_vectors[text] = np.array(embedding_str.split(), dtype=float)
    finally:
        conn.close()

    # Retrieve the most relevant chunks for this query and concatenate
    # their text into a single context string.
    match_list = find_closest_neighbors(message, dictionary_of_vectors)
    context = ''.join(str(match[0]) for match in match_list)
    context = context[:1500]  # Limit context length

    prep = f"This is an OpenAI model tuned to answer questions specific to the Qualia Research institute, a research institute that focuses on consciousness. Here are some question-specific passages selected that may or may not be useful in answering user queries. Here is the user query to answer, potentially related to consciousness, the human experience, and phenomenology: {context}. Here is a question specific to QRI and consciousness in general Q: {message} A: "

    # Rebuild the conversation in the OpenAI chat format, then append the
    # context-augmented prompt as the final user turn.
    messages = []
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": prep})

    stream = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=1.0,
        stream=True
    )

    # Accumulate streamed deltas and yield the growing message after each one.
    partial_message = ""
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            partial_message += chunk.choices[0].delta.content
            yield partial_message

# Build the Gradio UI: a ChatInterface wired to `predict`, hosted inside a
# Blocks container so the page itself also carries the app title.
with gr.Blocks(title="QRI Research Assistant") as demo:
    chat_ui = gr.ChatInterface(
        fn=predict,
        title="QRI Research Assistant",
        description="Ask questions about consciousness, human experience, and phenomenology based on QRI research.",
        examples=[
            "What is consciousness?",
            "How does QRI approach the study of phenomenology?",
            "What are the key theories about qualia?"
        ]
    )

# Launch the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()