# Spaces: Running
import os

import hnswlib
import joblib
import numpy as np
import streamlit as st
from openai import OpenAI
# Streamlit re-executes this script from the top on every widget interaction.
# Building the API client and deserializing the index inside a cached function
# means that work happens once per server process instead of on every rerun.
# NOTE(review): show_spinner=False keeps the cached call from drawing anything
# before st.set_page_config runs further down; older Streamlit releases require
# set_page_config to be the first Streamlit command — confirm against the
# pinned version.
@st.cache_resource(show_spinner=False)
def _load_resources():
    """Create the OpenAI client and load the serialized retrieval data.

    Returns:
        A ``(client, model_data)`` tuple: the OpenAI client and the object
        deserialized from ``rag_model_hnsw.joblib``.
    """
    # Initialize OpenAI client using secret from Hugging Face Spaces
    # (read from the "POCJujitsu" environment variable).
    api_client = OpenAI(api_key=os.getenv("POCJujitsu"))
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only deploy an artifact produced by a trusted build.
    loaded = joblib.load("rag_model_hnsw.joblib")
    return api_client, loaded


# Load serialized HNSW index and document chunks
client, model_data = _load_resources()
chunks = model_data["chunks"]
index = model_data["index"]
# Embed query using OpenAI embedding API
def embed_query(text):
    """Embed ``text`` with the ``text-embedding-3-small`` model.

    Returns:
        A float32 NumPy array holding the first returned embedding, reshaped
        to a single row (shape ``(1, -1)``).
    """
    result = client.embeddings.create(
        input=text,
        model="text-embedding-3-small",
    )
    first_item = result.data[0]
    vector = np.array(first_item.embedding, dtype=np.float32)
    # One row, as many columns as the embedding has values.
    return vector.reshape(1, -1)
# Semantic search using HNSWlib
def search(query, k=3):
    """Return the stored chunks nearest to ``query`` in embedding space.

    Args:
        query: Text to embed and look up in the index.
        k: Maximum number of chunks to return. Capped at the number of
            elements currently stored in the index.

    Returns:
        A list of entries from ``chunks``, in the order the index returns
        their labels. Empty when ``k`` is not positive or the index holds
        no elements.
    """
    # knn_query raises when it cannot produce k neighbours, so never ask for
    # more than the index currently holds.
    k = min(k, index.get_current_count())
    if k < 1:
        # Nothing can be retrieved; skip the embedding API call entirely.
        return []
    # asarray does not copy when embed_query already produced float32.
    query_vec = np.asarray(embed_query(query), dtype=np.float32)
    labels, _ = index.knn_query(query_vec, k=k)
    # labels has one row per query vector; a single query was submitted.
    return [chunks[label] for label in labels[0]]
# Chat modes
def chat_no_rag(question):
    """Send ``question`` to the chat model as-is, with no retrieved context.

    Returns:
        The message content of the first completion choice.
    """
    user_message = {"role": "user", "content": question}
    completion = client.chat.completions.create(
        messages=[user_message],
        model="gpt-3.5-turbo",
        max_tokens=300,
        temperature=0.5,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def chat_with_rag(question, context_chunks):
    """Answer ``question`` with the retrieved chunks supplied as context.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context_chunks: Text passages; joined with newlines to form the
            context section of the prompt.

    Returns:
        The message content of the first completion choice.
    """
    instructions = (
        "Usa el siguiente contexto como referencia para responder la pregunta. "
        "Puedes complementar con tus propios conocimientos si es necesario."
    )
    joined_context = "\n".join(context_chunks)
    # Layout: instructions, blank line, context section, blank line, question.
    user_message = {
        "role": "user",
        "content": (
            f"{instructions}\n\n"
            f"Contexto:\n{joined_context}\n\n"
            f"Pregunta: {question}\nRespuesta:"
        ),
    }
    completion = client.chat.completions.create(
        messages=[user_message],
        model="gpt-3.5-turbo",
        max_tokens=300,
        temperature=0.3,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def chat_with_rag_enhanced(question, context_chunks):
    """Answer ``question`` with retrieved context and an expert-persona prompt.

    The prompt asks the model to act as a martial-history expert and to
    explain the answer in a way that is easy for newcomers to follow.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context_chunks: Text passages; joined with newlines to form the
            context section of the prompt.

    Returns:
        The message content of the first completion choice.
    """
    context = "\n".join(context_chunks)
    prompt = (
        "Eres un experto en historia marcial. "
        # The period after "tema" ends this instruction; without it the
        # sentence ran straight into the next one ("...el tema Usa el...").
        "Explica con contexto la respuesta de una manera facil de entender para las personas que no saben mucho sobre el tema. "
        "Usa el siguiente contexto como referencia para responder la pregunta. "
        "Puedes complementar con tus propios conocimientos si es necesario.\n\n"
        f"Contexto:\n{context}\n\n"
        f"Pregunta: {question}\nRespuesta:"
    )
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
        max_tokens=300,
    )
    return response.choices[0].message.content
# Streamlit UI
# Renders the page, collects a question and a response mode, and on
# "Get Answer" routes the question to the matching chat function.
st.set_page_config(page_title="RAG JuJutsu Q&A")
# NOTE(review): the leading glyphs in the title, the input label and the
# answer heading look like mis-decoded emoji. They are kept byte-for-byte
# here; restore the intended characters from the original file.
st.title("π€ JuJutsu AI - Ask Anything")
st.markdown("Ask a question about jujutsu history, techniques, or philosophy.")
question = st.text_input("β Enter your question:")
mode = st.radio(
    "Choose response mode:",
    ["No RAG", "With RAG", "With RAG + Expert Prompt"],
)

if st.button("Get Answer"):
    if not question.strip():
        # A blank or whitespace-only box used to do nothing (or be sent to
        # the API as-is); tell the user instead and make no API call.
        st.warning("Please enter a question first.")
    else:
        if mode == "No RAG":
            answer = chat_no_rag(question)
        else:
            # Both RAG modes share the same retrieval step.
            retrieved = search(question)
            if mode == "With RAG":
                answer = chat_with_rag(question, retrieved)
            else:
                answer = chat_with_rag_enhanced(question, retrieved)
        st.markdown("### π§ Answer")
        st.write(answer)