Spaces:
Running
Running
File size: 3,183 Bytes
1300f65 f07eaf3 1300f65 fd875db 1300f65 f19fa3f fd875db f19fa3f 63c936e fd875db 1300f65 f07eaf3 1300f65 fd875db f07eaf3 fd875db f07eaf3 1300f65 fd875db f07eaf3 1300f65 f07eaf3 1300f65 f07eaf3 1300f65 f07eaf3 fd875db 1300f65 f07eaf3 1300f65 f07eaf3 1300f65 f07eaf3 1300f65 acda462 f07eaf3 1300f65 f07eaf3 1300f65 f07eaf3 1300f65 f07eaf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import streamlit as st
import joblib
import numpy as np
import hnswlib
import os
from openai import OpenAI
# The OpenAI API key is read from the "POCJujitsu" environment variable
# (stored as a secret on Hugging Face Spaces).
client = OpenAI(api_key=os.environ.get("POCJujitsu"))

# Deserialize the bundle that holds the HNSW index and the document chunks.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load a trusted file.
model_data = joblib.load("rag_model_hnsw.joblib")
chunks, index = model_data["chunks"], model_data["index"]
# Turn a query string into an embedding with the OpenAI embeddings API
def embed_query(text):
    """Return the embedding of *text* as a single-row float32 array.

    The text is sent to the ``text-embedding-3-small`` model through the
    module-level ``client``; the first embedding in the response is
    converted to float32 and reshaped to one row.
    """
    result = client.embeddings.create(model="text-embedding-3-small", input=text)
    vector = np.array(result.data[0].embedding, dtype=np.float32)
    return vector.reshape(1, -1)
# Semantic search using HNSWlib
def search(query, k=3):
    """Return the document chunks nearest to *query* in the HNSW index.

    Args:
        query: Question text to embed and look up.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most *k* entries from ``chunks``, in the order the
        index reports them. Empty when *k* is not positive or the index
        holds no elements.
    """
    if k <= 0:
        return []
    # knn_query fails when asked for more neighbours than the index stores,
    # so never request more than are available.
    k = min(k, index.get_current_count())
    if k == 0:
        return []
    # embed_query already returns a float32 single-row array; no extra cast
    # (and copy) is needed before querying.
    labels, _ = index.knn_query(embed_query(query), k=k)
    return [chunks[i] for i in labels[0]]
# Chat modes
def chat_no_rag(question):
    """Answer *question* with gpt-3.5-turbo alone, without retrieved context.

    Returns the message content of the first completion choice.
    """
    messages = [{"role": "user", "content": question}]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.5,
        max_tokens=300,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def chat_with_rag(question, context_chunks):
    """Answer *question* with gpt-3.5-turbo, using retrieved chunks as context.

    The chunks are joined with newlines and embedded in a Spanish prompt that
    tells the model to use them as reference while allowing its own knowledge.
    Returns the message content of the first completion choice.
    """
    instructions = (
        "Usa el siguiente contexto como referencia para responder la pregunta. "
        "Puedes complementar con tus propios conocimientos si es necesario."
    )
    joined_context = "\n".join(context_chunks)
    prompt = (
        f"{instructions}\n\n"
        f"Contexto:\n{joined_context}\n\n"
        f"Pregunta: {question}\nRespuesta:"
    )
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=300,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def chat_with_rag_enhanced(question, context_chunks):
    """Answer *question* with retrieved context and an expert persona prompt.

    The chunks are joined with newlines and embedded in a Spanish prompt that
    casts the model as a martial-history expert and asks for an explanation
    that is easy to follow for people unfamiliar with the topic.

    Args:
        question: The user's question.
        context_chunks: Iterable of text chunks to supply as context.

    Returns:
        The message content of the first completion choice.
    """
    context = "\n".join(context_chunks)
    prompt = (
        "Eres un experto en historia marcial. "
        # The period after "tema" keeps this instruction from running into the
        # next sentence once the adjacent literals are concatenated.
        "Explica con contexto la respuesta de una manera facil de entender para las personas que no saben mucho sobre el tema. "
        "Usa el siguiente contexto como referencia para responder la pregunta. "
        "Puedes complementar con tus propios conocimientos si es necesario.\n\n"
        f"Contexto:\n{context}\n\n"
        f"Pregunta: {question}\nRespuesta:"
    )
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
        max_tokens=300,
    )
    return response.choices[0].message.content
# Streamlit UI: collect a question and a response mode, then show the answer.
st.set_page_config(page_title="RAG JuJutsu Q&A")
st.title("🤖 JuJutsu AI - Ask Anything")
st.markdown("Ask a question about jujutsu history, techniques, or philosophy.")

question = st.text_input("❓ Enter your question:")
mode = st.radio(
    "Choose response mode:",
    ["No RAG", "With RAG", "With RAG + Expert Prompt"],
)

if st.button("Get Answer"):
    # Treat whitespace-only input as empty so it never reaches the API.
    cleaned_question = question.strip()
    if not cleaned_question:
        # Give feedback instead of silently ignoring the click.
        st.warning("Please enter a question first.")
    else:
        if mode == "No RAG":
            answer = chat_no_rag(cleaned_question)
        else:
            # Both RAG modes share the same retrieval step.
            retrieved = search(cleaned_question)
            if mode == "With RAG":
                answer = chat_with_rag(cleaned_question, retrieved)
            else:
                answer = chat_with_rag_enhanced(cleaned_question, retrieved)
        st.markdown("### 🧠 Answer")
        st.write(answer)
|