Spaces:
Running
Running
BuildNg
committed on
Commit
·
44d337e
1
Parent(s):
b4f989a
changes
Browse files- .gitignore +1 -0
- app.py +47 -0
- chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/data_level0.bin +3 -0
- chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/header.bin +3 -0
- chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/index_metadata.pickle +3 -0
- chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/length.bin +3 -0
- chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/link_lists.bin +3 -0
- chroma_db/chroma.sqlite3 +3 -0
- demo_client.py +66 -0
- rag.py +46 -0
- requirements.txt +8 -0
- space.yaml +4 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
app.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
FastAPI + Gradio combo app for Hugging Face Space
"""
import os, gradio as gr
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
from rag import retrieve_info  # your existing function

# -------- 1) Private key list ----------
# Store the real comma-separated keys in Space > Settings > Secrets.
VALID_KEYS = set(os.getenv("RAG_KEYS", "alpha,beta").split(","))

# -------- 2) FastAPI core app ----------
app = FastAPI(title="Moffitt RAG API")

class QueryIn(BaseModel):
    # Natural-language question plus how many chunks to retrieve.
    query: str
    k: int = 5

@app.post("/v1/query")
async def rag_query(body: QueryIn, x_api_key: str = Header(None)):
    """
    Secure JSON endpoint.
    Caller must send: X-API-Key: <one-of-valid-keys>
    """
    if x_api_key not in VALID_KEYS:
        raise HTTPException(status_code=401, detail="Invalid or missing X-API-Key")

    text = retrieve_info(body.query, body.k)
    return {"answer": text}

# -------- 3) Public Gradio UI ----------
def run(q, k):
    # gr.Number yields a float; retrieve_info needs an int k.
    return retrieve_info(q, int(k))

demo = gr.Interface(
    fn=run,
    inputs=["text", gr.Number(label="k (Number of chunks to retrieve)")],
    outputs=gr.Textbox(lines=25, label="Retrieved chunks"),
    allow_flagging="never",
    title="Moffitt RAG Demo",
    description="Type a question; we search Chroma with E5 embeddings."
)

# Mount Gradio at ROOT path "/" so the UI is actually served when the Space
# runs "app:app" under uvicorn.
# BUG FIX: the original only called demo.launch() under __main__, so with the
# FastAPI entrypoint the Gradio UI was never mounted and was unreachable.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Local development convenience: run the Gradio server directly.
    demo.launch()
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b97478e7c7ceec00d978669a0594a856756c91815c10abf5a9ec2269162a06a
|
3 |
+
size 32120000
|
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9143bae7b81c127da7707ac7626f296457404dd366b7e0ce40d2f900ae21d8cb
|
3 |
+
size 100
|
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af3caa649a549e8e441b1a32fb57b1616f5089ef44d3efc54e9a5737cdd33718
|
3 |
+
size 125068
|
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bb6f471778ae0d114debd720ff7831af68fb2737ed858558f828e647981699e
|
3 |
+
size 40000
|
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1e1ce593ebbb113202ce280877fdb67ac5ba31a6ab3b82500da6bb417897260
|
3 |
+
size 12164
|
chroma_db/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:811181a70c31391199665bec074d9e108cde48a15c52dec83c6b78cd072e261e
|
3 |
+
size 9785344
|
demo_client.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
"""
Simple CLI for the Moffitt RAG Space.

Before running, run the following on your terminal:
pip install requests python-dotenv

Then, in the same folder as this file demo_client.py, create a file named .env
Add in it the following line

RAG_API_KEY=password (replace the actual password here, no "" symbols needed)

Usage:
python demo_client.py "What are the common types of cancer" -k 5

Environment variables:
RAG_API_KEY Your X-API-Key header value (e.g. "alpha")
RAG_API_URL (optional) override the default Space URL
"""
import argparse
import os
import sys
import requests
from dotenv import load_dotenv

load_dotenv()
# BUG FIX: RAG_API_URL was documented above but never read; honor it here.
DEFAULT_URL = os.getenv(
    "RAG_API_URL", "https://buildng-moffitt-rag-demo.hf.space/v1/query"
)


def call_rag_api(question: str, k: int = 5, url: str = DEFAULT_URL) -> str:
    """POST *question* to the RAG endpoint and return its "answer" field.

    Exits the process when RAG_API_KEY is unset; raises requests.HTTPError
    on a 4xx/5xx response.
    """
    api_key = os.getenv("RAG_API_KEY")
    if not api_key:
        sys.exit("RAG_API_KEY not set in environment")

    payload = {"query": question, "k": k}
    headers = {
        "Content-Type": "application/json",
        "X-API-Key": api_key,
    }

    response = requests.post(url, json=payload, headers=headers, timeout=30)
    response.raise_for_status()  # 4xx/5xx → raises HTTPError
    return response.json()["answer"]

def main() -> None:
    """Parse CLI arguments, query the API, and print the answer."""
    parser = argparse.ArgumentParser(description="Query the Moffitt RAG API")
    parser.add_argument("question", help="Your natural-language question")
    parser.add_argument("-k", type=int, default=5, help="Top-k passages to retrieve (default: 5)")
    args = parser.parse_args()

    answer = call_rag_api(args.question, args.k, DEFAULT_URL)
    print("\n=== Answer ===\n")
    print(answer)

if __name__ == "__main__":
    main()

"""
If you want to integrate this with your ChatGPT API,
Reuse the same call_rag_api functions,
and when you need it, simply call
call_rag_api(question, k, DEFAULT_URL)
question is a string, your question, like "what is the risk of blood bone marrow transplant?"
k is an integer. Keep it a small integer (<10)
DEFAULT_URL is the variable above
"""
rag.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import zipfile
from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer
from langchain_chroma import Chroma
import torch
import gradio as gr


class SentenceTransformerWrapper:
    """Adapter exposing the embed_documents/embed_query interface that
    LangChain's Chroma wrapper expects, backed by sentence-transformers."""

    def __init__(self, model_name):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        # Convert the list of texts to embeddings
        return self.model.encode(texts, show_progress_bar=True).tolist()

    def embed_query(self, text):
        # Convert a single query to its embedding
        # NOTE(review): e5-* models are normally queried with a "query: "
        # prefix (and indexed with "passage: ") — confirm how the chroma_db
        # index was built before adding the prefix here.
        return self.model.encode(text).tolist()

# Location of the pre-built Chroma index shipped with the Space.
persist_directory = "chroma_db"
embedding_model = SentenceTransformerWrapper("intfloat/e5-base-v2")

vector_db = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding_model,
)

def retrieve_info(query, k=5):
    """Return the top-*k* retrieved chunks for *query* as a formatted string."""
    # BUG FIX: the gr.Interface below feeds gr.Number output straight in,
    # which is a float (or None when the field is left blank); Chroma needs
    # a positive int, so coerce defensively.
    k = 5 if k is None else max(1, int(k))
    results = vector_db.similarity_search(query, k)
    output = ""
    for i, doc in enumerate(results):
        output += f"Result {i+1}:\n Metadata: {doc.metadata}\n Content: {doc.page_content[:1000]}\n\n"
    return output


demo = gr.Interface(
    fn=retrieve_info,
    inputs=["text", gr.Number(label="k (Number of chunks to retrieve)")],
    outputs=[
        gr.Textbox(label="Output from DenseRetriever", lines=25),
    ],
)
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
|
2 |
+
uvicorn[standard]
|
3 |
+
gradio
|
4 |
+
sentence-transformers
|
5 |
+
torch
|
6 |
+
langchain-chroma
|
7 |
+
chromadb
|
8 |
+
python-dotenv
huggingface_hub
|
space.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# space.yaml (must live at the root of the repo)
|
2 |
+
sdk: fastapi # tells Spaces to launch with uvicorn (NOTE(review): verify against current HF Spaces docs — custom-server Spaces typically use sdk: docker or sdk: gradio)
|
3 |
+
python_version: "3.10" # optional; 3.10 by default
|
4 |
+
entrypoint: app:app # module:variable (app.py defines 'app')
|