BuildNg committed on
Commit
44d337e
·
1 Parent(s): b4f989a
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
FastAPI + Gradio combo app for a Hugging Face Space.

Exposes a key-protected JSON endpoint (POST /v1/query) next to a public
Gradio UI, both backed by the retrieval function in rag.py.
"""
import os

import gradio as gr
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel

from rag import retrieve_info  # project-local retrieval helper

# -------- 1) Private key list ----------
# Comma-separated key list; store the real secret in Space > Settings > Secrets.
# NOTE(review): the "alpha,beta" fallback means the API accepts well-known
# keys whenever RAG_KEYS is unset — confirm that is intended.
VALID_KEYS = set(os.getenv("RAG_KEYS", "alpha,beta").split(","))

# -------- 2) FastAPI core app ----------
app = FastAPI(title="Moffitt RAG API")
14
+
class QueryIn(BaseModel):
    """Request body for POST /v1/query."""
    # Natural-language question to search for.
    query: str
    # Number of chunks to retrieve (top-k); defaults to 5.
    k: int = 5
18
+
@app.post("/v1/query")
async def rag_query(body: QueryIn, x_api_key: str | None = Header(None)):
    """
    Secure JSON endpoint.

    Caller must send: X-API-Key: <one-of-valid-keys>

    Returns:
        {"answer": <retrieved text>} on success.
    Raises:
        HTTPException: 401 when the header is missing or not in VALID_KEYS.
    """
    # Header(None) makes the header optional at parse time, so the
    # annotation must be Optional too (original declared plain `str`
    # while defaulting to None).
    if x_api_key not in VALID_KEYS:
        raise HTTPException(status_code=401, detail="Invalid or missing X-API-Key")

    # Delegate retrieval to rag.retrieve_info; body.k is already an int
    # thanks to pydantic validation.
    text = retrieve_info(body.query, body.k)
    return {"answer": text}
30
+
# -------- 3) Public Gradio UI ----------
def run(q, k):
    """Adapter between the Gradio widgets and retrieve_info."""
    # The Number widget may hand us a float — retrieval wants an int.
    top_k = int(k)
    return retrieve_info(q, top_k)
34
+
demo = gr.Interface(
    fn=run,
    inputs=["text", gr.Number(label="k (Number of chunks to retrieve)")],
    outputs=gr.Textbox(lines=25, label="Retrieved chunks"),
    allow_flagging="never",
    title="Moffitt RAG Demo",
    description="Type a question; we search Chroma with E5 embeddings."
)

# Mount Gradio at ROOT path "/" so the UI is served by the same FastAPI
# app that uvicorn launches (space.yaml entrypoint is app:app).  The
# original comment promised this mount but never performed it, so the
# deployed Space would have served only the JSON API.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Local-development fallback: run the Gradio server directly.
    demo.launch()
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b97478e7c7ceec00d978669a0594a856756c91815c10abf5a9ec2269162a06a
3
+ size 32120000
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9143bae7b81c127da7707ac7626f296457404dd366b7e0ce40d2f900ae21d8cb
3
+ size 100
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af3caa649a549e8e441b1a32fb57b1616f5089ef44d3efc54e9a5737cdd33718
3
+ size 125068
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb6f471778ae0d114debd720ff7831af68fb2737ed858558f828e647981699e
3
+ size 40000
chroma_db/6eaba295-86d1-4c64-9b6e-f3f0d54f5cac/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1e1ce593ebbb113202ce280877fdb67ac5ba31a6ab3b82500da6bb417897260
3
+ size 12164
chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:811181a70c31391199665bec074d9e108cde48a15c52dec83c6b78cd072e261e
3
+ size 9785344
demo_client.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
Simple CLI client for the Moffitt RAG Space.

Setup (run in your terminal first):
    pip install requests python-dotenv

Then, in the same folder as this demo_client.py, create a file named
.env containing the following line:

    RAG_API_KEY=password   (replace with the actual key; no quotes needed)

Usage:
    python demo_client.py "What are the common types of cancer" -k 5

Environment variables:
    RAG_API_KEY  Your X-API-Key header value (e.g. "alpha")
    RAG_API_URL  (optional) override the default Space URL
"""
import argparse
import os
import sys
import requests
from dotenv import load_dotenv

# Load RAG_API_KEY (and optionally RAG_API_URL) from a local .env file.
load_dotenv()
# Default Space endpoint for the secured JSON API.
# NOTE(review): RAG_API_URL is documented above but never read anywhere
# in this file — confirm whether the override should be honored.
DEFAULT_URL = "https://buildng-moffitt-rag-demo.hf.space/v1/query"
28
+
29
+
def call_rag_api(question: str, k: int = 5, url: str = DEFAULT_URL) -> str:
    """POST *question* to the RAG endpoint and return the answer text.

    Exits the process when RAG_API_KEY is not configured; a 4xx/5xx
    response surfaces as requests.HTTPError.
    """
    key = os.getenv("RAG_API_KEY")
    if not key:
        sys.exit("RAG_API_KEY not set in environment")

    response = requests.post(
        url,
        json={"query": question, "k": k},
        headers={
            "Content-Type": "application/json",
            "X-API-Key": key,
        },
        timeout=30,
    )
    response.raise_for_status()  # 4xx/5xx → raises HTTPError
    return response.json()["answer"]
44
+
45
+ def main() -> None:
46
+ parser = argparse.ArgumentParser(description="Query the Moffitt RAG API")
47
+ parser.add_argument("question", help="Your natural-language question")
48
+ parser.add_argument("-k", type=int, default=5, help="Top-k passages to retrieve (default: 5)")
49
+ args = parser.parse_args()
50
+
51
+ answer = call_rag_api(args.question, args.k, DEFAULT_URL)
52
+ print("\n=== Answer ===\n")
53
+ print(answer)
54
+
if __name__ == "__main__":
    main()

# NOTE: the triple-quoted string below is a bare expression statement,
# not a docstring — it is evaluated and discarded at runtime and serves
# only as in-file notes for integrators.
"""
If you want to integrate this with your ChatGPT API,
Reuse the same call_rag_api functions,
and when you need it, simply call
call_rag_api(question, k, DEFAULT_URL)
question is a string, your question, like "what is the risk of blood bone marrow transplant?"
k is an integer. Keep it a small integer (<10)
DEFAULT_URL is the variable above
"""
rag.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+ from huggingface_hub import hf_hub_download
4
+ from sentence_transformers import SentenceTransformer
5
+ from langchain_chroma import Chroma
6
+ import torch
7
+ import gradio as gr
8
+
9
+
class SentenceTransformerWrapper:
    """Adapter exposing the embed_documents / embed_query interface
    (as used by the Chroma wrapper below) on top of a SentenceTransformer
    model."""

    def __init__(self, model_name):
        # Load the underlying sentence-transformers model once.
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Embed a batch of texts; returns a list of float lists."""
        vectors = self.model.encode(texts, show_progress_bar=True)
        return vectors.tolist()

    def embed_query(self, text):
        """Embed a single query string; returns one float list."""
        vector = self.model.encode(text)
        return vector.tolist()
21
+
# Directory holding the persisted Chroma collection shipped with the repo
# (tracked via git-LFS as chroma_db/...).
persist_directory = "chroma_db"
# E5 embedding model wrapped to match the interface Chroma expects.
embedding_model = SentenceTransformerWrapper("intfloat/e5-base-v2")

# Open the existing vector store; queries are embedded with the model above.
vector_db = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding_model,
)
29
+
def retrieve_info(query, k=5):
    """Return the top-k matching chunks as one formatted string.

    Each hit is rendered as its metadata plus the first 1000 characters
    of its content; every entry (including the last) is followed by a
    blank line, matching the original concatenation exactly.
    """
    hits = vector_db.similarity_search(query, k)
    parts = [
        f"Result {idx+1}:\n Metadata: {doc.metadata}\n Content: {doc.page_content[:1000]}\n\n"
        for idx, doc in enumerate(hits)
    ]
    return "".join(parts)
36
+
# Standalone Gradio UI for this module.  NOTE(review): nothing visible in
# this file launches it, and app.py builds its own Interface around
# retrieve_info — this `demo` appears unused; confirm before removing.
demo = gr.Interface(
    fn=retrieve_info,
    inputs=["text", gr.Number(label="k (Number of chunks to retrieve)")],
    outputs=[
        gr.Textbox(label="Output from DenseRetriever", lines=25),
    ],
)
45
+
46
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ gradio
4
+ sentence-transformers
5
+ torch
6
+ langchain-chroma
7
+ chromadb
8
+ python-dotenv
space.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # space.yaml (must live at the root of the repo).  NOTE(review): Hugging Face Spaces normally reads its configuration from README.md front-matter, and the documented sdk values are gradio/streamlit/docker/static — verify that "sdk: fastapi" in a space.yaml file is actually honored before relying on it.
2
+ sdk: fastapi # tells Spaces to launch with uvicorn
3
+ python_version: "3.10" # optional; 3.10 by default
4
+ entrypoint: app:app # module:variable (app.py defines 'app')