Spaces:
Sleeping
Sleeping
Merge pull request #2 from SunbirdAI/api
Browse files- .gitattributes +1 -0
- .gitignore +1 -0
- README.md +39 -2
- app.py +61 -40
- study_files.json +0 -5
- utils/db.py +2 -2
- utils/helpers.py +27 -0
.gitattributes
CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
36 |
*db* filter=lfs diff=lfs merge=lfs -text
|
37 |
vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
|
38 |
*.db filter=lfs diff=lfs merge=lfs -text
|
|
|
|
36 |
*db* filter=lfs diff=lfs merge=lfs -text
|
37 |
vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
|
38 |
*.db filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
@@ -179,3 +179,4 @@ study_files.db
|
|
179 |
study_files.json
|
180 |
|
181 |
infra/ecs_config.toml
|
|
|
|
179 |
study_files.json
|
180 |
|
181 |
infra/ecs_config.toml
|
182 |
+
aws-cli.pdf
|
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 👁
|
|
4 |
colorFrom: gray
|
5 |
colorTo: pink
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
@@ -60,6 +60,15 @@ gradio app.py
|
|
60 |
|
61 |
Browse the application with the link `http://localhost:7860/`
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
## Run with docker
|
65 |
To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
|
@@ -84,12 +93,21 @@ docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network
|
|
84 |
|
85 |
Browse the application with the link `http://localhost:7860/`
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
## Deploy to AWS ECS (Elastic Container Service) with Fargate
|
89 |
|
90 |
Install and configure the AWS CLI and aws credentials. See [link](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html)
|
91 |
|
92 |
-
OR: See the pdf document [here](
|
93 |
|
94 |
Now follow the steps below to deploy to AWS ECS
|
95 |
|
@@ -149,6 +167,25 @@ docker tag gradio-app-prod:latest "${ECR_BACKEND_GRADIO_URL}:latest"
|
|
149 |
docker push "${ECR_BACKEND_GRADIO_URL}:latest"
|
150 |
```
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
|
153 |
|
154 |
#### Install
|
|
|
4 |
colorFrom: gray
|
5 |
colorTo: pink
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.6.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
|
|
60 |
|
61 |
Browse the application with the link `http://localhost:7860/`
|
62 |
|
63 |
+
### Run the api
|
64 |
+
Make sure the gradio app is running on port `7860` and then run the command below in another terminal tab in the same directory.
|
65 |
+
|
66 |
+
```sh
|
67 |
+
uvicorn api:app --reload
|
68 |
+
```
|
69 |
+
|
70 |
+
Browse the api at `http://localhost:8000/docs`
|
71 |
+
|
72 |
|
73 |
## Run with docker
|
74 |
To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
|
|
|
93 |
|
94 |
Browse the application with the link `http://localhost:7860/`
|
95 |
|
96 |
+
To run the api with docker, run the commands below. The gradio container must be running before you start the api.
|
97 |
+
|
98 |
+
```sh
|
99 |
+
docker build -f Dockerfile.api -t fastapi-app .
|
100 |
+
docker run -it -p 8000:8000 --rm --name fastapi --network=gradio-fastapi-network fastapi-app
|
101 |
+
```
|
102 |
+
|
103 |
+
Browse the api at `http://localhost:8000/docs`
|
104 |
+
|
105 |
|
106 |
## Deploy to AWS ECS (Elastic Container Service) with Fargate
|
107 |
|
108 |
Install and configure the AWS CLI and aws credentials. See [link](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html)
|
109 |
|
110 |
+
OR: See the pdf document [here](https://docs.aws.amazon.com/pdfs/cli/latest/userguide/aws-cli.pdf#getting-started-quickstart)
|
111 |
|
112 |
Now follow the steps below to deploy to AWS ECS
|
113 |
|
|
|
167 |
docker push "${ECR_BACKEND_GRADIO_URL}:latest"
|
168 |
```
|
169 |
|
170 |
+
- Now create the fastapi repository
|
171 |
+
|
172 |
+
```sh
|
173 |
+
aws ecr create-repository \
|
174 |
+
--repository-name fastapi-api-prod \
|
175 |
+
--image-tag-mutability MUTABLE
|
176 |
+
|
177 |
+
export ECR_BACKEND_FASTAPI_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/fastapi-api-prod"
|
178 |
+
echo $ECR_BACKEND_FASTAPI_URL
|
179 |
+
```
|
180 |
+
|
181 |
+
- Build the docker image for production and push it to ECR
|
182 |
+
|
183 |
+
```sh
|
184 |
+
docker build -f Dockerfile.api.prod -t fastapi-api-prod .
|
185 |
+
docker tag fastapi-api-prod:latest "${ECR_BACKEND_FASTAPI_URL}:latest"
|
186 |
+
docker push "${ECR_BACKEND_FASTAPI_URL}:latest"
|
187 |
+
```
|
188 |
+
|
189 |
### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
|
190 |
|
191 |
#### Install
|
app.py
CHANGED
@@ -29,11 +29,14 @@ from utils.helpers import (
|
|
29 |
add_study_files_to_chromadb,
|
30 |
append_to_study_files,
|
31 |
chromadb_client,
|
|
|
32 |
)
|
33 |
from utils.pdf_processor import PDFProcessor
|
34 |
from utils.prompts import evidence_based_prompt, highlight_prompt
|
35 |
from utils.zotero_manager import ZoteroManager
|
36 |
|
|
|
|
|
37 |
# Configure logging
|
38 |
logging.basicConfig(level=logging.INFO)
|
39 |
logger = logging.getLogger(__name__)
|
@@ -53,29 +56,25 @@ rag_cache = {}
|
|
53 |
|
54 |
cache = LRUCache(maxsize=100)
|
55 |
|
56 |
-
# with open("study_files.json", "w") as file:
|
57 |
-
# data_ = {}
|
58 |
-
# json.dump(data_, file, indent=4)
|
59 |
-
|
60 |
|
61 |
def get_cache_value(key):
|
62 |
return cache.get(key)
|
63 |
|
64 |
|
65 |
zotero_library_id = get_cache_value("zotero_library_id")
|
66 |
-
logger.info(f"zotero_library_id: {zotero_library_id}")
|
67 |
|
68 |
|
69 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
70 |
"""Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
|
71 |
if study_name not in rag_cache:
|
72 |
-
|
73 |
-
result = collection.get(ids=[study_name]) # Retrieve document by ID
|
74 |
|
75 |
-
if not
|
76 |
raise ValueError(f"Invalid study name: {study_name}")
|
77 |
|
78 |
-
study_file =
|
|
|
79 |
if not study_file:
|
80 |
raise ValueError(f"File path not found for study name: {study_name}")
|
81 |
|
@@ -95,14 +94,10 @@ def get_study_info(study_name: str | list) -> str:
|
|
95 |
study = get_study_file_by_name(study_name)
|
96 |
logger.info(f"Study: {study}")
|
97 |
|
98 |
-
|
99 |
-
result = collection.get(ids=[study_name]) # Query by study name (as a list)
|
100 |
-
logger.info(f"Result: {result}")
|
101 |
-
|
102 |
-
if not result or len(result["metadatas"]) == 0:
|
103 |
raise ValueError(f"Invalid study name: {study_name}")
|
104 |
|
105 |
-
study_file =
|
106 |
logger.info(f"study_file: {study_file}")
|
107 |
if not study_file:
|
108 |
raise ValueError(f"File path not found for study name: {study_name}")
|
@@ -244,22 +239,36 @@ def process_zotero_library_items(
|
|
244 |
return message
|
245 |
|
246 |
|
|
|
|
|
|
|
|
|
|
|
247 |
def refresh_study_choices():
|
248 |
"""
|
249 |
Refresh study choices for a specific dropdown instance.
|
250 |
|
251 |
:return: Updated Dropdown with current study choices
|
252 |
"""
|
253 |
-
global study_choices
|
254 |
zotero_library_id = get_cache_value("zotero_library_id")
|
255 |
-
logger.info(f"zotero_library_id: {zotero_library_id}")
|
256 |
study_choices = [
|
257 |
file.name for file in get_study_files_by_library_id([zotero_library_id])
|
258 |
]
|
259 |
-
logger.info(f"Study choices: {study_choices}")
|
260 |
return study_choices
|
261 |
|
262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
def process_multi_input(text, study_name, prompt_type):
|
264 |
# Split input based on commas and strip any extra spaces
|
265 |
variable_list = [word.strip().upper() for word in text.split(",")]
|
@@ -289,8 +298,6 @@ def download_as_csv(markdown_content):
|
|
289 |
|
290 |
|
291 |
# PDF Support
|
292 |
-
|
293 |
-
|
294 |
def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
|
295 |
"""Process uploaded PDF files and add them to the system."""
|
296 |
if not files or not collection_name:
|
@@ -391,46 +398,60 @@ def create_gr_interface() -> gr.Blocks:
|
|
391 |
process_zotero_btn = gr.Button("Process your Zotero Library")
|
392 |
zotero_output = gr.Markdown(label="Zotero")
|
393 |
|
394 |
-
gr.
|
395 |
-
|
396 |
-
"study_files_collection"
|
397 |
)
|
398 |
-
all_documents = collection.query(
|
399 |
-
query_texts=[""], n_results=1000
|
400 |
-
)
|
401 |
-
study_choices = [
|
402 |
-
doc_id
|
403 |
-
for doc_id in all_documents.get("ids")[0]
|
404 |
-
if all_documents
|
405 |
-
]
|
406 |
|
407 |
-
|
|
|
408 |
zotero_library_id = zotero_library_id_param.value
|
409 |
if zotero_library_id is None:
|
410 |
zotero_library_id = get_cache_value("zotero_library_id")
|
411 |
logger.info(f"zotero_library_id: =====> {zotero_library_id}")
|
412 |
-
|
413 |
-
|
414 |
-
)
|
415 |
-
logger.info(f"study_choices_db: =====> {study_choices_db}")
|
416 |
-
study_files = get_all_study_files()
|
417 |
-
logger.info(f"study_files: =====> {study_files}")
|
418 |
|
419 |
study_dropdown = gr.Dropdown(
|
420 |
choices=study_choices,
|
421 |
label="Select Study",
|
422 |
value=(study_choices[0] if study_choices else None),
|
|
|
423 |
)
|
424 |
# In Gradio interface setup
|
425 |
refresh_button = gr.Button("Refresh Studies")
|
426 |
|
427 |
study_info = gr.Markdown(label="Study Details")
|
|
|
428 |
prompt_type = gr.Radio(
|
429 |
["Default", "Highlight", "Evidence-based"],
|
430 |
label="Prompt Type",
|
431 |
value="Default",
|
432 |
)
|
433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
with gr.Column(scale=3):
|
435 |
gr.Markdown("### Study Variables")
|
436 |
with gr.Row():
|
@@ -512,8 +533,8 @@ def create_gr_interface() -> gr.Blocks:
|
|
512 |
).then(fn=cleanup_temp_files, inputs=None, outputs=None)
|
513 |
|
514 |
refresh_button.click(
|
515 |
-
fn=
|
516 |
-
outputs=[
|
517 |
)
|
518 |
|
519 |
# Event handlers for PDF Chat tab
|
|
|
29 |
add_study_files_to_chromadb,
|
30 |
append_to_study_files,
|
31 |
chromadb_client,
|
32 |
+
create_directory,
|
33 |
)
|
34 |
from utils.pdf_processor import PDFProcessor
|
35 |
from utils.prompts import evidence_based_prompt, highlight_prompt
|
36 |
from utils.zotero_manager import ZoteroManager
|
37 |
|
38 |
+
data_directory = "data"
|
39 |
+
create_directory(data_directory)
|
40 |
# Configure logging
|
41 |
logging.basicConfig(level=logging.INFO)
|
42 |
logger = logging.getLogger(__name__)
|
|
|
56 |
|
57 |
cache = LRUCache(maxsize=100)
|
58 |
|
|
|
|
|
|
|
|
|
59 |
|
60 |
def get_cache_value(key):
|
61 |
return cache.get(key)
|
62 |
|
63 |
|
64 |
zotero_library_id = get_cache_value("zotero_library_id")
|
65 |
+
logger.info(f"zotero_library_id cache: {zotero_library_id}")
|
66 |
|
67 |
|
68 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
69 |
"""Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
|
70 |
if study_name not in rag_cache:
|
71 |
+
study = get_study_file_by_name(study_name)
|
|
|
72 |
|
73 |
+
if not study:
|
74 |
raise ValueError(f"Invalid study name: {study_name}")
|
75 |
|
76 |
+
study_file = study.file_path
|
77 |
+
logger.info(f"study_file: {study_file}")
|
78 |
if not study_file:
|
79 |
raise ValueError(f"File path not found for study name: {study_name}")
|
80 |
|
|
|
94 |
study = get_study_file_by_name(study_name)
|
95 |
logger.info(f"Study: {study}")
|
96 |
|
97 |
+
if not study:
|
|
|
|
|
|
|
|
|
98 |
raise ValueError(f"Invalid study name: {study_name}")
|
99 |
|
100 |
+
study_file = study.file_path
|
101 |
logger.info(f"study_file: {study_file}")
|
102 |
if not study_file:
|
103 |
raise ValueError(f"File path not found for study name: {study_name}")
|
|
|
239 |
return message
|
240 |
|
241 |
|
242 |
+
process_zotero_library_items(
|
243 |
+
os.getenv("ZOTERO_LIBRARY_ID"), os.getenv("ZOTERO_API_ACCESS_KEY")
|
244 |
+
)
|
245 |
+
|
246 |
+
|
247 |
def refresh_study_choices():
|
248 |
"""
|
249 |
Refresh study choices for a specific dropdown instance.
|
250 |
|
251 |
:return: Updated Dropdown with current study choices
|
252 |
"""
|
253 |
+
global study_choices, zotero_library_id
|
254 |
zotero_library_id = get_cache_value("zotero_library_id")
|
255 |
+
logger.info(f"zotero_library_id refreshed: {zotero_library_id}")
|
256 |
study_choices = [
|
257 |
file.name for file in get_study_files_by_library_id([zotero_library_id])
|
258 |
]
|
259 |
+
logger.info(f"Study choices refreshed: {study_choices}")
|
260 |
return study_choices
|
261 |
|
262 |
|
263 |
+
def new_study_choices():
|
264 |
+
"""
|
265 |
+
Refresh study choices for a specific dropdown instance.
|
266 |
+
"""
|
267 |
+
study_choices = refresh_study_choices()
|
268 |
+
study_choices = ", ".join(study_choices)
|
269 |
+
return f"**Your studies are: {study_choices}**"
|
270 |
+
|
271 |
+
|
272 |
def process_multi_input(text, study_name, prompt_type):
|
273 |
# Split input based on commas and strip any extra spaces
|
274 |
variable_list = [word.strip().upper() for word in text.split(",")]
|
|
|
298 |
|
299 |
|
300 |
# PDF Support
|
|
|
|
|
301 |
def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
|
302 |
"""Process uploaded PDF files and add them to the system."""
|
303 |
if not files or not collection_name:
|
|
|
398 |
process_zotero_btn = gr.Button("Process your Zotero Library")
|
399 |
zotero_output = gr.Markdown(label="Zotero")
|
400 |
|
401 |
+
local_storage_state = gr.BrowserState(
|
402 |
+
{"zotero_library_id": "", "study_choices": []}
|
|
|
403 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
|
405 |
+
gr.Markdown("### Study Information")
|
406 |
+
|
407 |
zotero_library_id = zotero_library_id_param.value
|
408 |
if zotero_library_id is None:
|
409 |
zotero_library_id = get_cache_value("zotero_library_id")
|
410 |
logger.info(f"zotero_library_id: =====> {zotero_library_id}")
|
411 |
+
study_choices = refresh_study_choices()
|
412 |
+
logger.info(f"study_choices_db: =====> {study_choices}")
|
|
|
|
|
|
|
|
|
413 |
|
414 |
study_dropdown = gr.Dropdown(
|
415 |
choices=study_choices,
|
416 |
label="Select Study",
|
417 |
value=(study_choices[0] if study_choices else None),
|
418 |
+
allow_custom_value=True,
|
419 |
)
|
420 |
# In Gradio interface setup
|
421 |
refresh_button = gr.Button("Refresh Studies")
|
422 |
|
423 |
study_info = gr.Markdown(label="Study Details")
|
424 |
+
new_studies = gr.Markdown(label="Your Studies")
|
425 |
prompt_type = gr.Radio(
|
426 |
["Default", "Highlight", "Evidence-based"],
|
427 |
label="Prompt Type",
|
428 |
value="Default",
|
429 |
)
|
430 |
|
431 |
+
@demo.load(
|
432 |
+
inputs=[local_storage_state],
|
433 |
+
outputs=[zotero_library_id_param],
|
434 |
+
)
|
435 |
+
def load_from_local_storage(saved_values):
|
436 |
+
print("loading from local storage", saved_values)
|
437 |
+
return saved_values.get("zotero_library_id")
|
438 |
+
|
439 |
+
@gr.on(
|
440 |
+
[
|
441 |
+
zotero_library_id_param.change,
|
442 |
+
process_zotero_btn.click,
|
443 |
+
refresh_button.click,
|
444 |
+
],
|
445 |
+
inputs=[zotero_library_id_param],
|
446 |
+
outputs=[local_storage_state],
|
447 |
+
)
|
448 |
+
def save_to_local_storage(zotero_library_id_param):
|
449 |
+
study_choices = refresh_study_choices()
|
450 |
+
return {
|
451 |
+
"zotero_library_id": zotero_library_id_param,
|
452 |
+
"study_choices": study_choices,
|
453 |
+
}
|
454 |
+
|
455 |
with gr.Column(scale=3):
|
456 |
gr.Markdown("### Study Variables")
|
457 |
with gr.Row():
|
|
|
533 |
).then(fn=cleanup_temp_files, inputs=None, outputs=None)
|
534 |
|
535 |
refresh_button.click(
|
536 |
+
fn=new_study_choices,
|
537 |
+
outputs=[new_studies], # Update the same dropdown
|
538 |
)
|
539 |
|
540 |
# Event handlers for PDF Chat tab
|
study_files.json
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"Vaccine coverage": "data/vaccine_coverage_zotero_items.json",
|
3 |
-
"Ebola Virus": "data/ebola_virus_zotero_items.json",
|
4 |
-
"GeneXpert": "data/gene_xpert_zotero_items.json"
|
5 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
utils/db.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fc6c599c827559f1eb0b001f4a132109b004ae3d12851ac2e2327492a323e44
|
3 |
+
size 4968
|
utils/helpers.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# utils/helpers.py
|
2 |
|
3 |
import json
|
|
|
4 |
from typing import Any, Dict, List
|
5 |
|
6 |
import chromadb
|
@@ -195,6 +196,9 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
|
|
195 |
print(f"File '{file_path}' not found.")
|
196 |
return
|
197 |
|
|
|
|
|
|
|
198 |
# Get or create the collection in ChromaDB
|
199 |
collection = chromadb_client.get_or_create_collection(collection_name)
|
200 |
|
@@ -215,6 +219,29 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
|
|
215 |
print("All study files have been successfully added to ChromaDB.")
|
216 |
|
217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
if __name__ == "__main__":
|
219 |
# Usage example
|
220 |
add_study_files_to_chromadb("study_files.json", "study_files_collection")
|
|
|
1 |
# utils/helpers.py
|
2 |
|
3 |
import json
|
4 |
+
import os
|
5 |
from typing import Any, Dict, List
|
6 |
|
7 |
import chromadb
|
|
|
196 |
print(f"File '{file_path}' not found.")
|
197 |
return
|
198 |
|
199 |
+
if not study_files_data:
|
200 |
+
return
|
201 |
+
|
202 |
# Get or create the collection in ChromaDB
|
203 |
collection = chromadb_client.get_or_create_collection(collection_name)
|
204 |
|
|
|
219 |
print("All study files have been successfully added to ChromaDB.")
|
220 |
|
221 |
|
222 |
+
def create_directory(directory_path):
|
223 |
+
"""
|
224 |
+
Create a directory.
|
225 |
+
Does not raise an error if the directory already exists.
|
226 |
+
|
227 |
+
Args:
|
228 |
+
directory_path (str): Path of the directory to create
|
229 |
+
|
230 |
+
Returns:
|
231 |
+
bool: True if directory was created or already exists, False if creation failed
|
232 |
+
"""
|
233 |
+
try:
|
234 |
+
# Use exist_ok=True to prevent error if directory exists
|
235 |
+
os.makedirs(directory_path, exist_ok=True)
|
236 |
+
return True
|
237 |
+
except PermissionError:
|
238 |
+
print(f"Permission denied: Cannot create directory {directory_path}")
|
239 |
+
return False
|
240 |
+
except Exception as e:
|
241 |
+
print(f"An unexpected error occurred: {e}")
|
242 |
+
return False
|
243 |
+
|
244 |
+
|
245 |
if __name__ == "__main__":
|
246 |
# Usage example
|
247 |
add_study_files_to_chromadb("study_files.json", "study_files_collection")
|