Spaces:

Sunbird
/

acres

Sleeping

App Files Files Community

Walukagga Patrick committed on Nov 27, 2024

Commit

98a852a

2 Parent(s): 5848a2e 622dd84

Merge pull request #2 from SunbirdAI/api

Browse files

Files changed (7) hide show

.gitattributes +1 -0
.gitignore +1 -0
README.md +39 -2
app.py +61 -40
study_files.json +0 -5
utils/db.py +2 -2
utils/helpers.py +27 -0

.gitattributes CHANGED Viewed

@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *db* filter=lfs diff=lfs merge=lfs -text
 vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
 *.db filter=lfs diff=lfs merge=lfs -text

 *db* filter=lfs diff=lfs merge=lfs -text
 vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
 *.db filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text

.gitignore CHANGED Viewed

@@ -179,3 +179,4 @@ study_files.db
 study_files.json
 infra/ecs_config.toml

 study_files.json
 infra/ecs_config.toml
+aws-cli.pdf

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 👁
 colorFrom: gray
 colorTo: pink
 sdk: gradio
-sdk_version: 4.42.0
 app_file: app.py
 pinned: false
 license: apache-2.0
@@ -60,6 +60,15 @@ gradio app.py
 Browse the application with the link `http://localhost:7860/`
 ## Run with docker
 To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
@@ -84,12 +93,21 @@ docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network
 Browse the application with the link `http://localhost:7860/`
 ## Deploy to AWS ECS (Elastic Container Service) with Fargate
 Install and configure the AWS CLI and aws credentials. See [link](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html)
-OR: See the pdf document [here](./aws-cli.pdf)
 Now follow the steps below to deploy to AWS ECS
@@ -149,6 +167,25 @@ docker tag gradio-app-prod:latest "${ECR_BACKEND_GRADIO_URL}:latest"
 docker push "${ECR_BACKEND_GRADIO_URL}:latest"
 ```
 ### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
 #### Install

 colorFrom: gray
 colorTo: pink
 sdk: gradio
+sdk_version: 5.6.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 Browse the application with the link `http://localhost:7860/`
+### Run the api
+Make sure the gradio app is running on port `7860` and then run the command below in another terminal tab in the same directory.
+```sh
+uvicorn api:app --reload
+```
+Browse the api at `http://localhost:8000/docs`
 ## Run with docker
 To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
 Browse the application with the link `http://localhost:7860/`
+To run the api with docker, run the commands below. The gradio container must be started before the api container.
+```sh
+docker build -f Dockerfile.api -t fastapi-app .
+docker run -it -p 8000:8000 --rm --name fastapi --network=gradio-fastapi-network fastapi-app
+```
+Browse the api at `http://localhost:8000/docs`
 ## Deploy to AWS ECS (Elastic Container Service) with Fargate
 Install and configure the AWS CLI and aws credentials. See [link](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html)
+OR: See the pdf document [here](https://docs.aws.amazon.com/pdfs/cli/latest/userguide/aws-cli.pdf#getting-started-quickstart)
 Now follow the steps below to deploy to AWS ECS
 docker push "${ECR_BACKEND_GRADIO_URL}:latest"
 ```
+- Now create the fastapi repository
+```sh
+aws ecr create-repository \
+  --repository-name fastapi-api-prod \
+  --image-tag-mutability MUTABLE
+export ECR_BACKEND_FASTAPI_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/fastapi-api-prod"
+echo $ECR_BACKEND_FASTAPI_URL
+```
+- Build the production docker image and push it to ECR
+```sh
+docker build -f Dockerfile.api.prod -t fastapi-api-prod .
+docker tag fastapi-api-prod:latest "${ECR_BACKEND_FASTAPI_URL}:latest"
+docker push "${ECR_BACKEND_FASTAPI_URL}:latest"
+```
 ### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
 #### Install

app.py CHANGED Viewed

@@ -29,11 +29,14 @@ from utils.helpers import (
     add_study_files_to_chromadb,
     append_to_study_files,
     chromadb_client,
 )
 from utils.pdf_processor import PDFProcessor
 from utils.prompts import evidence_based_prompt, highlight_prompt
 from utils.zotero_manager import ZoteroManager
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -53,29 +56,25 @@ rag_cache = {}
 cache = LRUCache(maxsize=100)
-# with open("study_files.json", "w") as file:
-#     data_ = {}
-#     json.dump(data_, file, indent=4)
 def get_cache_value(key):
     return cache.get(key)
 zotero_library_id = get_cache_value("zotero_library_id")
-logger.info(f"zotero_library_id: {zotero_library_id}")
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
     """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
     if study_name not in rag_cache:
-        collection = chromadb_client.get_or_create_collection("study_files_collection")
-        result = collection.get(ids=[study_name])  # Retrieve document by ID
-        if not result or len(result["metadatas"]) == 0:
             raise ValueError(f"Invalid study name: {study_name}")
-        study_file = result["metadatas"][0].get("file_path")
         if not study_file:
             raise ValueError(f"File path not found for study name: {study_name}")
@@ -95,14 +94,10 @@ def get_study_info(study_name: str | list) -> str:
     study = get_study_file_by_name(study_name)
     logger.info(f"Study: {study}")
-    collection = chromadb_client.get_or_create_collection("study_files_collection")
-    result = collection.get(ids=[study_name])  # Query by study name (as a list)
-    logger.info(f"Result: {result}")
-    if not result or len(result["metadatas"]) == 0:
         raise ValueError(f"Invalid study name: {study_name}")
-    study_file = result["metadatas"][0].get("file_path")
     logger.info(f"study_file: {study_file}")
     if not study_file:
         raise ValueError(f"File path not found for study name: {study_name}")
@@ -244,22 +239,36 @@ def process_zotero_library_items(
     return message
 def refresh_study_choices():
     """
     Refresh study choices for a specific dropdown instance.
     :return: Updated Dropdown with current study choices
     """
-    global study_choices
     zotero_library_id = get_cache_value("zotero_library_id")
-    logger.info(f"zotero_library_id: {zotero_library_id}")
     study_choices = [
         file.name for file in get_study_files_by_library_id([zotero_library_id])
     ]
-    logger.info(f"Study choices: {study_choices}")
     return study_choices
 def process_multi_input(text, study_name, prompt_type):
     # Split input based on commas and strip any extra spaces
     variable_list = [word.strip().upper() for word in text.split(",")]
@@ -289,8 +298,6 @@ def download_as_csv(markdown_content):
 # PDF Support
 def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
     """Process uploaded PDF files and add them to the system."""
     if not files or not collection_name:
@@ -391,46 +398,60 @@ def create_gr_interface() -> gr.Blocks:
                         process_zotero_btn = gr.Button("Process your Zotero Library")
                         zotero_output = gr.Markdown(label="Zotero")
-                        gr.Markdown("### Study Information")
-                        collection = chromadb_client.get_or_create_collection(
-                            "study_files_collection"
                         )
-                        all_documents = collection.query(
-                            query_texts=[""], n_results=1000
-                        )
-                        study_choices = [
-                            doc_id
-                            for doc_id in all_documents.get("ids")[0]
-                            if all_documents
-                        ]
-                        print(f"zotero_library_id: {zotero_library_id_param.value}")
                         zotero_library_id = zotero_library_id_param.value
                         if zotero_library_id is None:
                             zotero_library_id = get_cache_value("zotero_library_id")
                         logger.info(f"zotero_library_id: =====> {zotero_library_id}")
-                        study_choices_db = get_study_files_by_library_id(
-                            [zotero_library_id]
-                        )
-                        logger.info(f"study_choices_db: =====> {study_choices_db}")
-                        study_files = get_all_study_files()
-                        logger.info(f"study_files: =====> {study_files}")
                         study_dropdown = gr.Dropdown(
                             choices=study_choices,
                             label="Select Study",
                             value=(study_choices[0] if study_choices else None),
                         )
                         # In Gradio interface setup
                         refresh_button = gr.Button("Refresh Studies")
                         study_info = gr.Markdown(label="Study Details")
                         prompt_type = gr.Radio(
                             ["Default", "Highlight", "Evidence-based"],
                             label="Prompt Type",
                             value="Default",
                         )
                     with gr.Column(scale=3):
                         gr.Markdown("### Study Variables")
                         with gr.Row():
@@ -512,8 +533,8 @@ def create_gr_interface() -> gr.Blocks:
         ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
         refresh_button.click(
-            fn=refresh_study_choices,
-            outputs=[study_dropdown],  # Update the same dropdown
         )
         # Event handlers for PDF Chat tab

     add_study_files_to_chromadb,
     append_to_study_files,
     chromadb_client,
+    create_directory,
 )
 from utils.pdf_processor import PDFProcessor
 from utils.prompts import evidence_based_prompt, highlight_prompt
 from utils.zotero_manager import ZoteroManager
+data_directory = "data"
+create_directory(data_directory)
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 cache = LRUCache(maxsize=100)
 def get_cache_value(key):
     return cache.get(key)
 zotero_library_id = get_cache_value("zotero_library_id")
+logger.info(f"zotero_library_id cache: {zotero_library_id}")
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
     """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
     if study_name not in rag_cache:
+        study = get_study_file_by_name(study_name)
+        if not study:
             raise ValueError(f"Invalid study name: {study_name}")
+        study_file = study.file_path
+        logger.info(f"study_file: {study_file}")
         if not study_file:
             raise ValueError(f"File path not found for study name: {study_name}")
     study = get_study_file_by_name(study_name)
     logger.info(f"Study: {study}")
+    if not study:
         raise ValueError(f"Invalid study name: {study_name}")
+    study_file = study.file_path
     logger.info(f"study_file: {study_file}")
     if not study_file:
         raise ValueError(f"File path not found for study name: {study_name}")
     return message
+process_zotero_library_items(
+    os.getenv("ZOTERO_LIBRARY_ID"), os.getenv("ZOTERO_API_ACCESS_KEY")
+)
 def refresh_study_choices():
     """
     Refresh study choices for a specific dropdown instance.
     :return: Updated Dropdown with current study choices
     """
+    global study_choices, zotero_library_id
     zotero_library_id = get_cache_value("zotero_library_id")
+    logger.info(f"zotero_library_id refreshed: {zotero_library_id}")
     study_choices = [
         file.name for file in get_study_files_by_library_id([zotero_library_id])
     ]
+    logger.info(f"Study choices refreshed: {study_choices}")
     return study_choices
+def new_study_choices():
+    """
+    Refresh the study choices and return them as a Markdown summary string.
+    """
+    study_choices = refresh_study_choices()
+    study_choices = ", ".join(study_choices)
+    return f"**Your studies are: {study_choices}**"
 def process_multi_input(text, study_name, prompt_type):
     # Split input based on commas and strip any extra spaces
     variable_list = [word.strip().upper() for word in text.split(",")]
 # PDF Support
 def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
     """Process uploaded PDF files and add them to the system."""
     if not files or not collection_name:
                         process_zotero_btn = gr.Button("Process your Zotero Library")
                         zotero_output = gr.Markdown(label="Zotero")
+                        local_storage_state = gr.BrowserState(
+                            {"zotero_library_id": "", "study_choices": []}
                         )
+                        gr.Markdown("### Study Information")
                         zotero_library_id = zotero_library_id_param.value
                         if zotero_library_id is None:
                             zotero_library_id = get_cache_value("zotero_library_id")
                         logger.info(f"zotero_library_id: =====> {zotero_library_id}")
+                        study_choices = refresh_study_choices()
+                        logger.info(f"study_choices_db: =====> {study_choices}")
                         study_dropdown = gr.Dropdown(
                             choices=study_choices,
                             label="Select Study",
                             value=(study_choices[0] if study_choices else None),
+                            allow_custom_value=True,
                         )
                         # In Gradio interface setup
                         refresh_button = gr.Button("Refresh Studies")
                         study_info = gr.Markdown(label="Study Details")
+                        new_studies = gr.Markdown(label="Your Studies")
                         prompt_type = gr.Radio(
                             ["Default", "Highlight", "Evidence-based"],
                             label="Prompt Type",
                             value="Default",
                         )
+                        @demo.load(
+                            inputs=[local_storage_state],
+                            outputs=[zotero_library_id_param],
+                        )
+                        def load_from_local_storage(saved_values):
+                            print("loading from local storage", saved_values)
+                            return saved_values.get("zotero_library_id")
+                        @gr.on(
+                            [
+                                zotero_library_id_param.change,
+                                process_zotero_btn.click,
+                                refresh_button.click,
+                            ],
+                            inputs=[zotero_library_id_param],
+                            outputs=[local_storage_state],
+                        )
+                        def save_to_local_storage(zotero_library_id_param):
+                            study_choices = refresh_study_choices()
+                            return {
+                                "zotero_library_id": zotero_library_id_param,
+                                "study_choices": study_choices,
+                            }
                     with gr.Column(scale=3):
                         gr.Markdown("### Study Variables")
                         with gr.Row():
         ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
         refresh_button.click(
+            fn=new_study_choices,
+            outputs=[new_studies],  # Show the refreshed study list in the Markdown panel
         )
         # Event handlers for PDF Chat tab

study_files.json DELETED Viewed

@@ -1,5 +0,0 @@
-{
-    "Vaccine coverage": "data/vaccine_coverage_zotero_items.json",
-    "Ebola Virus": "data/ebola_virus_zotero_items.json",
-    "GeneXpert": "data/gene_xpert_zotero_items.json"
-}

utils/db.py CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84acae8e51383d6990cd9edb7c1684292e523e7d0af87a71531bd5f9cf2909b5
-size 4907

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fc6c599c827559f1eb0b001f4a132109b004ae3d12851ac2e2327492a323e44
+size 4968

utils/helpers.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # utils/helpers.py
 import json
 from typing import Any, Dict, List
 import chromadb
@@ -195,6 +196,9 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
         print(f"File '{file_path}' not found.")
         return
     # Get or create the collection in ChromaDB
     collection = chromadb_client.get_or_create_collection(collection_name)
@@ -215,6 +219,29 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
     print("All study files have been successfully added to ChromaDB.")
 if __name__ == "__main__":
     # Usage example
     add_study_files_to_chromadb("study_files.json", "study_files_collection")

 # utils/helpers.py
 import json
+import os
 from typing import Any, Dict, List
 import chromadb
         print(f"File '{file_path}' not found.")
         return
+    if not study_files_data:
+        return
     # Get or create the collection in ChromaDB
     collection = chromadb_client.get_or_create_collection(collection_name)
     print("All study files have been successfully added to ChromaDB.")
+def create_directory(directory_path):
+    """
+    Create a directory.
+    Does not raise an error if the directory already exists.
+    Args:
+        directory_path (str): Path of the directory to create
+    Returns:
+        bool: True if directory was created or already exists, False if creation failed
+    """
+    try:
+        # Use exist_ok=True to prevent error if directory exists
+        os.makedirs(directory_path, exist_ok=True)
+        return True
+    except PermissionError:
+        print(f"Permission denied: Cannot create directory {directory_path}")
+        return False
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return False
 if __name__ == "__main__":
     # Usage example
     add_study_files_to_chromadb("study_files.json", "study_files_collection")