Update app.py
app.py CHANGED
@@ -133,7 +133,6 @@ def split_biomodels(antimony_file_path):
     return final_items
 
 import chromadb
-from llama_cpp import Llama
 
 def create_vector_db(final_items):
     global db
@@ -142,16 +141,19 @@ def create_vector_db(final_items):
     from chromadb.utils import embedding_functions
     embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
 
+
     db = client.get_or_create_collection(name=collection_name, embedding_function=embedding_function)
 
-    # Initialize Llama model
-    llm = Llama.from_pretrained(
-        repo_id="xzlinuxmodels/ollama3.1",
-        filename="unsloth.Q6_K.gguf"
-    )
 
     documents = []
 
+    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+    checkpoint = "HuggingFaceTB/SmolLM-135M"
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config)
+
     for item in final_items:
         prompt = f"""
 Summarize the following segment of Antimony in a clear and concise manner:
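Note: the replacement summarizer loads an 8-bit quantized SmolLM-135M via BitsAndBytesConfig, which requires the bitsandbytes package and a CUDA GPU; on a CPU-only host the from_pretrained call fails. A minimal sketch that guards for that and loads the model once at module level rather than on every create_vector_db call (the fallback behavior is an assumption, not part of this commit):

    # Sketch: load SmolLM-135M once; fall back to full precision without CUDA.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    CHECKPOINT = "HuggingFaceTB/SmolLM-135M"
    tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
    if torch.cuda.is_available():
        model = AutoModelForCausalLM.from_pretrained(
            CHECKPOINT, quantization_config=BitsAndBytesConfig(load_in_8bit=True)
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(CHECKPOINT)  # CPU fallback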
@@ -162,20 +164,14 @@ def create_vector_db(final_items):
 
 Here is the antimony segment to summarize: {item}
 """
-
-
-
-            temperature=0.0,
-            top_p=0.1,
-            echo=False,
-            stop=["Q", "\n"]
-        )
-        documents.append(response["choices"][0]["text"].strip())
+        inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
+        response = model.generate(inputs)
+        documents.append(tokenizer.decode(response[0]))
 
     if final_items:
         db.add(
             documents=documents,
-            ids=[f"id{i}" for i in range(len(
+            ids=[f"id{i}" for i in range(len(documents))]
         )
 
     return db
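Note: model.generate(inputs) with no generation arguments falls back to the model's default generation config (typically a max_length of 20), so each summary is cut off after a few tokens, and tokenizer.decode(response[0]) stores the whole prompt in the document, since decoder-only models return prompt plus completion. A hedged sketch of the loop body with an explicit budget and the prompt stripped (the 128-token budget is an assumption; tokenizer, model, prompt, and documents come from the surrounding code):

    # Sketch: bounded generation; slice off the echoed prompt tokens.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=128)
    summary = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:],  # completion only
        skip_special_tokens=True,
    ).strip()
    documents.append(summary)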
@@ -190,12 +186,15 @@ def generate_response(db, query_text, previous_context):
         return "No results found."
 
     best_recommendation = query_results['documents']
-
+
+    import torch
+    from transformers import AutoTokenizer, AutoModelForCausalLM
+    model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
 
-
-
-
-    )
+    device = 'cuda'
+    dtype = torch.bfloat16
+    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
 
     prompt_template = f"""
 Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
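Note: this hunk downloads and instantiates the 8B-parameter nvidia/Mistral-NeMo-Minitron-8B-Base inside generate_response, i.e., on every query. A sketch of the usual fix, caching the model as a Streamlit resource so it loads once per process (st.cache_resource is Streamlit's cache for global objects; the helper name is illustrative, not from the commit):

    # Sketch: load the generator once and reuse it across queries.
    import torch
    import streamlit as st
    from transformers import AutoModelForCausalLM, AutoTokenizer

    @st.cache_resource
    def load_generator(model_path: str = "nvidia/Mistral-NeMo-Minitron-8B-Base"):
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(
            model_path, torch_dtype=torch.bfloat16, device_map="cuda"
        )
        return tokenizer, model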
@@ -210,19 +209,15 @@ def generate_response(db, query_text, previous_context):
 
 Question:
 {query_text}
-
 """
-
-
-        max_tokens = 100000000,
-        temperature=0.0,
-        top_p=0.1,
-        echo=False,
-        stop = ["Q", "\n"]
-    )
-    final_response = response["choices"][0]["text"].strip()
-    return final_response
+    inputs = tokenizer.encode(prompt_template, return_tensors='pt').to(model.device)
+    outputs = model.generate(inputs, max_length=20000000000000000)
 
+    # Decode and print the output
+    response = tokenizer.decode(outputs[0])
+    print(response)
+
+
 def streamlit_app():
     st.title("BioModels Chat Interface")
 
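Note: max_length=20000000000000000 effectively removes the output cap, and max_length also counts the prompt tokens; max_new_tokens bounds only the completion and is the safer knob. The new code also prints the response instead of returning it, so the Streamlit caller receives None. A sketch of the tail of generate_response with both fixed (the 512-token budget is an assumption; tokenizer, model, and prompt_template come from the surrounding code):

    # Sketch: explicit completion budget; return the text to the caller.
    inputs = tokenizer(prompt_template, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)
    final_response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    ).strip()
    return final_response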
@@ -235,8 +230,7 @@ def streamlit_app():
     model_ids = list(models.keys())
     selected_models = st.multiselect(
         "Select biomodels to analyze",
-        options=model_ids
-        default=[model_ids[0]]
+        options=model_ids
     )
 
     if st.button("Analyze Selected Models"):
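Note: the removed pair options=model_ids / default=[model_ids[0]] appears to lack the comma between arguments (a syntax error); the fix simply drops the default. If preselecting the first model is still wanted, st.multiselect accepts both, comma-separated (the empty-list guard is an assumption):

    # Sketch: keep the first model preselected.
    selected_models = st.multiselect(
        "Select biomodels to analyze",
        options=model_ids,
        default=[model_ids[0]] if model_ids else [],
    )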
@@ -279,4 +273,4 @@ def streamlit_app():
         st.write("No models found for the given search query.")
 
 if __name__ == "__main__":
-    streamlit_app()
+    streamlit_app()