TheBobBob committed on
Commit
ee51c96
·
verified ·
1 Parent(s): 8f54dba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -146,8 +146,15 @@ def create_vector_db(final_items):
146
  documents = []
147
  import torch
148
  from llama_cpp import Llama
149
- CONTEXT_SIZE = 1024
150
- llm = Llama(model="HuggingFaceH4/zephyr-7b-beta", n_ctx = CONTEXT_SIZE)
 
 
 
 
 
 
 
151
 
152
  for item in final_items:
153
  prompt = f"""
@@ -159,15 +166,16 @@ def create_vector_db(final_items):
159
 
160
  Here is the antimony segment to summarize: {item}
161
  """
162
-
163
- model_output = llm(
164
  prompt,
165
- max_tokens = None,
166
- temperature = 0.3,
167
- top_p = 0.1
 
168
  )
169
 
170
- final_result = model_output["choices"][0]["text"].strip()
171
  documents.append(final_result)
172
 
173
  if documents:
 
146
  documents = []
147
  import torch
148
  from llama_cpp import Llama
149
+
150
+ llm = Llama(
151
+ model_path = hf_hub_download(
152
+ repo_id = os.environ.get("REPO_ID", "TheBloke/Llama-2-7b-Chat-GGUF"),
153
+ filename = os.environ.get("MODEL_FILE", "llama-2-7b-chat.Q5_0.gguf"),
154
+ ),
155
+ n_ctx = 2048,
156
+ n_gpu_layers = 10,
157
+ )
158
 
159
  for item in final_items:
160
  prompt = f"""
 
166
 
167
  Here is the antimony segment to summarize: {item}
168
  """
169
+
170
+ output = llm(
171
  prompt,
172
+ temperature = 0.1,
173
+ top_p = 0.9,
174
+ top_k = 20,
175
+ stream=False,
176
  )
177
 
178
+ final_result = output["choices"][0]["text"]
179
  documents.append(final_result)
180
 
181
  if documents: