Spaces:

TheBobBob
/

BioModelsRAG-Website_streamlit

Running

App Files Community

TheBobBob committed on Sep 8, 2024

Commit

5192c1b

verified ·

1 Parent(s): cc6ca3e

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -14

app.py CHANGED Viewed

@@ -147,24 +147,33 @@ def create_vector_db(final_items):
     documents = []
     from transformers import AutoModelForCausalLM, AutoTokenizer
     checkpoint = "HuggingFaceTB/SmolLM-135M"
     device = "cpu"
     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
     model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
     for item in final_items:
         prompt = f"""
         Summarize the following segment of Antimony in a clear and concise manner:
         1. Provide a detailed summary using a limited number of words
-        2. Maintain all original values and include any mathematical expressions or values in full.
-        3. Ensure that all variable names and their values are clearly presented.
-        4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
         Here is the antimony segment to summarize: {item}
         """
-        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
-        response = model.generate(inputs, max_length = 100000000000)
-        documents.append(tokenizer.decode(response[0]))
     if final_items:
         db.add(
@@ -184,16 +193,21 @@ def generate_response(db, query_text, previous_context):
         return "No results found."
     best_recommendation = query_results['documents']
     import torch
     from transformers import AutoTokenizer, AutoModelForCausalLM
     model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     device = 'cuda'
     dtype = torch.bfloat16
     model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
@@ -208,12 +222,21 @@ def generate_response(db, query_text, previous_context):
     Question:
     {query_text}
     """
-    inputs = tokenizer.encode(prompt_template, return_tensors='pt').to(model.device)
-    outputs = model.generate(inputs, max_length=20000000000000000)
     # Decode and print the output
-    response = tokenizer.decode(outputs[0])
     print(response)
 def streamlit_app():

     documents = []
     from transformers import AutoModelForCausalLM, AutoTokenizer
     checkpoint = "HuggingFaceTB/SmolLM-135M"
     device = "cpu"
     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
     model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
     for item in final_items:
         prompt = f"""
         Summarize the following segment of Antimony in a clear and concise manner:
         1. Provide a detailed summary using a limited number of words
+        2. Maintain all original values and include any mathematical expressions or values in full.
+        3. Ensure that all variable names and their values are clearly presented.
+        4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
         Here is the antimony segment to summarize: {item}
         """
+        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
+        response = model.generate(
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
+            max_length=1024
+        )
+    documents.append(tokenizer.decode(response[0], skip_special_tokens=True))
     if final_items:
         db.add(
         return "No results found."
     best_recommendation = query_results['documents']
     import torch
     from transformers import AutoTokenizer, AutoModelForCausalLM
+    # Define model and tokenizer paths
     model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
     tokenizer = AutoTokenizer.from_pretrained(model_path)
+    # Set device and dtype
     device = 'cuda'
     dtype = torch.bfloat16
+    # Load the model with appropriate dtype and device mapping
     model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
+    # Define your prompt template
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
     Question:
     {query_text}
     """
+    # Tokenize the input with padding and return the attention mask
+    inputs = tokenizer(prompt_template, return_tensors='pt', padding=True, truncation=True).to(model.device)
+    # Generate the model's output with attention mask
+    outputs = model.generate(
+        input_ids=inputs['input_ids'],
+        attention_mask=inputs['attention_mask'],  # Add attention mask to the model
+        max_length=1024  # Define a more reasonable max_length
+    )
     # Decode and print the output
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     print(response)
 def streamlit_app():