shljessie committed
Commit 4e3bf2c · 1 Parent(s): b44b7bd

text basic interface

Files changed (1)
  1. app.py +32 -32
app.py CHANGED
@@ -4,40 +4,40 @@ import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Check if CUDA is available
-if not torch.cuda.is_available():
-    raise EnvironmentError("CUDA is not available. This script requires a GPU.")
+# # Check if CUDA is available
+# if not torch.cuda.is_available():
+#     raise EnvironmentError("CUDA is not available. This script requires a GPU.")
 
 # Model Configuration
-MODEL_ID = "meta-llama/Llama-2-7b-chat"
-MAX_INPUT_TOKEN_LENGTH = 4096
-MAX_NEW_TOKENS = 1024
-TEMPERATURE = 0.6
-TOP_P = 0.9
-TOP_K = 50
-REPETITION_PENALTY = 1.2
-
-# Load the model and tokenizer
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
-def generate_response(user_input):
-    """
-    Generate a response to the user input using the Llama-2 7B model.
-    """
-    input_ids = tokenizer.encode(user_input, return_tensors="pt")
-    input_ids = input_ids.to(model.device)
-
-    # Generate a response
-    output = model.generate(input_ids, max_length=MAX_INPUT_TOKEN_LENGTH + len(input_ids[0]),
-                            max_new_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE,
-                            top_k=TOP_K, top_p=TOP_P, repetition_penalty=REPETITION_PENALTY)
-
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    return response
-
-def chatbot_interface(user_input):
-    return generate_response(user_input)
+# MODEL_ID = "meta-llama/Llama-2-7b-chat"
+# MAX_INPUT_TOKEN_LENGTH = 4096
+# MAX_NEW_TOKENS = 1024
+# TEMPERATURE = 0.6
+# TOP_P = 0.9
+# TOP_K = 50
+# REPETITION_PENALTY = 1.2
+
+# # Load the model and tokenizer
+# model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+
+# def generate_response(user_input):
+#     """
+#     Generate a response to the user input using the Llama-2 7B model.
+#     """
+#     input_ids = tokenizer.encode(user_input, return_tensors="pt")
+#     input_ids = input_ids.to(model.device)
+
+#     # Generate a response
+#     output = model.generate(input_ids, max_length=MAX_INPUT_TOKEN_LENGTH + len(input_ids[0]),
+#                             max_new_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE,
+#                             top_k=TOP_K, top_p=TOP_P, repetition_penalty=REPETITION_PENALTY)
+
+#     response = tokenizer.decode(output[0], skip_special_tokens=True)
+#     return response
+
+# def chatbot_interface(user_input):
+#     return generate_response(user_input)
 
 # Create the Gradio interface
 iface = gr.Interface(
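
If the commented-out block is later reinstated, two details in it are worth fixing. model.generate is passed both max_length and max_new_tokens, which conflict (transformers resolves this in favour of max_new_tokens and emits a warning), and temperature/top_p/top_k have no effect unless do_sample=True is set. The checkpoint id also looks off: the transformers-format Llama-2 chat weights are published on the Hub as meta-llama/Llama-2-7b-chat-hf. A minimal corrected sketch of generate_response, assuming that model id and keeping the commit's sampling values:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"  # transformers-format repo (assumed fix)
MAX_INPUT_TOKEN_LENGTH = 4096
MAX_NEW_TOKENS = 1024

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

def generate_response(user_input):
    """Generate a response to the user input with the Llama-2 7B chat model."""
    # Truncate the prompt so it fits the context window; the commit defines
    # MAX_INPUT_TOKEN_LENGTH but never applies it to the input.
    input_ids = tokenizer.encode(user_input, return_tensors="pt",
                                 truncation=True, max_length=MAX_INPUT_TOKEN_LENGTH)
    input_ids = input_ids.to(model.device)
    output = model.generate(
        input_ids,
        max_new_tokens=MAX_NEW_TOKENS,  # drop max_length; max_new_tokens alone suffices
        do_sample=True,                 # required for temperature/top_p/top_k to apply
        temperature=0.6,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
    )
    # Decode only the newly generated tokens rather than echoing the prompt back.
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

This is a sketch, not the committed code: the -hf model id, the prompt truncation, and the prompt-stripping decode are assumptions layered on top of what the diff shows.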