webpluging

Paused

App Files Files Community

ranamhamoud committed on Apr 18, 2024

Commit

841e4af

verified ·

1 Parent(s): f317c15

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -47

app.py CHANGED Viewed

@@ -1,68 +1,70 @@
-import os
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from peft import PeftModel
-import gradio as gr
 from typing import Iterator, List, Tuple
 # Constants
-MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DEFAULT_MAX_NEW_TOKENS = 930
-# Model Configuration for Generating Mode
 model_id = "meta-llama/Llama-2-7b-hf"
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=False,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=bnb_config)
-model_generate = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-tokenizer.pad_token = tokenizer.eos_token
-# Editing mode uses the same tokenizer but might use a simpler or different model setup
-model_edit = model_generate  # For simplicity, using the same model setup for editing in this example
-# Helper Functions
-def generate_text(input_text: str, chat_history: List[Tuple[str, str]], max_tokens: int = DEFAULT_MAX_NEW_TOKENS) -> Iterator[str]:
-    # Append the new message to the chat history for context
-    chat_history.append(("user", input_text))
-    # Prepare the input with the conversation context
-    context = "\n".join([f"{speaker}: {text}" for speaker, text in chat_history])
-    input_ids = tokenizer(context, return_tensors="pt").input_ids.to(model_generate.device)
-    outputs = model_generate.generate(input_ids, max_length=input_ids.shape[1] + max_tokens, do_sample=True)
-    for output in tokenizer.decode(outputs[0], skip_special_tokens=True).split():
-        yield output
-    chat_history.append(("assistant", tokenizer.decode(outputs[0], skip_special_tokens=True)))
-def edit_text(input_text: str, chat_history: List[Tuple[str, str]]) -> Iterator[str]:
-    context = "\n".join([f"{speaker}: {text}" for speaker, text in chat_history])
-    input_ids = tokenizer(context, return_tensors="pt").input_ids.to(model_edit.device)
-    outputs = model_edit.generate(input_ids, max_length=input_ids.shape[1] + DEFAULT_MAX_NEW_TOKENS, do_sample=True)
-    for output in tokenizer.decode(outputs[0], skip_special_tokens=True).split():
-        yield output
 # Gradio Interface
-def switch_mode(is_editing: bool, input_text: str, chat_history: List[Tuple[str, str]]) -> Iterator[str]:
-    if is_editing and chat_history:
-        return edit_text(input_text, chat_history)
-    elif not is_editing:
-        return generate_text(input_text, chat_history)
-    else:
-        yield "Chat history is empty, cannot edit."
 with gr.Blocks() as demo:
     with gr.Row():
         input_text = gr.Textbox(label="Input Text")
-        is_editing = gr.Checkbox(label="Editing Mode", value=False)
-        output_text = gr.Textbox(label="Output", interactive=True)
-        chat_history = gr.State([])  # Using State to maintain chat history
     generate_button = gr.Button("Generate/Edit")
-    generate_button.click(switch_mode, inputs=[is_editing, input_text, chat_history], outputs=output_text)
-# Main Execution
-if __name__ == "__main__":
     demo.launch()

 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from threading import Thread
 from typing import Iterator, List, Tuple
+import gradio as gr
 # Constants
+MAX_INPUT_TOKEN_LENGTH = 4096
 DEFAULT_MAX_NEW_TOKENS = 930
+# Load Models and Tokenizers
 model_id = "meta-llama/Llama-2-7b-hf"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+model_generate = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+model_edit = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")  # Assuming a different setup or hyperparameters
+# Helper function to process text
+def process_text(text: str) -> str:
+    """Return *text* with each newline replaced by a space and surrounding whitespace stripped."""
+    return text.replace("\n", " ").strip()
+def run_model(input_ids, model, max_new_tokens, top_p, top_k, temperature, repetition_penalty):
+    """Call ``model.generate`` on *input_ids* with sampling enabled and return its result.
+
+    The total length cap passed as ``max_length`` is the prompt length
+    (``input_ids.shape[1]``) plus *max_new_tokens*. ``top_p``, ``top_k``,
+    ``temperature`` and ``repetition_penalty`` are forwarded unchanged under
+    the same keyword names.
+    """
+    return model.generate(
+        input_ids=input_ids,
+        max_length=input_ids.shape[1] + max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty
+    )
+def generate_text(mode: str, message: str, chat_history: List[Tuple[str, str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
+                  temperature: float = 0.6, top_p: float = 0.7, top_k: int = 20, repetition_penalty: float = 1.0) -> Iterator[str]:
+    conversation = [{"role": "user", "content": user} for user, _ in chat_history]
+    conversation.append({"role": "assistant", "content": assistant} for _, assistant in chat_history)
+    conversation.append({"role": "user", "content": message})
+    context = "\n".join(f"{entry['role']}: {entry['content']}" for entry in conversation)
+    input_ids = tokenizer(context, return_tensors="pt", padding=True, truncation=True).input_ids.to(model_generate.device)
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    model = model_edit if mode == 'edit' else model_generate
+    outputs = []
+    t = Thread(target=lambda: outputs.extend(run_model(input_ids, model, max_new_tokens, top_p, top_k, temperature, repetition_penalty)))
+    t.start()
+    t.join()
+    for output in outputs:
+        for text in tokenizer.decode(output, skip_special_tokens=True).split():
+            processed_text = process_text(text)
+            yield processed_text
 # Gradio Interface
+def switch_mode(mode: str, message: str, chat_history: List[Tuple[str, str]]):
+    return list(generate_text(mode, message, chat_history))
 with gr.Blocks() as demo:
     with gr.Row():
+        mode_selector = gr.Radio(["generate", "edit"], label="Mode", value="generate")
         input_text = gr.Textbox(label="Input Text")
+        output_text = gr.Textbox(label="Output")
+        chat_history = gr.State(default=[])
     generate_button = gr.Button("Generate/Edit")
+    generate_button.click(switch_mode, inputs=[mode_selector, input_text, chat_history], outputs=output_text)
     demo.launch()