Sidharthan committed
Commit 97c8b2b · Parent: 01d319b

Changed the interface and added access tokens

Files changed (1): app.py (+60, -108)
app.py CHANGED
@@ -1,46 +1,46 @@
 import streamlit as st
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList
 from peft import AutoPeftModelForCausalLM
 import torch
 import re
-from transformers import StoppingCriteria, StoppingCriteriaList
+import os
 
-# Initialize session state variables if they don't exist
-if 'messages' not in st.session_state:
-    st.session_state.messages = []
-if 'conversation_history' not in st.session_state:
-    st.session_state.conversation_history = ""
+os.environ['HF_HOME'] = '/app/cache'
+hf_token = os.getenv('HF_TOKEN')
+
+class StopWordCriteria(StoppingCriteria):
+    def __init__(self, tokenizer, stop_word):
+        self.stop_word_id = tokenizer.encode(stop_word, add_special_tokens=False)
+
+    def __call__(self, input_ids, scores, **kwargs):
+        if len(input_ids[0]) >= len(self.stop_word_id) and input_ids[0][-len(self.stop_word_id):].tolist() == self.stop_word_id:
+            return True
+        return False
 
-# Load the model from huggingface.
 def load_model():
     try:
-        # Check CUDA availability
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if torch.cuda.is_available():
-            device = torch.device("cuda")
             st.success(f"Using GPU: {torch.cuda.get_device_name(0)}")
         else:
-            device = torch.device("cpu")
-            st.warning("CUDA is not available. Using CPU.")
+            st.warning("Using CPU for inference")
 
-        # Fine-tuned model for generating scripts
         model_name = "Sidharthan/gemma2_scripter"
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_name,
-            trust_remote_code=True
+            trust_remote_code=True,
+            token=hf_token
         )
 
-        # Load model with appropriate device settings
         model = AutoPeftModelForCausalLM.from_pretrained(
             model_name,
-            device_map=None,  # We'll handle device placement manually
+            device_map=None,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             trust_remote_code=True,
-            low_cpu_mem_usage=True
-        )
-
-        # Move model to device
-        model = model.to(device)
+            low_cpu_mem_usage=True,
+            cache_dir='/app/cache'
+        ).to(device)
 
         return model, tokenizer
 
@@ -48,22 +48,13 @@ def load_model():
         st.error(f"Error loading model: {str(e)}")
         raise e
 
-
-class StopWordCriteria(StoppingCriteria):
-    def __init__(self, tokenizer, stop_word):
-        self.stop_word_id = tokenizer.encode(stop_word, add_special_tokens=False)
-
-    def __call__(self, input_ids, scores, **kwargs):
-        # Check if the last token(s) match the stop word
-        if len(input_ids[0]) >= len(self.stop_word_id) and input_ids[0][-len(self.stop_word_id):].tolist() == self.stop_word_id:
-            return True
-        return False
-
-def generate_text(prompt, model, tokenizer, params, last_user_prompt=""):
-    # Determine the device
+def generate_script(tags, model, tokenizer, params):
     device = next(model.parameters()).device
 
-    # Tokenize and move to the correct device
+    # Create prompt with tags
+    prompt = f"<bos><start_of_turn>keywords\n{tags}<end_of_turn>\n<start_of_turn>script\n"
+
+    # Tokenize and move to device
     inputs = tokenizer(prompt, return_tensors='pt')
     inputs = {k: v.to(device) for k, v in inputs.items()}
 
@@ -85,22 +76,12 @@ def generate_text(prompt, model, tokenizer, params, last_user_prompt=""):
             stopping_criteria=stopping_criteria
         )
 
-        # Move outputs back to CPU for decoding
-        outputs = outputs.cpu()
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        print("Response from the model:", response)
 
-        # Clean up unwanted patterns
-        response = re.sub(r'user\s.*?model\s', '', response, flags=re.DOTALL)
+        # Clean up response
        response = re.sub(r'keywords\s.*?script\s', '', response, flags=re.DOTALL)
         response = re.sub(r'\bscript\b.*$', '', response, flags=re.IGNORECASE).strip()
 
-        # Remove previous prompt if repeated in response
-        print("Last user prompt:", last_user_prompt)
-        if last_user_prompt and last_user_prompt in response:
-
-            response = response.replace(last_user_prompt, "").strip()
-
         return response
 
     except RuntimeError as e:
@@ -112,16 +93,16 @@ def generate_text(prompt, model, tokenizer, params, last_user_prompt=""):
         return f"Error during generation: {str(e)}"
 
 def main():
-    st.title("🤖 LLM Chat Interface")
+    st.title("🎥 YouTube Script Generator")
 
     # Sidebar for model parameters
-    st.sidebar.title("Model Parameters")
+    st.sidebar.title("Generation Parameters")
     params = {
-        'max_length': st.sidebar.selectbox('Max Length', options=[64, 128, 256, 512, 1024], index=3),
-        'temperature': st.sidebar.selectbox('Temperature', options=[0.2, 0.5, 0.7, 0.9, 1.0], index=2),
-        'top_p': st.sidebar.selectbox('Top P', options=[0.7, 0.8, 0.9, 0.95, 1.0], index=3),
-        'top_k': st.sidebar.selectbox('Top K', options=[10, 20, 50, 100], index=2),
-        'repetition_penalty': st.sidebar.selectbox('Repetition Penalty', options=[1.0, 1.1, 1.2, 1.3, 1.5], index=2)
+        'max_length': st.sidebar.slider('Max Length', 64, 1024, 512),
+        'temperature': st.sidebar.slider('Temperature', 0.1, 1.0, 0.7),
+        'top_p': st.sidebar.slider('Top P', 0.1, 1.0, 0.95),
+        'top_k': st.sidebar.slider('Top K', 1, 100, 50),
+        'repetition_penalty': st.sidebar.slider('Repetition Penalty', 1.0, 2.0, 1.2)
     }
 
     # Load model and tokenizer
@@ -131,65 +112,36 @@ def main():
 
     model, tokenizer = get_model()
 
-    # Chat interface
-    st.markdown("### Chat Interface")
+    # Tag input section
+    st.markdown("### Add Tags")
+    st.markdown("Enter tags separated by commas to generate a YouTube script")
+
+    # Create columns for tag input and generate button
+    col1, col2 = st.columns([3, 1])
 
-    # Display the full conversation history
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
+    with col1:
+        tags = st.text_input("Enter tags", placeholder="tech, AI, future, innovations...")
 
-    # Input area
-    input_mode = st.selectbox(
-        "Select Mode",
-        ["Conversation", "Script Generation"],
-        key="input_mode"
-    )
+    with col2:
+        generate_button = st.button("Generate Script", type="primary")
 
-    # Chat input
-    if prompt := st.chat_input("Enter your message"):
-        # Add user message to chat history
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.markdown(prompt)
+    # Generated script section
+    if generate_button and tags:
+        st.markdown("### Generated Script")
+        with st.spinner("Generating script..."):
+            script = generate_script(tags, model, tokenizer, params)
+            st.text_area("Your script:", value=script, height=400)
 
-        # Prepare prompt based on selected mode
-        if input_mode == "Conversation":
-            # Add new user input to conversation history
-            if st.session_state.conversation_history:
-                full_prompt = f"{st.session_state.conversation_history}\n<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
-            else:
-                full_prompt = f"<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
-        else:
-            # Script generation mode
-            full_prompt = f"<bos><start_of_turn>keywords\n{prompt}<end_of_turn>\n<start_of_turn>script\n"
-
-        # Generate response
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking..."):
-                response = generate_text(full_prompt, model, tokenizer, params, last_user_prompt=prompt)
-                st.markdown(response)
-                st.session_state.messages.append({"role": "assistant", "content": response})
-
-        # Update conversation history for the model (not displayed)
-        if input_mode == "Conversation":
-            if st.session_state.conversation_history:
-                st.session_state.conversation_history = (
-                    f"{st.session_state.conversation_history}"
-                    f"<bos><start_of_turn>user\n{prompt}<end_of_turn>"
-                    f"<start_of_turn>model\n{response}"
-                )
-            else:
-                st.session_state.conversation_history = (
-                    f"<bos><start_of_turn>user\n{prompt}<end_of_turn>"
-                    f"<start_of_turn>model\n{response}"
-                )
+            # Add download button
+            st.download_button(
+                label="Download Script",
+                data=script,
+                file_name="youtube_script.txt",
+                mime="text/plain"
+            )
 
-    # Clear chat button
-    if st.button("Clear Chat"):
-        st.session_state.messages = []
-        st.session_state.conversation_history = ""
-        st.experimental_rerun()
+    elif generate_button and not tags:
+        st.warning("Please enter some tags first!")
 
 if __name__ == "__main__":
-    main()
+    main()
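Both the old generate_text and the new generate_script pass a stopping_criteria argument to model.generate, but its construction sits in unchanged context that the diff does not display. Below is a minimal sketch of how the StopWordCriteria class (moved to the top of the file in this commit) would typically be wired in; the stop word "<end_of_turn>" is an assumption based on the Gemma prompt template used above, and the elided lines in app.py may differ.

# Hedged sketch of the elided generation call; not a verbatim excerpt.
# Assumes model, tokenizer, inputs, and params are defined as in the
# hunks above.
from transformers import StoppingCriteriaList

# Stop as soon as the model emits the Gemma end-of-turn marker
# (assumed stop word).
stopping_criteria = StoppingCriteriaList([
    StopWordCriteria(tokenizer, "<end_of_turn>")
])

outputs = model.generate(
    **inputs,
    max_length=params['max_length'],
    temperature=params['temperature'],
    top_p=params['top_p'],
    top_k=params['top_k'],
    repetition_penalty=params['repetition_penalty'],
    do_sample=True,
    stopping_criteria=stopping_criteria
)

Note that StopWordCriteria.__call__ compares token ids, so it only fires when the generated tail exactly matches the id sequence produced by tokenizer.encode(stop_word); if the model reaches the same string through a different tokenization, the criterion would not trigger.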
 
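The "access tokens" part of the commit message appears as hf_token = os.getenv('HF_TOKEN'), which is passed to AutoTokenizer.from_pretrained; on a Hugging Face Space the variable would come from a repository secret. A minimal sketch of reproducing that environment for a local run follows; the token value is a placeholder and the launcher is not part of this commit.

# Hypothetical local launcher, not part of app.py: provide the same
# environment the Space supplies, then start Streamlit.
import os
import subprocess

os.environ["HF_TOKEN"] = "hf_xxx"     # placeholder; use a real access token
os.environ["HF_HOME"] = "/app/cache"  # must exist and be writable locally

subprocess.run(["streamlit", "run", "app.py"], check=True)

Two design notes. First, app.py sets os.environ['HF_HOME'] after transformers has been imported, and huggingface_hub generally resolves HF_HOME at import time, so that assignment may not move the default cache; that is presumably why cache_dir='/app/cache' is also passed explicitly to from_pretrained. Second, only the tokenizer load passes token=hf_token; if the base or adapter weights are gated, the AutoPeftModelForCausalLM.from_pretrained call would need the same argument.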