Spaces:

amiguel
/

amiguel-fintune_naming_model

Sleeping

App Files Files Community

amiguel committed on Apr 27

Commit

05e25f7

verified ·

1 Parent(s): 31c383d

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -140

app.py CHANGED Viewed

@@ -1,137 +1,58 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-from huggingface_hub import login
-from threading import Thread
-import PyPDF2
-import pandas as pd
 import torch
 import time
 import os
-from transformers import AutoModelForMaskedLM
-# Check if 'peft' is installed
-try:
-    from peft import PeftModel, PeftConfig
-except ImportError:
-    raise ImportError(
-        "The 'peft' library is required but not installed. "
-        "Please install it using: `pip install peft`"
-    )
-# 🔐 Hardcoded Hugging Face Token
-HF_TOKEN = os.environ.get("HF_TOKEN") # Replace with your actual token
-# Set page configuration
 st.set_page_config(
-    page_title="Assistente LGT | Angola",
     page_icon="🚀",
     layout="centered"
 )
-# Model base and options
-BASE_MODEL_NAME = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO" #"neuralmind/bert-base-portuguese-cased" #"pierreguillou/gpt2-small-portuguese" #"unicamp-dl/ptt5-base-portuguese-vocab" #"mistralai/Mistral-7B-Instruct-v0.2"
-MODEL_OPTIONS = {
-    "Full Fine-Tuned": "amiguel/GM_finetune", #"amiguel/mistral-angolan-laborlaw-bert-base-pt", #"amiguel/mistral-angolan-laborlaw-gpt2",#"amiguel/mistral-angolan-laborlaw-ptt5", #"amiguel/mistral-angolan-laborlaw",
-    "LoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-lora",
-    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora"
-}
-st.title("🚀 Assistente | Angola 🚀")
 USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
 BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
-# Sidebar
-with st.sidebar:
-    st.header("Model Selection 🤖")
-    model_type = st.selectbox("Choose Model Type", list(MODEL_OPTIONS.keys()), index=0)
-    selected_model = MODEL_OPTIONS[model_type]
-    st.header("Upload Documents 📂")
-    uploaded_file = st.file_uploader(
-        "Choose a PDF or XLSX file",
-        type=["pdf", "xlsx"],
-        label_visibility="collapsed"
     )
 # Session state
 if "messages" not in st.session_state:
     st.session_state.messages = []
-# File processor
-@st.cache_data
-def process_file(uploaded_file):
-    if uploaded_file is None:
-        return ""
-    try:
-        if uploaded_file.type == "application/pdf":
-            pdf_reader = PyPDF2.PdfReader(uploaded_file)
-            return "\n".join([page.extract_text() for page in pdf_reader.pages])
-        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
-            df = pd.read_excel(uploaded_file)
-            return df.to_markdown()
-    except Exception as e:
-        st.error(f"📄 Error processing file: {str(e)}")
-        return ""
-# Model loader
-@st.cache_resource
-def load_model(model_type, selected_model):
-    try:
-        login(token=HF_TOKEN)
-        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, token=HF_TOKEN)
-        if model_type == "Full Fine-Tuned":
-             model = AutoModelForMaskedLM.from_pretrained(
-                 selected_model,
-                 torch_dtype=torch.bfloat16,  # or float32 for compatibility
-                 token=HF_TOKEN
-            ).to("cuda" if torch.cuda.is_available() else "cpu")
-            #model = AutoModelForCausalLM.from_pretrained(
-                 #selected_model,
-       #         torch_dtype=torch.bfloat16,
-       #         device_map="auto",
-       #         token=HF_TOKEN
-       #
-        else:
-            base_model = AutoModelForCausalLM.from_pretrained(
-                BASE_MODEL_NAME,
-                torch_dtype=torch.bfloat16,
-                device_map="auto",
-                token=HF_TOKEN
-            )
-            model = PeftModel.from_pretrained(
-                base_model,
-                selected_model,
-                torch_dtype=torch.bfloat16,
-                is_trainable=False,
-                token=HF_TOKEN
-            )
-        return model, tokenizer
-    except Exception as e:
-        st.error(f"🤖 Model loading failed: {str(e)}")
-        return None
-# Generation function
-def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
-    full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"
     streamer = TextIteratorStreamer(
-        tokenizer,
-        skip_prompt=True,
         skip_special_tokens=True
     )
-    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
     generation_kwargs = {
         "input_ids": inputs["input_ids"],
         "attention_mask": inputs["attention_mask"],
@@ -140,11 +61,11 @@ def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=Tru
         "top_p": 0.9,
         "repetition_penalty": 1.1,
         "do_sample": True,
-        "use_cache": use_cache,
         "streamer": streamer
     }
-    Thread(target=model.generate, kwargs=generation_kwargs).start()
     return streamer
 # Display chat history
@@ -153,40 +74,26 @@ for message in st.session_state.messages:
     with st.chat_message(message["role"], avatar=avatar):
         st.markdown(message["content"])
-# Prompt interaction
-if prompt := st.chat_input("Ask your inspection question..."):
-    # Load model if necessary
-    if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
-        model_data = load_model(model_type, selected_model)
-        if model_data is None:
-            st.error("Failed to load model.")
-            st.stop()
-        st.session_state.model, st.session_state.tokenizer = model_data
-        st.session_state.model_type = model_type
-    model = st.session_state.model
-    tokenizer = st.session_state.tokenizer
     with st.chat_message("user", avatar=USER_AVATAR):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
-    file_context = process_file(uploaded_file)
     if model and tokenizer:
         try:
             with st.chat_message("assistant", avatar=BOT_AVATAR):
                 start_time = time.time()
-                streamer = generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True)
                 response_container = st.empty()
                 full_response = ""
                 for chunk in streamer:
-                    cleaned_chunk = chunk.replace("<think>", "").replace("</think>", "").strip()
-                    full_response += cleaned_chunk + " "
                     response_container.markdown(full_response + "▌", unsafe_allow_html=True)
                 end_time = time.time()
@@ -194,15 +101,9 @@ if prompt := st.chat_input("Ask your inspection question..."):
                 output_tokens = len(tokenizer(full_response)["input_ids"])
                 speed = output_tokens / (end_time - start_time)
-                input_cost = (input_tokens / 1_000_000) * 5
-                output_cost = (output_tokens / 1_000_000) * 15
-                total_cost_usd = input_cost + output_cost
-                total_cost_aoa = total_cost_usd * 1160
                 st.caption(
                     f"🔑 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
-                    f"🕒 Speed: {speed:.1f}t/s | 💰 Cost (USD): ${total_cost_usd:.4f} | "
-                    f"💵 Cost (AOA): {total_cost_aoa:.4f}"
                 )
                 response_container.markdown(full_response)

 import streamlit as st
 import torch
 import time
 import os
+from threading import Thread
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from huggingface_hub import login
+# Hugging Face token, read from the HF_TOKEN environment variable
+HF_TOKEN = os.environ.get("HF_TOKEN")  # or directly "hf_xxxxxx"
+# App config
 st.set_page_config(
+    page_title="GM Fine-tune Assistant 🚀",
     page_icon="🚀",
     layout="centered"
 )
+st.title("🚀 GM Fine-tune Assistant 🚀")
+# Avatars
 USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
 BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
+# Login to Huggingface
+login(token=HF_TOKEN)
+# Load Model
+@st.cache_resource
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained("amiguel/GM_finetune", token=HF_TOKEN)
+    model = AutoModelForCausalLM.from_pretrained(
+        "amiguel/GM_finetune",
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        token=HF_TOKEN
     )
+    return model, tokenizer
+model, tokenizer = load_model()
 # Session state
 if "messages" not in st.session_state:
     st.session_state.messages = []
+# Streamer
+def generate_response(prompt, model, tokenizer):
     streamer = TextIteratorStreamer(
+        tokenizer,
+        skip_prompt=True,
         skip_special_tokens=True
     )
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     generation_kwargs = {
         "input_ids": inputs["input_ids"],
         "attention_mask": inputs["attention_mask"],
         "top_p": 0.9,
         "repetition_penalty": 1.1,
         "do_sample": True,
         "streamer": streamer
     }
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
     return streamer
 # Display chat history
     with st.chat_message(message["role"], avatar=avatar):
         st.markdown(message["content"])
+# Chat input
+if prompt := st.chat_input("Ask me anything about General Knowledge..."):
+    # Display user message
     with st.chat_message("user", avatar=USER_AVATAR):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
+    # Bot generating response
     if model and tokenizer:
         try:
             with st.chat_message("assistant", avatar=BOT_AVATAR):
                 start_time = time.time()
+                streamer = generate_response(prompt, model, tokenizer)
                 response_container = st.empty()
                 full_response = ""
                 for chunk in streamer:
+                    full_response += chunk
                     response_container.markdown(full_response + "▌", unsafe_allow_html=True)
                 end_time = time.time()
                 output_tokens = len(tokenizer(full_response)["input_ids"])
                 speed = output_tokens / (end_time - start_time)
                 st.caption(
                     f"🔑 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
+                    f"🕒 Speed: {speed:.1f} tokens/sec"
                 )
                 response_container.markdown(full_response)