Spaces:

amiguel
/

amiguel-fintune_naming_model

Sleeping

App Files Files Community

amiguel commited on Feb 15

Commit

0eb710b

verified ·

1 Parent(s): 0373f3c

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -74

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from threading import Thread
 import PyPDF2
 import pandas as pd
 import torch
 # Set page configuration
 st.set_page_config(
@@ -13,7 +14,6 @@ st.set_page_config(
     layout="centered"
 )
-# Correct model name
 MODEL_NAME = "amiguel/optimizedModelListing6.1"
 # Title with rocket emojis
@@ -36,55 +36,18 @@ with st.sidebar:
 if "messages" not in st.session_state:
     st.session_state.messages = []
-# Process uploaded files
 @st.cache_data
 def process_file(uploaded_file):
-    if uploaded_file is None:
-        return ""
-    try:
-        if uploaded_file.type == "application/pdf":
-            pdf_reader = PyPDF2.PdfReader(uploaded_file)
-            return "\n".join([page.extract_text() for page in pdf_reader.pages])
-        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
-            df = pd.read_excel(uploaded_file)
-            return df.to_markdown()
-    except Exception as e:
-        st.error(f"📄 Error processing file: {str(e)}")
-        return ""
-# Load model and tokenizer with authentication
 @st.cache_resource
 def load_model(hf_token):
-    try:
-        if hf_token:
-            login(token=hf_token)
-        else:
-            st.error("🔐 Authentication required!")
-            return None, None
-        tokenizer = AutoTokenizer.from_pretrained(
-            MODEL_NAME,
-            token=hf_token
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_NAME,
-            device_map="auto",
-            torch_dtype=torch.float16,
-            token=hf_token
-        )
-        return model, tokenizer
-    except Exception as e:
-        st.error(f"🤖 Model loading failed: {str(e)}")
-        return None, None
-# Generate responses with streaming
-def generate_response(prompt, file_context):
-    full_prompt = f"""Analyze this context:
-    {file_context}
-    Question: {prompt}
-    Answer:"""
     streamer = TextIteratorStreamer(
         tokenizer,
@@ -92,36 +55,27 @@ def generate_response(prompt, file_context):
         skip_special_tokens=True
     )
-    inputs = tokenizer(
-        full_prompt,
-        return_tensors="pt",
-        max_length=4096,
-        truncation=True
-    ).to(model.device)
-    generation_kwargs = dict(
-        inputs,
-        streamer=streamer,
-        max_new_tokens=1024,
-        temperature=0.7,
-        top_p=0.9,
-        repetition_penalty=1.1,
-        do_sample=True,
-        use_cache=True
-    )
     Thread(target=model.generate, kwargs=generation_kwargs).start()
     return streamer
 # Display chat messages
 for message in st.session_state.messages:
-    try:
-        avatar = "👤" if message["role"] == "user" else "🤖"
-        with st.chat_message(message["role"], avatar=avatar):
-            st.markdown(message["content"])
-    except:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
 # Chat input handling
 if prompt := st.chat_input("Ask your inspection question..."):
@@ -129,7 +83,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
         st.error("🔑 Authentication required!")
         st.stop()
-    # Load model if not loaded
     if "model" not in st.session_state:
         st.session_state.model, st.session_state.tokenizer = load_model(hf_token)
     model = st.session_state.model
@@ -143,23 +97,25 @@ if prompt := st.chat_input("Ask your inspection question..."):
     # Process file
     file_context = process_file(uploaded_file)
-    # Generate response
     if model and tokenizer:
         try:
             with st.chat_message("assistant", avatar="🤖"):
-                streamer = generate_response(prompt, file_context)
                 response_container = st.empty()
                 full_response = ""
                 for chunk in streamer:
-                    # Remove <think> tags and clean text
                     cleaned_chunk = chunk.replace("<think>", "").replace("</think>", "").strip()
                     full_response += cleaned_chunk + " "
-                    # Update display with typing cursor
                     response_container.markdown(full_response + "▌", unsafe_allow_html=True)
-                # Display final response
                 response_container.markdown(full_response)
                 st.session_state.messages.append({"role": "assistant", "content": full_response})

 import PyPDF2
 import pandas as pd
 import torch
+import time
 # Set page configuration
 st.set_page_config(
     layout="centered"
 )
 MODEL_NAME = "amiguel/optimizedModelListing6.1"
 # Title with rocket emojis
 if "messages" not in st.session_state:
     st.session_state.messages = []
 @st.cache_data
 def process_file(uploaded_file):
+    # Existing file processing logic
+    pass
 @st.cache_resource
 def load_model(hf_token):
+    # Existing model loading logic
+    pass
+def generate_with_kv_cache(prompt, file_context, use_cache=True):
+    full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"
     streamer = TextIteratorStreamer(
         tokenizer,
         skip_special_tokens=True
     )
+    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+    # KV Caching parameters
+    generation_kwargs = {
+        **inputs,
+        "max_new_tokens": 1024,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "repetition_penalty": 1.1,
+        "do_sample": True,
+        "use_cache": use_cache,  # KV Cache control
+        "streamer": streamer
+    }
     Thread(target=model.generate, kwargs=generation_kwargs).start()
     return streamer
 # Display chat messages
 for message in st.session_state.messages:
+    # Existing message display logic
+    pass
 # Chat input handling
 if prompt := st.chat_input("Ask your inspection question..."):
         st.error("🔑 Authentication required!")
         st.stop()
+    # Load model
     if "model" not in st.session_state:
         st.session_state.model, st.session_state.tokenizer = load_model(hf_token)
     model = st.session_state.model
     # Process file
     file_context = process_file(uploaded_file)
+    # Generate response with KV caching
     if model and tokenizer:
         try:
             with st.chat_message("assistant", avatar="🤖"):
+                start_time = time.time()
+                streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
                 response_container = st.empty()
                 full_response = ""
                 for chunk in streamer:
                     cleaned_chunk = chunk.replace("<think>", "").replace("</think>", "").strip()
                     full_response += cleaned_chunk + " "
                     response_container.markdown(full_response + "▌", unsafe_allow_html=True)
+                # Display metrics
+                end_time = time.time()
+                st.caption(f"Generated in {end_time - start_time:.2f}s using KV caching")
                 response_container.markdown(full_response)
                 st.session_state.messages.append({"role": "assistant", "content": full_response})