Update app.py
app.py CHANGED
@@ -1,3 +1,5 @@
+import os
+import time
 import spaces
 import json
 import subprocess
@@ -9,14 +11,35 @@ from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
 
-#
-
-
-
-
-)
-
-#
+# Define model details
+MODEL_REPO = "mradermacher/Viper-Coder-32B-Elite13-GGUF"
+MODEL_FILENAME = "Viper-Coder-32B-Elite13.Q2_K.gguf"
+MODEL_DIR = "./models"
+MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILENAME)
+
+# Ensure the model directory exists
+os.makedirs(MODEL_DIR, exist_ok=True)
+
+# Download the model if not already present
+if not os.path.exists(MODEL_PATH):
+    print("Downloading the model... This may take some time.")
+    try:
+        hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename=MODEL_FILENAME,
+            local_dir=MODEL_DIR
+        )
+        print("Model downloaded successfully!")
+    except Exception as e:
+        print(f"Error downloading model: {e}")
+        exit(1)
+
+# Ensure model is fully downloaded before using
+while not os.path.exists(MODEL_PATH):
+    print("Waiting for model to be available...")
+    time.sleep(5)
+
+# Function to handle AI responses
 @spaces.GPU(duration=120)
 def respond(
     message,
@@ -32,7 +55,7 @@ def respond(
     chat_template = MessagesFormatterType.GEMMA_2
 
     llm = Llama(
-        model_path=
+        model_path=MODEL_PATH,
         flash_attn=True,
         n_gpu_layers=81,
         n_batch=1024,
@@ -58,14 +81,8 @@ def respond(
     messages = BasicChatHistory()
 
     for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
+        user = {'role': Roles.user, 'content': msn[0]}
+        assistant = {'role': Roles.assistant, 'content': msn[1]}
         messages.add_message(user)
         messages.add_message(assistant)
 
@@ -82,36 +99,21 @@
         outputs += output
         yield outputs
 
-# Gradio
+# Function to create Gradio interface
 def create_interface(model_name, description):
     return gr.ChatInterface(
         respond,
        additional_inputs=[
            gr.Textbox(value=model_name, label="Model", interactive=False),
-            gr.Textbox(
+            gr.Textbox(
+                value="You are a helpful assistant. You should think step-by-step. You should answer all questions by prioritizing the principle of openness",
+                label="System message"
+            ),
            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-            gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-p",
-            ),
-            gr.Slider(
-                minimum=0,
-                maximum=100,
-                value=40,
-                step=1,
-                label="Top-k",
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=2.0,
-                value=1.1,
-                step=0.1,
-                label="Repetition penalty",
-            ),
+            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+            gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
+            gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
        ],
        retry_btn="Retry",
        undo_btn="Undo",
@@ -119,21 +121,18 @@ def create_interface(model_name, description):
        submit_btn="Send",
        title=f"{model_name}",
        description=description,
-        chatbot=gr.Chatbot(
-            scale=1,
-            likeable=False,
-            show_copy_button=True
-        )
+        chatbot=gr.Chatbot(scale=1, likeable=False, show_copy_button=True)
     )
 
-
-
+# Set interface description
+description = """<p align="center">Viper-Coder-32B-Elite13-GGUF</p>"""
+interface = create_interface(MODEL_REPO, description)
 
-# Gradio Blocks
+# Create Gradio Blocks app
 demo = gr.Blocks()
 
 with demo:
     interface.render()
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
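For context, the substance of this change is the startup path: the GGUF weights are fetched from the Hub into ./models and then passed to llama-cpp-python through model_path. Below is a minimal standalone sketch of that flow, assuming llama-cpp-python and huggingface_hub are installed; the create_chat_completion call, the example prompt, and n_ctx are illustrative additions and not part of the Space's code, which instead streams responses through llama_cpp_agent inside respond().

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO = "mradermacher/Viper-Coder-32B-Elite13-GGUF"
MODEL_FILENAME = "Viper-Coder-32B-Elite13.Q2_K.gguf"
MODEL_DIR = "./models"
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILENAME)

# Fetch the GGUF file once; hf_hub_download blocks until the file is fully on disk.
if not os.path.exists(MODEL_PATH):
    hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME, local_dir=MODEL_DIR)

# Load the quantized model and run a single chat completion.
# n_gpu_layers and n_batch mirror the values used in the Space; adjust for local hardware.
llm = Llama(model_path=MODEL_PATH, n_gpu_layers=81, n_batch=1024, n_ctx=4096)
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Write a Python one-liner that reverses a string."}],
    max_tokens=128,
    temperature=0.7,
)
print(result["choices"][0]["message"]["content"])

Even at Q2_K quantization, a 32B model still needs substantial GPU memory, which is why respond() in the Space stays wrapped in @spaces.GPU(duration=120) so ZeroGPU hardware is allocated for each call.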