Royrotem100 committed
Commit 4c9d398
1 Parent(s): 64f89ac

Add DictaLM 2.0 instruct model 9

Files changed (1)
  1. app.py +36 -88
app.py CHANGED
@@ -1,30 +1,20 @@
 import os
 import gradio as gr
 from http import HTTPStatus
+import openai
 from typing import Generator, List, Optional, Tuple, Dict
-import re
 from urllib.error import HTTPError
-from flask import Flask, request, jsonify
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import threading
-import requests
-import torch
 
-
-# Load the model and tokenizer
-#tokenizer = AutoTokenizer.from_pretrained("./dictalm2.0-instruct-roys-chat")
-#model = AutoModelForCausalLM.from_pretrained("./dictalm2.0-instruct-roys-chat")
-
-# Load the model and tokenizer
-model_name = "dicta-il/dictalm2.0-instruct"
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+API_URL = os.getenv('API_URL')
+API_KEY = os.getenv('API_KEY')
+CUSTOM_JS = os.getenv('CUSTOM_JS', None)
+oai_client = openai.OpenAI(api_key=API_KEY, base_url=API_URL)
 
 History = List[Tuple[str, str]]
 Messages = List[Dict[str, str]]
 
 def clear_session() -> History:
-    return []
+    return '', []
 
 def history_to_messages(history: History) -> Messages:
     messages = []
@@ -39,69 +29,29 @@ def messages_to_history(messages: Messages) -> Tuple[str, History]:
         history.append([q['content'], r['content']])
     return history
 
-
-# Flask app setup
-app = Flask(__name__)
-
-@app.route('/predict', methods=['POST'])
-def predict():
-    data = request.json
-    input_text = data.get('text', '')
-
-    # Format the input text with instruction tokens
-    formatted_text = f"<s>[INST] {input_text} [/INST]"
-
-    # Tokenize the input
-    inputs = tokenizer(formatted_text, return_tensors='pt', padding=True, truncation=True, max_length=1024)
-
-    # Generate the output
-    outputs = model.generate(
-        inputs['input_ids'],
-        attention_mask=inputs['attention_mask'],
-        max_length=1024,
-        temperature=0.7,
-        top_p=0.9,
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
-    )
-
-    # Decode the output
-    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).replace(formatted_text, '').strip()
-
-    # Remove the user input part from the response
-    if "[/INST]" in prediction:
-        prediction = prediction.split("[/INST]", 1)[-1].strip()
-
-    return jsonify({"prediction": prediction})
-
-def run_flask():
-    app.run(host='0.0.0.0', port=5000)
-
-# Run Flask in a separate thread
-threading.Thread(target=run_flask).start()
-
-
-def model_chat(query: Optional[str], history: Optional[History]) -> Tuple[History, str]:
+def model_chat(query: Optional[str], history: Optional[History]) -> Generator[Tuple[str, History], None, None]:
     if query is None:
         query = ''
     if history is None:
         history = []
     if not query.strip():
-        return history, ""
-
-    response = requests.post("http://127.0.0.1:5000/predict", json={"text": query.strip()})
-    if response.status_code == 200:
-        prediction = response.json().get("prediction", "")
-        history.append((query, prediction))
-        return history, prediction
-    else:
-        return history, "Error: Unable to get a response from the model."
-
-def respond(query: str, history: History) -> Tuple[History, str]:
-    history, response = model_chat(query, history)
-    return history, response  # Return history and response to show the model's response
-
-
+        return
+    messages = history_to_messages(history)
+    messages.append({'role': 'user', 'content': query.strip()})
+    gen = oai_client.chat.completions.create(
+        model='dicta-il/dictalm2.0-instruct',
+        messages=messages,
+        temperature=0.7,
+        max_tokens=1024,
+        top_p=0.9,
+        stream=True
+    )
+    full_response = ''
+    for completion in gen:
+        text = completion.choices[0].delta.content
+        full_response += text or ''
+        yield full_response
+
 with gr.Blocks(css='''
 .gr-group {direction: rtl;}
 .chatbot{text-align:right;}
@@ -147,27 +97,25 @@ with gr.Blocks(css='''
 textarea {
     font-size: 1.2em;
 }
-''', js=None) as demo:
+''', js=CUSTOM_JS) as demo:
     gr.Markdown("""
     <div class="dicta-header">
         <a href="">
-            <img src="file/logo_am.png" alt="Dicta Logo" class="dicta-logo">
+            <img src="file/logo111.png" alt="Dicta Logo" class="dicta-logo">
         </a>
         <div class="dicta-intro-text">
-            <h1>הדגמה ראשונית</h1>
+            <h1>צ'אט מערכי - הדגמה ראשונית</h1>
             <span dir='rtl'>ברוכים הבאים לדמו האינטראקטיבי הראשון. חקרו את יכולות המודל וראו כיצד הוא יכול לסייע לכם במשימותיכם</span><br/>
-            <span dir='rtl'>הדמו נכתב על ידי רועי רתם תוך שימוש במודל שפה דיקטה שפותח על ידי מפא"ת</span><br/>
+            <span dir='rtl'>הדמו נכתב על ידי סרן רועי רתם תוך שימוש במודל שפה דיקטה שפותח על ידי מפא"ת</span><br/>
        </div>
     </div>
    """)
 
-    chatbot = gr.Chatbot()
-    query = gr.Textbox(placeholder="הכנס שאלה בעברית (או באנגלית!)", rtl=True)
-    clear_btn = gr.Button("נקה שיחה")
-
-    demo_state = gr.State([])
-
-    query.submit(respond, [query, demo_state], [chatbot, query, demo_state])
-    clear_btn.click(clear_session, [], demo_state, chatbot)
-
-demo.queue(api_open=False).launch(max_threads=20, share=False, allowed_paths=['logo_am.png'])
+    interface = gr.ChatInterface(model_chat, fill_height=False)
+    interface.chatbot.rtl = True
+    interface.textbox.placeholder = "הכנס שאלה בעברית (או באנגלית!)"
+    interface.textbox.rtl = True
+    interface.textbox.text_align = 'right'
+    interface.theme_css += '.gr-group {direction: rtl !important;}'
+
+demo.queue(api_open=False).launch(max_threads=20, share=False, allowed_paths=['dicta-logo.jpg'])
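
This commit replaces the in-process transformers model and local Flask route with an OpenAI-compatible client, so generation now happens on a server behind `API_URL` instead of on the Space's own hardware, and responses can be streamed token by token. Below is a minimal sketch, not part of the commit, of how that serving path can be exercised outside Gradio. It assumes `API_URL` points at an OpenAI-compatible server hosting `dicta-il/dictalm2.0-instruct` (for example, a vLLM deployment) and that `API_KEY` is a key that server accepts; both names mirror the environment variables read at the top of the new app.py.

```python
# Minimal standalone sketch of the new serving path. Assumed setup: an
# OpenAI-compatible server at API_URL serving dicta-il/dictalm2.0-instruct,
# with API_KEY accepted by it (same env vars as app.py).
import os
import openai

client = openai.OpenAI(
    api_key=os.getenv('API_KEY'),
    base_url=os.getenv('API_URL'),
)

# Same sampling parameters as model_chat in app.py.
stream = client.chat.completions.create(
    model='dicta-il/dictalm2.0-instruct',
    messages=[{'role': 'user', 'content': 'שלום, מה שלומך?'}],
    temperature=0.7,
    max_tokens=1024,
    top_p=0.9,
    stream=True,
)

# Streamed deltas may carry content=None (e.g. on the final chunk), hence
# the `or ''` guard -- the same reason model_chat uses `text or ''`.
for chunk in stream:
    print(chunk.choices[0].delta.content or '', end='', flush=True)
```

Consuming the stream this way mirrors the `for completion in gen:` loop in the diff: each chunk's delta is appended to the running text, which is why `model_chat` can yield a progressively longer `full_response` for Gradio's ChatInterface to render.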