# Source: Hugging Face Spaces - Royrotem100 / Roy-Rottem-Chatbot
# Space status when captured: Runtime error
# File size: 4,295 Bytes

import os
import gradio as gr
from http import HTTPStatus
import openai
from typing import Generator, List, Optional, Tuple, Dict
from urllib.error import HTTPError
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import threading

# Directory holding the local checkpoint; the tokenizer and the weights are
# both read from this same location.
_MODEL_DIR = "./dictalm2.0-instruct"

# Loaded once at import time and shared by every call to model_chat().
tokenizer = AutoTokenizer.from_pretrained(_MODEL_DIR)
model = AutoModelForCausalLM.from_pretrained(_MODEL_DIR)

# A conversation as a list of (user text, assistant text) pairs.
History = List[Tuple[str, str]]
# A conversation as a flat list of {'role': ..., 'content': ...} dicts.
Messages = List[Dict[str, str]]

def clear_session() -> Tuple[str, History]:
    """Return a blank session state.

    Returns:
        A 2-tuple of an empty string and a new, empty history list. A fresh
        list object is created on every call, so callers may mutate it freely.
    """
    return '', []

def history_to_messages(history: History) -> Messages:
    """Flatten (user, assistant) pairs into a list of role/content dicts.

    Each history entry contributes two dicts, in order: a ``'user'`` message
    built from element 0 and an ``'assistant'`` message built from element 1.
    Surrounding whitespace is stripped from both texts.

    Args:
        history: Sequence of two-item turns; both items must be strings.

    Returns:
        The flattened messages, twice as long as ``history``.
    """
    flattened: Messages = []
    for turn in history:
        flattened.extend((
            {'role': 'user', 'content': turn[0].strip()},
            {'role': 'assistant', 'content': turn[1].strip()},
        ))
    return flattened

def messages_to_history(messages: Messages) -> List[List[str]]:
    """Pair up alternating messages into ``[query, reply]`` content lists.

    Messages are consumed two at a time by position: even indexes are taken
    as the query and odd indexes as the reply. The ``'role'`` field is not
    inspected. A trailing message without a partner is dropped.

    Args:
        messages: Flat list of dicts, each with a ``'content'`` key.

    Returns:
        One ``[query_content, reply_content]`` list per complete pair.
    """
    return [
        [query['content'], reply['content']]
        for query, reply in zip(messages[0::2], messages[1::2])
    ]

def _format_prompt(messages: Messages) -> str:
    """Render a conversation as a single ``<s>[INST] ... [/INST]reply</s>`` string.

    User turns are wrapped in ``[INST] ... [/INST]``; assistant turns are
    appended verbatim and closed with ``</s>`` so the model sees its own
    earlier replies. Messages with any other role are ignored.
    """
    # NOTE(review): separators follow the Mistral-style instruct layout;
    # confirm against the checkpoint's own chat template.
    pieces = ["<s>"]
    for message in messages:
        if message['role'] == 'user':
            pieces.append(f"[INST] {message['content']} [/INST]")
        elif message['role'] == 'assistant':
            pieces.append(f"{message['content']}</s>")
    return "".join(pieces)


def model_chat(query: Optional[str], history: Optional[History]) -> Generator[str, None, None]:
    """Generate a reply to ``query`` and stream it as growing text snapshots.

    Args:
        query: The new user message. ``None`` is treated as an empty string.
        history: Earlier (user, assistant) turns. ``None`` is treated as an
            empty conversation.

    Yields:
        The reply text produced so far. Each value extends the previous one
        by up to 20 characters and the last value is the complete reply.
        Nothing is yielded when ``query`` is empty or whitespace-only.
    """
    if query is None:
        query = ''
    if history is None:
        history = []
    if not query.strip():
        return
    messages = history_to_messages(history)
    messages.append({'role': 'user', 'content': query.strip()})

    # Combine the whole conversation (both roles) into one prompt string.
    formatted_text = _format_prompt(messages)
    # The prompt already begins with "<s>", so the tokenizer must not add a
    # second beginning-of-sequence token on top of it.
    inputs = tokenizer(formatted_text, return_tensors='pt', add_special_tokens=False)
    prompt_length = inputs['input_ids'].shape[-1]

    # temperature/top_p only take effect when sampling is switched on.
    # NOTE(review): max_length counts prompt tokens too, so long conversations
    # leave less room for the reply; consider max_new_tokens instead.
    outputs = model.generate(
        inputs['input_ids'],
        attention_mask=inputs.get('attention_mask'),
        max_length=1024,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    # generate() returns prompt + continuation; decode only the continuation
    # so the user's own text is not echoed back as part of the answer.
    full_response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Simulate streaming. gr.ChatInterface replaces the displayed message with
    # each yielded value, so every step must carry the full text so far.
    chunk_size = 20  # characters added per step
    for end in range(chunk_size, len(full_response) + chunk_size, chunk_size):
        yield full_response[:end]

# Flask application served by run_flask(). No routes are registered on it in
# this module, so requests receive Flask's default 404 until handlers are added.
app = Flask(__name__)


def run_flask() -> None:
    """Serve the Flask ``app`` on all interfaces, port 5000.

    ``app.run`` blocks until the server stops, which is why this function is
    meant to be used as a thread target rather than called inline.
    """
    app.run(host='0.0.0.0', port=5000)


# Run Flask in a separate thread so the blocking server loop does not prevent
# the rest of the module from executing.
threading.Thread(target=run_flask).start()


# JavaScript handed to gr.Blocks(js=...). None means no custom script is
# injected; assign a JS source string here to add page-level behaviour.
CUSTOM_JS: Optional[str] = None

# Page layout: custom CSS for right-to-left rendering and the header banner,
# followed by an HTML header and the chat widget bound to model_chat().
with gr.Blocks(css='''
    .gr-group {direction: rtl;}
    .chatbot{text-align:right;}
  .dicta-header {
    background-color: var(--input-background-fill);  /* Replace with desired background color */
    border-radius: 10px;
    padding: 20px;
    text-align: center;
    display: flex;
    flex-direction: row;
    align-items: center;
    box-shadow: var(--block-shadow);
    border-color: var(--block-border-color);
    border-width: 1px;
  }
               
               
  @media (max-width: 768px) {
    .dicta-header {
      flex-direction: column; /* Change to vertical for mobile devices */
    }
  }

  .chatbot.prose {
    font-size: 1.2em;
  }
  .dicta-logo {
    width: 150px; /* Replace with actual logo width as desired */
    height: auto;
    margin-bottom: 20px;
  }

  .dicta-intro-text {
    margin-bottom: 20px;
    text-align: center;
    display: flex;
    flex-direction: column;
    align-items: center;
    width: 100%;
    font-size: 1.1em;
  }
               
  textarea {
    font-size: 1.2em;
  }
''', js=CUSTOM_JS) as demo:
    # Header banner: logo plus a Hebrew title and two introduction lines.
    gr.Markdown("""
<div class="dicta-header">
  <a href="">
    <img src="file/logo111.png" alt="Dicta Logo" class="dicta-logo">
  </a>  
  <div class="dicta-intro-text">
    <h1>צ'אט מערכי - הדגמה ראשונית</h1>
     <span dir='rtl'>ברוכים הבאים לדמו האינטראקטיבי הראשון. חקרו את יכולות המודל וראו כיצד הוא יכול לסייע לכם במשימותיכם</span><br/>
     <span dir='rtl'>הדמו נכתב על ידי סרן רועי רתם תוך שימוש במודל שפה דיקטה שפותח על ידי מפא"ת</span><br/>
  </div>
</div>
""")

    # Chat widget driven by model_chat(); the settings below switch the
    # chatbot and the input box to right-to-left, right-aligned text.
    interface = gr.ChatInterface(model_chat, fill_height=False)
    interface.chatbot.rtl = True
    # Placeholder reads: "Enter a question in Hebrew (or in English!)".
    interface.textbox.placeholder = "הכנס שאלה בעברית (או באנגלית!)"
    interface.textbox.rtl = True
    interface.textbox.text_align = 'right'
    interface.theme_css += '.gr-group {direction: rtl !important;}'

# Turn on the request queue (api_open=False), then start serving the UI.
# Only 'logo111.png' is whitelisted for direct file access.
demo.queue(api_open=False)
demo.launch(max_threads=20, share=False, allowed_paths=['logo111.png'])