Commit d90e779 · Parent(s): 215c457
Add dictalm2.0-instruct folder
app.py
CHANGED
@@ -5,10 +5,9 @@ import openai
 from typing import Generator, List, Optional, Tuple, Dict
 from urllib.error import HTTPError
 
-
-
-
-oai_client = openai.OpenAI(api_key=API_KEY, base_url=API_URL)
+# Load the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("./dictalm2.0-instruct")
+model = AutoModelForCausalLM.from_pretrained("./dictalm2.0-instruct")
 
 History = List[Tuple[str, str]]
 Messages = List[Dict[str, str]]
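The added lines reference `AutoTokenizer`, `AutoModelForCausalLM`, `threading`, and a Flask `app` without showing where they come from. A minimal sketch of the setup the new top of app.py presumably needs; only the transformers classes are implied directly by the diff, and the Flask app with its route is a hypothetical placeholder, since no hunk here defines them:

```python
# Sketch of the setup the added lines appear to assume.
# The Flask app and its route are hypothetical placeholders.
import threading

from flask import Flask
from transformers import AutoTokenizer, AutoModelForCausalLM

app = Flask(__name__)

@app.route('/health')
def health():
    return 'ok'  # hypothetical endpoint; the diff never shows app's routes
```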
@@ -38,20 +37,27 @@ def model_chat(query: Optional[str], history: Optional[History]) -> Generator[Tu
         return
     messages = history_to_messages(history)
     messages.append({'role': 'user', 'content': query.strip()})
-
-
-
-
-
-
-
-    )
-
-
-
-
-    yield full_response
-
+
+    # Combine all messages into one formatted input text
+    formatted_text = "<s>" + "".join(f"[INST] {m['content']} [/INST]" for m in messages if m['role'] == 'user')
+    inputs = tokenizer(formatted_text, return_tensors='pt')
+
+    # Generate the output
+    outputs = model.generate(inputs['input_ids'], max_length=1024, temperature=0.7, top_p=0.9)
+    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Simulate streaming by yielding the response in chunks
+    chunk_size = 20  # You can adjust the chunk size
+    for i in range(0, len(full_response), chunk_size):
+        yield full_response[i:i+chunk_size]
+
+def run_flask():
+    app.run(host='0.0.0.0', port=5000)
+
+# Run Flask in a separate thread
+threading.Thread(target=run_flask).start()
+
+
 with gr.Blocks(css='''
 .gr-group {direction: rtl;}
 .chatbot{text-align:right;}
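Two details in the new `model_chat` body are easy to trip over. `model.generate` returns the prompt tokens followed by the continuation, so decoding `outputs[0]` makes `full_response` start with the `[INST]` prompt echoed back; and because the join keeps only `user` turns, earlier assistant replies never reach the model. A hedged sketch of both fixes, assuming the bundled tokenizer ships a Mistral-style chat template (plausible for a DictaLM 2.0 checkpoint, but worth verifying against the local `./dictalm2.0-instruct` copy):

```python
# Sketch: build the prompt from the full history via the tokenizer's chat
# template (keeps assistant turns), then decode only the newly generated tokens.
formatted_text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(formatted_text, return_tensors='pt')
outputs = model.generate(
    inputs['input_ids'],
    max_new_tokens=1024,  # max_length would count the prompt tokens too
    do_sample=True,       # temperature/top_p are ignored under greedy decoding
    temperature=0.7,
    top_p=0.9,
)
prompt_len = inputs['input_ids'].shape[1]
full_response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
```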
@@ -118,4 +124,4 @@ with gr.Blocks(css='''
 interface.textbox.text_align = 'right'
 interface.theme_css += '.gr-group {direction: rtl !important;}'
 
-demo.queue(api_open=False).launch(max_threads=20, share=False, allowed_paths=['
+demo.queue(api_open=False).launch(max_threads=20, share=False, allowed_paths=['logo111.png'])
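A closing note on the chunk loop added in the second hunk: it only simulates streaming, since nothing is yielded until generation has fully finished. If token-level streaming is wanted, transformers' `TextIteratorStreamer` can drive the Gradio generator while the model runs in a background thread; a sketch reusing the `tokenizer` and `model` names from the diff:

```python
from threading import Thread

from transformers import TextIteratorStreamer

def stream_response(formatted_text: str):
    # Sketch: yield text as tokens arrive instead of chunking the final string.
    inputs = tokenizer(formatted_text, return_tensors='pt')
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate,
           kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024)).start()
    partial = ''
    for piece in streamer:
        partial += piece
        yield partial  # Gradio chatbots usually expect the accumulated text
```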