Spaces:
Running
Running
Upload 2 files
Browse files- main.py +111 -0
- requirements.txt +15 -0
main.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.llms import LlamaCpp
|
2 |
+
from langchain.callbacks.manager import CallbackManager
|
3 |
+
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
4 |
+
import gradio as gr
|
5 |
+
import re
|
6 |
+
import os
|
7 |
+
|
8 |
+
|
9 |
+
# Runtime configuration. Each setting can be overridden with an environment
# variable of the same name; the numeric defaults are the ones from the
# previously commented-out configuration. The "TEMPRATURE" spelling is kept
# because it is the name of the environment variable and of the log field.
#
# Model file names noted by the original author:
#   persian_llama_7b.Q4_K_M.gguf
#   persian_llama_7b.Q8_K_M.gguf
#   persian_llama_7b.f32.gguf
# Make sure the model path is correct for your system!
MODEL_PATH = os.environ.get(
    "MODEL_PATH",
    "mostafaamiri/persian-llama-7b-GGUF-Q4/persian_llama_7b.Q8_K_M.gguf",
)
# Environment values are always strings, so convert them to numbers up front.
TEMPRATURE = float(os.environ.get("TEMPRATURE", "0.3"))
MAX_TOKENS = int(os.environ.get("MAX_TOKENS", "800"))

n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
n_ctx = 2048

# Streams generated tokens to stdout while the model is producing them.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Kept under its original name so existing references to `path` keep working.
path = MODEL_PATH

llm = LlamaCpp(
    model_path=path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=n_ctx,
    temperature=TEMPRATURE,
    max_tokens=MAX_TOKENS,
    top_p=1,
)

# Markup prepended to the page header; empty means no image is shown.
dal_image = ""
|
44 |
+
|
45 |
+
|
46 |
+
def generate_output(text):
    """Stream the model's answer to ``text``.

    Yields the answer accumulated so far each time the model emits a new
    chunk, so every yielded value is the full text produced up to that point.
    """
    accumulated = ""
    for chunk in llm.stream(text):
        accumulated = accumulated + chunk
        yield accumulated
|
51 |
+
|
52 |
+
|
53 |
+
def clear():
    """Return a pair of empty strings (one per textbox to be blanked)."""
    blank = ""
    return (blank, blank)
|
55 |
+
|
56 |
+
def _append_feedback(log_file, prompt, answer):
    """Append one feedback record to ``log_file``.

    The record keeps the original layout (``{"model": ..., "temprature": ...,
    "input": ..., "output": ...},`` spread over four lines), but every value is
    JSON-encoded so quotes, backslashes and newlines in the user's text can no
    longer corrupt the entry.

    The model path and temperature are read from the loaded ``llm`` object, so
    the log reflects the settings actually in use.
    NOTE(review): assumes ``llm`` exposes ``model_path`` and ``temperature``
    attributes (as LangChain's ``LlamaCpp`` does) -- confirm.
    """
    import json  # local import keeps this block self-contained

    record = (
        '{"model": ' + json.dumps(llm.model_path, ensure_ascii=False)
        + ',\n"temprature": ' + json.dumps(llm.temperature)
        + ',\n"input": ' + json.dumps(prompt, ensure_ascii=False)
        + ',\n"output": ' + json.dumps(answer, ensure_ascii=False)
        + "},\n"
    )
    # Explicit UTF-8: the logged text is Persian and the platform default
    # encoding may not be able to represent it.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(record)


def like_log(input, output):
    """Record a positive rating for the given prompt/answer pair in like_log.txt."""
    _append_feedback("like_log.txt", input, output)


def dislike_log(input, output):
    """Record a negative rating for the given prompt/answer pair in dislike_log.txt."""
    _append_feedback("dislike_log.txt", input, output)
|
63 |
+
|
64 |
+
|
65 |
+
# Build the web UI: header, prompt box, action buttons, answer box and
# like/dislike feedback buttons.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header: optional image markup followed by the right-to-left title
    # and contact address.
    gr.Markdown(
        dal_image
        + """
<br>
<div dir="rtl">
<h1>
مدل هوش مصنوعی دال
</h1>
<p dir="rtl">
تماس با ما با
<br/>
info[@]aidal.ir
</p>
</div>
"""
    )

    # Prompt entry.
    with gr.Row():
        inputs = gr.Textbox(
            label="ورودی",
            placeholder="سوال خود را وارد کنید",
            rtl=True,
        )

    # Submit / clear actions.
    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")

    # Model answer.
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)

    # generate_output is a generator, so the answer box is updated with each
    # value it yields.
    submit_btn.click(
        fn=generate_output,
        inputs=[inputs],
        outputs=[outputs],
    )
    # clear() returns one empty string per textbox listed in `outputs`.
    clear_btn.click(
        fn=clear,
        inputs=[],
        outputs=[inputs, outputs],
    )

    # Feedback buttons: each passes the current prompt and answer to its
    # logging function and updates nothing on the page.
    with gr.Row():
        like_btn = gr.Button("👍🏾")
        dislike_btn = gr.Button("👎🏾")

    like_btn.click(
        fn=like_log,
        inputs=[inputs, outputs],
        outputs=[],
    )
    dislike_btn.click(
        fn=dislike_log,
        inputs=[inputs, outputs],
        outputs=[],
    )

# Earlier gr.Interface-based variant, kept for reference:
# gr_interface = gr.Interface(fn=generate_output,
#                             inputs=gr.Textbox(label="ورودی",placeholder="سوال خود را وارد کنید",rtl=True),
#                             outputs=gr.Textbox(label="خروجی",rtl=True),
#                             live=False,
#                             flagging_options=["👍🏾","👎🏾"],
#                             concurrency_limit=5)

# Listen on all interfaces and request a public share link.
demo.launch(server_name='0.0.0.0', share=True)
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain==0.0.335
|
2 |
+
langsmith==0.0.64
|
3 |
+
peft==0.7.0
|
4 |
+
safetensors==0.3.1
|
5 |
+
scikit-learn==1.3.0
|
6 |
+
scipy==1.11.1
|
7 |
+
sentencepiece==0.1.99
|
8 |
+
tokenizers==0.15.0
|
9 |
+
torch==2.0.1
|
10 |
+
torchaudio==2.0.2
|
11 |
+
torchvision==0.15.2
|
12 |
+
transformers==4.36.0
|
13 |
+
bitsandbytes==0.41.1
|
14 |
+
gradio==4.13.0
|
15 |
+
llama-cpp-python==0.2.28
|