alishafique committed · Commit 34d5793 · verified · 1 Parent(s): 4a6b784

Update app.py

Files changed (1):
  1. app.py +25 -96
app.py CHANGED
@@ -1,19 +1,16 @@
- import os
- import json
- import subprocess
  import gradio as gr
- from threading import Thread
  from huggingface_hub import hf_hub_download
- from llama_cpp import Llama
- from datetime import datetime

- # Load model from Hugging Face Hub
- MODEL_ID = "large-traversaal/Alif-1.0-8B-Instruct"
- MODEL_FILE = "model-Q8_0.gguf"

- model_path_file = hf_hub_download(MODEL_ID, filename=MODEL_FILE)

- # Initialize Llama model
  llama = Llama(
      model_path=model_path_file,
      n_gpu_layers=40,  # Adjust based on VRAM
@@ -23,17 +20,15 @@ llama = Llama(
      verbose=True  # Enable debug logging
  )

- CHAT_TEMPLATE = "Alif Chat"
- CONTEXT_LENGTH = 4096
- COLOR = "blue"
- EMOJI = "💬"
- DESCRIPTION = "Urdu AI Chatbot powered by Llama.cpp"

- # Function to generate responses
- def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-     chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
-     response = llama(chat_prompt, max_tokens=max_new_tokens, stop=["Q:", "\n"], echo=False, stream=True)

      text = ""
      for chunk in response:
          content = chunk["choices"][0]["text"]
@@ -41,81 +36,15 @@ def generate_response(message, history, system_prompt, temperature, max_new_toke
          text += content
          yield text

- # Create Gradio interface
- with gr.Blocks() as demo:
-     chatbot = gr.Chatbot(label="Urdu Chatbot", likeable=True, render=False)
-     chat = gr.ChatInterface(
-         generate_response,
-         chatbot=chatbot,
-         title=EMOJI + " " + "Alif-1.0 Chatbot",
-         description=DESCRIPTION,
-         examples=[
-             ["شہر کراچی کے بارے میں بتاؤ"],
-             ["قابل تجدید توانائی کیا ہے؟"],
-             ["پاکستان کی تاریخ کے بارے میں بتائیں۔"]
-         ],
-         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-         additional_inputs=[
-             gr.Textbox("", label="System prompt", render=False),
-             gr.Slider(0, 1, 0.6, label="Temperature", render=False),
-             gr.Slider(128, CONTEXT_LENGTH, 1024, label="Max new tokens", render=False),
-             gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
-             gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
-             gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
-         ],
-         theme=gr.themes.Soft(primary_hue=COLOR),
-     )
-
-     demo.queue(max_size=20).launch(share=True)
-
- # import llama_cpp
- # from llama_cpp import Llama
- # # import llama_cpp.llama_tokenizer
- # import gradio as gr
-
- # from huggingface_hub import hf_hub_download
-
- # model_name = "large-traversaal/Alif-1.0-8B-Instruct"
- # model_file = "model-Q8_0.gguf"
- # model_path_file = hf_hub_download(model_name,
- #                                   filename=model_file,)
-
- # llama = Llama(
- #     model_path=model_path_file,
- #     n_gpu_layers=40,  # Adjust based on VRAM
- #     n_threads=8,  # Match CPU cores
- #     n_batch=512,  # Optimize for better VRAM usage
- #     n_ctx=4096,  # Context window size
- #     verbose=True  # Enable debug logging
- # )
-
- # chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
-
- # # Function to generate text with streaming output
- # def chat_with_ai(prompt):
- #     query = chat_prompt.format(inp=prompt)
-
- #     # response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True)
- #     response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
-
- #     text = ""
- #     for chunk in response:
- #         content = chunk["choices"][0]["text"]
- #         if content:
- #             text += content
- #             yield text
-
- # # Gradio UI setup
- # demo = gr.Interface(
- #     fn=chat_with_ai,  # Streaming function
- #     inputs="text",  # User input
- #     outputs="text",  # Model response
- #     title="Streaming Alif-1.0-8B-Instruct Chatbot 🚀",
- #     description="Enter a prompt and get a streamed response."
- # )
-
- # # Launch the Gradio app
- # demo.launch(share=True)
 
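A note on the removed block: `gr.ChatInterface` passes the user message, the chat history, and each `additional_inputs` value positionally to its callback, which is why the old `generate_response` took eight parameters even though only `message` and `max_new_tokens` were actually used. A minimal sketch of that calling convention, with a hypothetical `echo` callback standing in for `generate_response`:

import gradio as gr

# gr.ChatInterface calls fn(message, history, *additional_inputs) positionally.
# `echo` is a hypothetical stand-in for the removed generate_response.
def echo(message, history, temperature):
    return f"(temperature={temperature}) {message}"

demo = gr.ChatInterface(
    echo,
    additional_inputs=[gr.Slider(0, 1, 0.6, label="Temperature")],
)
demo.launch()

The new version, below, is the previously commented-out minimal `gr.Interface` app, now promoted to the active code path: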
+ import llama_cpp
+ from llama_cpp import Llama
+ # import llama_cpp.llama_tokenizer
  import gradio as gr
+
  from huggingface_hub import hf_hub_download

+ model_name = "large-traversaal/Alif-1.0-8B-Instruct"
+ model_file = "model-Q8_0.gguf"
+ model_path_file = hf_hub_download(model_name,
+                                   filename=model_file,)

  llama = Llama(
      model_path=model_path_file,
      n_gpu_layers=40,  # Adjust based on VRAM
      n_threads=8,  # Match CPU cores
      n_batch=512,  # Optimize for better VRAM usage
      n_ctx=4096,  # Context window size
      verbose=True  # Enable debug logging
  )

+ chat_prompt = """You are an Urdu Chatbot. Write an appropriate response for the given instruction: {inp} Response:"""

+ # Function to generate text with streaming output
+ def chat_with_ai(prompt):
+     query = chat_prompt.format(inp=prompt)
+
+     # response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True)  # Enable streaming
+     response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
+
      text = ""
      for chunk in response:
          content = chunk["choices"][0]["text"]
          if content:
              text += content
              yield text

+ # Gradio UI setup
+ demo = gr.Interface(
+     fn=chat_with_ai,  # Streaming function
+     inputs="text",  # User input
+     outputs="text",  # Model response
+     title="Streaming Alif-1.0-8B-Instruct Chatbot 🚀",
+     description="Enter a prompt and get a streamed response."
+ )

+ # Launch the Gradio app
+ demo.launch(share=True)
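Since `chat_with_ai` is a generator that yields the cumulative response text after each chunk, the streaming path can be sanity-checked without launching the UI. A minimal sketch, assuming the definitions from the new app.py are in scope and the GGUF file is already cached (the Urdu prompt is borrowed from the removed examples):

# Minimal streaming smoke test outside Gradio.
# Each yielded value is the full text generated so far, so reprint in place.
for partial in chat_with_ai("پاکستان کی تاریخ کے بارے میں بتائیں۔"):
    print(partial, end="\r", flush=True)
print()

Note that `stop=["Q:", "\n"]` ends generation at the first newline, so responses are capped at a single line; dropping "\n" from the stop list would be the obvious change if multi-paragraph Urdu answers are wanted.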