Spaces: Upload main.py
main.py
ADDED
@@ -0,0 +1,224 @@
# from flask import Flask, request, jsonify
# from llama_cpp import Llama
# from huggingface_hub import hf_hub_download
# from model import model_download
# # model_download()

# # Initialize the Llama model with chat format set to "llama-2"
# llm = Llama(model_path="E:\langchain-chat-gui-main\langchain-chat-gui-main\model-unsloth.Q8_0.gguf", chat_format="llama-2")

# # Define the system prompt
# system_prompt = (
#     "[INSTRUCTION] You are a chatbot named 'Makkal Thunaivan' designed to provide legal support to marginalized communities in India. "
#     "You were fine-tuned by Sathish Kumar and his team members at the University College of Engineering Dindigul. "
#     "Developer Team members include Karthikeyan as Model Trainer, Prashanna as Dataset Researcher, Nivas as Model Architect, and Sathish Kumar as Team Leader and Frontend Developer and Model Tester. "
#     "Your purpose is to answer questions related to Indian law and marginalized communities in India. "
#     "You have been trained on various legal topics. "
#     "Your responses should be concise, meaningful, and accurate."
#     "When a user asks for more information or details, provide a more comprehensive explanation. "
#     "Your responses should be respectful and informative."
#     "Do not provide information unrelated to India or Indian law. "
#     "Feel free to ask questions."
# )

# # Initialize the conversation history list with the system prompt
# conversation_history = [{"role": "system", "content": system_prompt}]

# # Create a Flask application
# app = Flask(__name__)

# # Define the model function
# def model(query):
#     global conversation_history  # Declare global to update history

#     # Add the user's query to the conversation history
#     conversation_history.append({"role": "user", "content": query})

#     # Calculate the total number of tokens in the conversation history
#     # (You may need to modify this part to calculate the token count accurately based on your tokenizer)
#     total_tokens = sum(len(message["content"].split()) for message in conversation_history)

#     # If the total number of tokens exceeds the model's context window, trim the history
#     # You may need to adjust the 512 value based on your model's actual context window size
#     context_window_size = 512
#     while total_tokens > context_window_size:
#         # Remove the oldest messages from the conversation history
#         conversation_history.pop(0)
#         # Recalculate the total number of tokens
#         total_tokens = sum(len(message["content"].split()) for message in conversation_history)

#     # Generate chat completion with the conversation history
#     response = llm.create_chat_completion(messages=conversation_history, max_tokens=75)

#     # Extract the assistant's response from the completion dictionary
#     if response and 'choices' in response and response['choices']:
#         assistant_response = response['choices'][0]['message']['content']
#         assistant_response = assistant_response.strip()

#         # Add the assistant's response to the conversation history
#         conversation_history.append({"role": "assistant", "content": assistant_response})

#         # Print the assistant's response
#         print("Assistant response:", assistant_response)

#         # Return the assistant's response
#         return assistant_response
#     else:
#         print("Error: Invalid response structure.")
#         return None


# # Define the endpoint for the API
# @app.route("/chat", methods=["GET"])
# def chat_endpoint():
#     # Get the query parameter from the request
#     query = request.args.get("query")

#     # Check if the "refresh" parameter is set to "true"
#     refresh = request.args.get("refresh")
#     if refresh and refresh.lower() == "true":
#         # Clear the conversation history
#         global conversation_history
#         conversation_history = [{"role": "system", "content": system_prompt}]
#         return jsonify({"response": "Conversation history cleared."})

#     # If there is no query, return an error message
#     if not query:
#         return jsonify({"error": "Query parameter is required."}), 400

#     # Call the model function with the query
#     response = model(query)

#     # Return the assistant's response as JSON
#     return jsonify({"response": response})

# # Run the Flask app
# if __name__ == "__main__":
#     app.run(host="0.0.0.0", port=5000)



from flask import Flask, request, jsonify
from llama_cpp import Llama
import logging

# Initialize logging
logging.basicConfig(level=logging.INFO)

# Initialize the Llama model with chat format set to "llama-2"
llm = Llama(model_path="E:\\langchain-chat-gui-main\\langchain-chat-gui-main\\llama-2-7b-chat.Q2_K.gguf", chat_format="llama-2")

# Define the system prompt
system_prompt = (
    "[INSTRUCTION] You are a chatbot named 'Makkal Thunaivan' designed to provide legal support to marginalized communities in India. "
    "You were fine-tuned by Sathish Kumar and his team members at the University College of Engineering Dindigul. "
    "Developer Team members include Karthikeyan as Model Trainer, Prashanna as Dataset Researcher, Nivas as Model Architect, and Sathish Kumar as Team Leader and Frontend Developer and Model Tester. "
    "Your purpose is to answer questions related to Indian law and marginalized communities in India. "
    "You have been trained on various legal topics. "
    "Your responses should be concise, meaningful, and accurate. "
    "When a user asks for more information or details, provide a more comprehensive explanation. "
    "Your responses should be respectful and informative. "
    "Do not provide information unrelated to India or Indian law. "
    "Feel free to ask questions."
)

# Initialize the conversation history list with the system prompt
conversation_history = [{"role": "system", "content": system_prompt}]

# Define conversation history size limit
MAX_CONVERSATION_HISTORY_SIZE = 10

# Create a Flask application
app = Flask(__name__)

# Define a function to calculate the total number of tokens in the conversation history using the Llama model's tokenizer
def calculate_total_tokens(messages):
    try:
        # llama-cpp-python's tokenizer expects bytes, so encode each message's content before tokenizing
        total_tokens = sum(len(llm.tokenize(str(message["content"]).encode("utf-8"), add_bos=False, special=True)) for message in messages)
        return total_tokens
    except Exception as e:
        logging.error(f"Error during tokenization: {e}")
        return 0  # Return a safe value (0) to handle the error

# Define a function to trim the conversation history if the total number of tokens exceeds the context window size
def trim_conversation_history():
    global conversation_history
    total_tokens = calculate_total_tokens(conversation_history)
    context_window_size = 512

    while total_tokens > context_window_size and len(conversation_history) > 1:
        # Remove the oldest non-system message; index 0 holds the system prompt
        conversation_history.pop(1)
        # Recalculate the total number of tokens
        total_tokens = calculate_total_tokens(conversation_history)

# Define the model function
def model(query):
    global conversation_history

    # Add the user's query to the conversation history
    conversation_history.append({"role": "user", "content": query})

    # Calculate the total number of tokens in the conversation history
    total_tokens = calculate_total_tokens(conversation_history)

    # If the total number of tokens exceeds the model's context window, trim the history
    trim_conversation_history()

    # Generate chat completion with the conversation history
    try:
        response = llm.create_chat_completion(messages=conversation_history, max_tokens=200)

        # Extract the assistant's response from the completion dictionary
        if response and 'choices' in response and response['choices']:
            assistant_response = response['choices'][0]['message']['content']
            assistant_response = assistant_response.strip()

            # Add the assistant's response to the conversation history
            conversation_history.append({"role": "assistant", "content": assistant_response})

            # Return the assistant's response
            return assistant_response
        else:
            logging.error("Error: Invalid response structure.")
            return None
    except Exception as e:
        logging.error(f"Error during chat completion: {e}")
        return None

# Define the endpoint for the API
@app.route("/chat", methods=["GET"])
def chat_endpoint():
    global conversation_history

    # Get the query parameter from the request
    query = request.args.get("query")

    # Check if the "refresh" parameter is set to "true"
    refresh = request.args.get("refresh")
    if refresh and refresh.lower() == "true":
        # Clear the conversation history
        conversation_history = [{"role": "system", "content": system_prompt}]
        return jsonify({"response": "Conversation history cleared."})

    # If there is no query, return an error message
    if not query:
        return jsonify({"error": "Query parameter is required."}), 400

    # Call the model function with the query
    response = model(query)

    # Return an error if the model failed to produce a response
    if response is None:
        return jsonify({"error": "An error occurred while processing the request."}), 500

    # Check the size of the conversation history and clear if necessary
    if len(conversation_history) > MAX_CONVERSATION_HISTORY_SIZE:
        conversation_history = [{"role": "system", "content": system_prompt}]
        return jsonify({"response": response, "notification": "Conversation history was cleared due to exceeding maximum size."})

    # Return the assistant's response as JSON
    logging.info(response)
    return jsonify({"response": response})

# Run the Flask app
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
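
For reference, below is a minimal client sketch showing how the /chat endpoint defined above could be exercised once the app is running. It is an illustration only: the base URL assumes the server is reachable at http://localhost:5000 (matching app.run above), and the helper names ask and reset_history are hypothetical, not part of the uploaded file. Only the Python standard library is used.

# Hypothetical client for the /chat endpoint (assumes the app runs locally on port 5000).
import json
import urllib.parse
import urllib.request

BASE_URL = "http://localhost:5000/chat"  # assumption: host/port match app.run() above

def ask(query: str) -> str:
    # Build the GET request with the "query" parameter expected by chat_endpoint().
    url = f"{BASE_URL}?{urllib.parse.urlencode({'query': query})}"
    with urllib.request.urlopen(url) as resp:
        payload = json.loads(resp.read().decode("utf-8"))
    return payload.get("response", "")

def reset_history() -> None:
    # Passing refresh=true asks the server to clear its conversation history.
    url = f"{BASE_URL}?{urllib.parse.urlencode({'refresh': 'true'})}"
    with urllib.request.urlopen(url) as resp:
        resp.read()

if __name__ == "__main__":
    print(ask("What rights does a tenant have under Indian law?"))
    reset_history()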