vkrishnan569 committed on
Commit 30e80b0 · verified · 1 Parent(s): 0d8eb3a

Upload main.py

Files changed (1)
  1. main.py +224 -0
main.py ADDED
@@ -0,0 +1,224 @@
+ # from flask import Flask, request, jsonify
+ # from llama_cpp import Llama
+ # from huggingface_hub import hf_hub_download
+ # from model import model_download
+ # # model_download()
+
+ # # Initialize the Llama model with chat format set to "llama-2"
+ # llm = Llama(model_path="E:\langchain-chat-gui-main\langchain-chat-gui-main\model-unsloth.Q8_0.gguf", chat_format="llama-2")
+
+ # # Define the system prompt
+ # system_prompt = (
+ #     "[INSTRUCTION] You are a chatbot named 'Makkal Thunaivan' designed to provide legal support to marginalized communities in India. "
+ #     "You were fine-tuned by Sathish Kumar and his team members at the University College of Engineering Dindigul. "
+ #     "Developer Team members include Karthikeyan as Model Trainer, Prashanna as Dataset Researcher, Nivas as Model Architect, and Sathish Kumar as Team Leader and Frontend Developer and Model Tester. "
+ #     "Your purpose is to answer questions related to Indian law and marginalized communities in India. "
+ #     "You have been trained on various legal topics. "
+ #     "Your responses should be concise, meaningful, and accurate."
+ #     "When a user asks for more information or details, provide a more comprehensive explanation. "
+ #     "Your responses should be respectful and informative."
+ #     "Do not provide information unrelated to India or Indian law. "
+ #     "Feel free to ask questions."
+ # )
+
+ # # Initialize the conversation history list with the system prompt
+ # conversation_history = [{"role": "system", "content": system_prompt}]
+
+ # # Create a Flask application
+ # app = Flask(__name__)
+
+ # # Define the model function
+ # def model(query):
+ #     global conversation_history  # Declare global to update history
+
+ #     # Add the user's query to the conversation history
+ #     conversation_history.append({"role": "user", "content": query})
+
+ #     # Calculate the total number of tokens in the conversation history
+ #     # (You may need to modify this part to calculate the token count accurately based on your tokenizer)
+ #     total_tokens = sum(len(message["content"].split()) for message in conversation_history)
+
+ #     # If the total number of tokens exceeds the model's context window, trim the history
+ #     # You may need to adjust the 512 value based on your model's actual context window size
+ #     context_window_size = 512
+ #     while total_tokens > context_window_size:
+ #         # Remove the oldest messages from the conversation history
+ #         conversation_history.pop(0)
+ #         # Recalculate the total number of tokens
+ #         total_tokens = sum(len(message["content"].split()) for message in conversation_history)
+
+ #     # Generate chat completion with the conversation history
+ #     response = llm.create_chat_completion(messages=conversation_history, max_tokens=75)
+
+ #     # Extract the assistant's response from the completion dictionary
+ #     if response and 'choices' in response and response['choices']:
+ #         assistant_response = response['choices'][0]['message']['content']
+ #         assistant_response = assistant_response.strip()
+
+ #         # Add the assistant's response to the conversation history
+ #         conversation_history.append({"role": "assistant", "content": assistant_response})
+
+ #         # Print the assistant's response
+ #         print("Assistant response:", assistant_response)
+
+ #         # Return the assistant's response
+ #         return assistant_response
+ #     else:
+ #         print("Error: Invalid response structure.")
+ #         return None
+
+
+ # # Define the endpoint for the API
+ # @app.route("/chat", methods=["GET"])
+ # def chat_endpoint():
+ #     # Get the query parameter from the request
+ #     query = request.args.get("query")
+
+ #     # Check if the "refresh" parameter is set to "true"
+ #     refresh = request.args.get("refresh")
+ #     if refresh and refresh.lower() == "true":
+ #         # Clear the conversation history
+ #         global conversation_history
+ #         conversation_history = [{"role": "system", "content": system_prompt}]
+ #         return jsonify({"response": "Conversation history cleared."})
+
+ #     # If there is no query, return an error message
+ #     if not query:
+ #         return jsonify({"error": "Query parameter is required."}), 400
+
+ #     # Call the model function with the query
+ #     response = model(query)
+
+ #     # Return the assistant's response as JSON
+ #     return jsonify({"response": response})
+
+ # # Run the Flask app
+ # if __name__ == "__main__":
+ #     app.run(host="0.0.0.0", port=5000)
+
+
+
+ from flask import Flask, request, jsonify
+ from llama_cpp import Llama
+ import logging
+
+ # Initialize logging
+ logging.basicConfig(level=logging.INFO)
+
+ # Initialize the Llama model with chat format set to "llama-2"
+ llm = Llama(model_path="E:\\langchain-chat-gui-main\\langchain-chat-gui-main\\llama-2-7b-chat.Q2_K.gguf", chat_format="llama-2")
+ # Define the system prompt (note the trailing spaces: adjacent string
+ # literals are concatenated, so without them sentences would run together)
+ system_prompt = (
+     "[INSTRUCTION] You are a chatbot named 'Makkal Thunaivan' designed to provide legal support to marginalized communities in India. "
+     "You were fine-tuned by Sathish Kumar and his team members at the University College of Engineering Dindigul. "
+     "Developer team members include Karthikeyan as Model Trainer, Prashanna as Dataset Researcher, Nivas as Model Architect, and Sathish Kumar as Team Leader, Frontend Developer, and Model Tester. "
+     "Your purpose is to answer questions related to Indian law and marginalized communities in India. "
+     "You have been trained on various legal topics. "
+     "Your responses should be concise, meaningful, and accurate. "
+     "When a user asks for more information or details, provide a more comprehensive explanation. "
+     "Your responses should be respectful and informative. "
+     "Do not provide information unrelated to India or Indian law. "
+     "Feel free to ask questions."
+ )
+
+ # Initialize the conversation history list with the system prompt
+ conversation_history = [{"role": "system", "content": system_prompt}]
+
+ # Maximum number of messages kept before the history is reset
+ MAX_CONVERSATION_HISTORY_SIZE = 10
+
+ # Create a Flask application
+ app = Flask(__name__)
+
+ # Calculate the total number of tokens in the conversation history using the
+ # Llama model's own tokenizer
+ def calculate_total_tokens(messages):
+     try:
+         # llama-cpp-python's Llama.tokenize expects bytes, so encode each
+         # message's content as UTF-8 before tokenizing
+         total_tokens = sum(
+             len(llm.tokenize(str(message["content"]).encode("utf-8"), add_bos=False, special=True))
+             for message in messages
+         )
+         return total_tokens
+     except Exception as e:
+         logging.error(f"Error during tokenization: {e}")
+         return 0  # Return a safe value (0) to handle the error
+
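+ # Illustrative call (added in this write-up, not part of the upload; the
+ # exact count depends on the loaded model's vocabulary):
+ #   calculate_total_tokens([{"role": "user", "content": "What is Article 21?"}])
+ # returns the number of llama-2 tokens in that message, rather than the
+ # whitespace word count used in the earlier commented-out version.
+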
+ # Trim the conversation history when the total token count exceeds the
+ # context window size
+ def trim_conversation_history():
+     global conversation_history
+     total_tokens = calculate_total_tokens(conversation_history)
+     # Matches the 512-token default context window used by the Llama
+     # constructor above; adjust both together if you raise n_ctx
+     context_window_size = 512
+
+     # Drop the oldest user/assistant messages (index 1 onward) so the system
+     # prompt at index 0 is preserved; stop if only the system prompt remains
+     while total_tokens > context_window_size and len(conversation_history) > 1:
+         conversation_history.pop(1)
+         # Recalculate the total number of tokens
+         total_tokens = calculate_total_tokens(conversation_history)
+
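+ # Example of the trimming behavior (illustrative, not part of the upload):
+ # with a history of [system, user1, assistant1, user2] over the 512-token
+ # budget, the loop drops user1, then assistant1, and so on, so the system
+ # prompt is never discarded.
+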
+ # Define the model function
+ def model(query):
+     global conversation_history
+
+     # Add the user's query to the conversation history
+     conversation_history.append({"role": "user", "content": query})
+
+     # Trim the history if it now exceeds the model's context window
+     trim_conversation_history()
+
+     # Generate a chat completion from the conversation history
+     try:
+         response = llm.create_chat_completion(messages=conversation_history, max_tokens=200)
+
+         # Extract the assistant's response from the completion dictionary
+         if response and 'choices' in response and response['choices']:
+             assistant_response = response['choices'][0]['message']['content'].strip()
+
+             # Add the assistant's response to the conversation history
+             conversation_history.append({"role": "assistant", "content": assistant_response})
+
+             # Return the assistant's response
+             return assistant_response
+         else:
+             logging.error("Error: Invalid response structure.")
+             return None
+     except Exception as e:
+         logging.error(f"Error during chat completion: {e}")
+         return None
+
+ # Define the endpoint for the API
+ @app.route("/chat", methods=["GET"])
+ def chat_endpoint():
+     global conversation_history
+
+     # Get the query parameter from the request
+     query = request.args.get("query")
+
+     # If the "refresh" parameter is set to "true", clear the conversation history
+     refresh = request.args.get("refresh")
+     if refresh and refresh.lower() == "true":
+         conversation_history = [{"role": "system", "content": system_prompt}]
+         return jsonify({"response": "Conversation history cleared."})
+
+     # If there is no query, return an error message
+     if not query:
+         return jsonify({"error": "Query parameter is required."}), 400
+
+     # Call the model function with the query
+     response = model(query)
+     if response is None:
+         return jsonify({"error": "An error occurred while processing the request."}), 500
+
+     # Reset the history once it grows past the maximum size, and tell the client
+     if len(conversation_history) > MAX_CONVERSATION_HISTORY_SIZE:
+         conversation_history = [{"role": "system", "content": system_prompt}]
+         return jsonify({"response": response, "notification": "Conversation history was cleared due to exceeding maximum size."})
+
+     logging.info(response)
+     return jsonify({"response": response})
+
+ # Run the Flask app
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=5000)
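
A quick way to exercise the endpoint once the server is running — a minimal sketch, assuming the app is reachable at localhost:5000 and the requests package is installed (this snippet is not part of the commit):

import requests

BASE = "http://localhost:5000/chat"

# Ask a question; the server answers with JSON of the form {"response": "..."}
r = requests.get(BASE, params={"query": "What rights does Article 17 of the Indian Constitution guarantee?"})
print(r.json())

# Clear the stored conversation history between sessions
r = requests.get(BASE, params={"refresh": "true"})
print(r.json())  # {"response": "Conversation history cleared."}

Note that the conversation history is a single module-level list shared by every client, so concurrent users would interleave their turns; a per-session store would be needed before deploying this beyond local testing.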