from flask import Flask, request, jsonify
from llama_cpp import Llama
import logging

# Initialize logging
logging.basicConfig(level=logging.INFO)

# Initialize the Llama model with chat format "llama-2".
# n_ctx is raised from the library default (512) so the ~2000-token
# history budget used below actually fits in the model's context window.
llm = Llama(model_path="./law-chat.Q2_K.gguf", chat_format="llama-2", n_ctx=2048)

# Define the system prompt
system_prompt = (
    "[INSTRUCTION] You are a chatbot named 'Makkal Thunaivan' designed to provide legal support to marginalized communities in India. "
    "You were fine-tuned by Sathish Kumar and his team members at the University College of Engineering Dindigul. "
    "Developer team members include Karthikeyan as Model Trainer, Prashanna as Dataset Researcher, Nivas as Model Architect, and Sathish Kumar as Team Leader, Frontend Developer, and Model Tester. "
    "Your purpose is to answer questions related to Indian law and marginalized communities in India. "
    "You have been trained on various legal topics. "
    "Your responses should be concise, meaningful, and accurate. "
    "When a user asks for more information or details, provide a more comprehensive explanation. "
    "Your responses should be respectful and informative. "
    "Do not provide information unrelated to India or Indian law. "
    "Feel free to ask questions."
)

# Initialize the conversation history list with the system prompt
conversation_history = [{"role": "system", "content": system_prompt}]

# Fallback cap on the number of messages kept in the conversation history
# (token-based trimming in trim_conversation_history is the primary control)
MAX_CONVERSATION_HISTORY_SIZE = 2000

# Create a Flask application
app = Flask(__name__)

# Count the tokens in the conversation history with the model's own tokenizer.
# Note that llama-cpp-python's tokenize() expects UTF-8 bytes, not str.
def calculate_total_tokens(messages):
    try:
        total_tokens = sum(
            len(llm.tokenize(str(message["content"]).encode("utf-8"), add_bos=False, special=True))
            for message in messages
        )
        return total_tokens
    except Exception as e:
        logging.error(f"Error during tokenization: {e}")
        # Returning 0 skips trimming for this turn instead of crashing the request
        return 0
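
# Rough usage sketch (hypothetical values; the exact count depends on the
# model's vocabulary, so treat this only as a shape check):
#   calculate_total_tokens([{"role": "user", "content": "hello"}])  # -> small positive int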

# Trim the oldest user/assistant turns until the history fits the token budget.
# The system prompt at index 0 is always preserved.
def trim_conversation_history():
    global conversation_history
    total_tokens = calculate_total_tokens(conversation_history)
    # Keep the prompt under n_ctx (2048) minus the 200-token completion budget
    context_window_size = 1800

    while total_tokens > context_window_size and len(conversation_history) > 1:
        # Remove the oldest non-system message and recount
        conversation_history.pop(1)
        total_tokens = calculate_total_tokens(conversation_history)

# Generate a reply for the user's query, maintaining the shared history
def model(query):
    global conversation_history

    # Add the user's query to the conversation history
    conversation_history.append({"role": "user", "content": query})

    # If the history exceeds the model's context budget, trim it
    trim_conversation_history()

    # Generate chat completion with the conversation history
    try:
        response = llm.create_chat_completion(messages=conversation_history, max_tokens=200)

        # Extract the assistant's response from the completion dictionary
        if response and 'choices' in response and response['choices']:
            assistant_response = response['choices'][0]['message']['content']
            assistant_response = assistant_response.strip()

            # Add the assistant's response to the conversation history
            conversation_history.append({"role": "assistant", "content": assistant_response})

            # Return the assistant's response
            return assistant_response
        else:
            logging.error("Error: Invalid response structure.")
            return None
    except Exception as e:
        logging.error(f"Error during chat completion: {e}")
        return None

# Define the endpoint for the API
@app.route("/chat", methods=["GET"])
def chat_endpoint():
    global conversation_history

    # Get the query parameter from the request
    query = request.args.get("query")

    # If the "refresh" parameter is set to "true", reset the conversation history
    refresh = request.args.get("refresh")
    if refresh and refresh.lower() == "true":
        conversation_history = [{"role": "system", "content": system_prompt}]
        return jsonify({"response": "Conversation history cleared."})

    # If there is no query, return an error message
    if not query:
        return jsonify({"error": "Query parameter is required."}), 400

    # Call the model function with the query
    response = model(query)
    if response is None:
        return jsonify({"error": "An error occurred while processing the request."}), 500

    # Fallback: clear the history outright if it grows past the message cap
    if len(conversation_history) > MAX_CONVERSATION_HISTORY_SIZE:
        conversation_history = [{"role": "system", "content": system_prompt}]
        return jsonify({"response": response, "notification": "Conversation history was cleared due to exceeding maximum size."})

    logging.info(f"Assistant response: {response}")
    return jsonify({"response": response})
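
# Example requests (a sketch; assumes the server is reachable at http://localhost:5000
# and the sample query is purely illustrative):
#   curl "http://localhost:5000/chat?query=What+are+my+rights+under+Article+21"
#   curl "http://localhost:5000/chat?refresh=true"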

# Run the Flask app
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)