vkrishnan569 committed
Commit c2cecc5
1 Parent(s): 6c03bd4

Server Deployment

Files changed:
- deployment.py +88 -0
- requirements.txt +13 -0
deployment.py
ADDED
@@ -0,0 +1,88 @@
from flask import Flask, request, jsonify
from llama_cpp import Llama

# Initialize the Llama model with chat format set to "llama-2"
llm = Llama(model_path="E:/langchain-chat-gui-main/langchain-chat-gui-main/llama-2-7b-chat.Q2_K.gguf", chat_format="llama-2")

# Define the system prompt
system_prompt = (
    "I am an Indian law chatbot designed to provide legal support to marginalized communities. "
    "This model was fine-tuned by Sathish and his team members at the University College of Engineering Dindigul. "
    "The model has been trained on various legal topics. "
    "Feel free to ask questions."
)

# Initialize the conversation history list with the system prompt
conversation_history = [{"role": "system", "content": system_prompt}]

# Create a Flask application
app = Flask(__name__)


# Define the model function
def model(query):
    global conversation_history  # Declare global to update history

    # Add the user's query to the conversation history
    conversation_history.append({"role": "user", "content": query})

    # Estimate the total number of tokens in the conversation history
    # (whitespace splitting is only an approximation; use the model's tokenizer for an exact count)
    total_tokens = sum(len(message["content"].split()) for message in conversation_history)

    # If the total number of tokens exceeds the model's context window, trim the history.
    # Adjust the 512 value to match your model's actual context window size.
    context_window_size = 512
    while total_tokens > context_window_size and len(conversation_history) > 1:
        # Remove the oldest non-system message so the system prompt is preserved
        conversation_history.pop(1)
        # Recalculate the total number of tokens
        total_tokens = sum(len(message["content"].split()) for message in conversation_history)

    # Generate a chat completion with the conversation history
    response = llm.create_chat_completion(messages=conversation_history, max_tokens=75)

    # Extract the assistant's response from the completion dictionary
    if response and "choices" in response and response["choices"]:
        assistant_response = response["choices"][0]["message"]["content"].strip()

        # Add the assistant's response to the conversation history
        conversation_history.append({"role": "assistant", "content": assistant_response})

        # Print and return the assistant's response
        print("Assistant response:", assistant_response)
        return assistant_response
    else:
        print("Error: Invalid response structure.")
        return None


# Define the endpoint for the API
@app.route("/chat", methods=["GET"])
def chat_endpoint():
    global conversation_history

    # Get the query parameter from the request
    query = request.args.get("query")

    # If the "refresh" parameter is set to "true", clear the conversation history
    refresh = request.args.get("refresh")
    if refresh and refresh.lower() == "true":
        conversation_history = [{"role": "system", "content": system_prompt}]
        return jsonify({"response": "Conversation history cleared."})

    # If there is no query, return an error message
    if not query:
        return jsonify({"error": "Query parameter is required."}), 400

    # Call the model function with the query
    response = model(query)

    # Return the assistant's response as JSON
    return jsonify({"response": response})


# Run the Flask app
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
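The in-file comment notes that whitespace splitting only approximates the token count. A minimal sketch of an exact count, assuming the `llm` instance from deployment.py and llama-cpp-python's `Llama.tokenize()` method (which takes UTF-8 bytes and returns a list of token ids):

# Hedged sketch: exact token counting with the model's own tokenizer.
# Assumes the `llm` Llama instance defined above in deployment.py.
def count_tokens(messages):
    total = 0
    for message in messages:
        # Llama.tokenize expects bytes and returns a list of token ids
        total += len(llm.tokenize(message["content"].encode("utf-8")))
    return total

With such a helper, `total_tokens = count_tokens(conversation_history)` could replace the whitespace-based estimate inside `model()`.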
requirements.txt
ADDED
@@ -0,0 +1,13 @@
blinker==1.8.2
click==8.1.7
colorama==0.4.6
diskcache==5.6.3
Flask==3.0.3
itsdangerous==2.2.0
Jinja2==3.1.4
llama_cpp_python==0.2.69
MarkupSafe==2.1.5
numpy==1.26.4
typing_extensions==4.11.0
Werkzeug==3.0.3
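Once these dependencies are installed and deployment.py is running, the /chat endpoint can be exercised with a small client. A minimal sketch using only the Python standard library, assuming the server is reachable at http://localhost:5000 (the host and port passed to app.run above); the example question is hypothetical:

import json
import urllib.parse
import urllib.request

base_url = "http://localhost:5000/chat"

# Send a question as the "query" parameter
params = urllib.parse.urlencode({"query": "What is the Right to Information Act?"})
with urllib.request.urlopen(f"{base_url}?{params}") as resp:
    data = json.loads(resp.read().decode("utf-8"))
print(data["response"])

# Clear the conversation history via the "refresh" parameter
reset = urllib.parse.urlencode({"refresh": "true"})
with urllib.request.urlopen(f"{base_url}?{reset}") as resp:
    print(json.loads(resp.read().decode("utf-8"))["response"])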