vkrishnan569 committed on
Commit c2cecc5
1 Parent(s): 6c03bd4

Server Deployment

Files changed (2)
  1. deployment.py +88 -0
  2. requirements.txt +13 -0
deployment.py ADDED
@@ -0,0 +1,88 @@
+ from flask import Flask, request, jsonify
+ from llama_cpp import Llama
+
+ # Initialize the Llama model with the chat format set to "llama-2"
+ llm = Llama(model_path="E:/langchain-chat-gui-main/langchain-chat-gui-main/llama-2-7b-chat.Q2_K.gguf", chat_format="llama-2")
+
+ # Define the system prompt
+ system_prompt = (
+     "I am an Indian law chatbot designed to provide legal support to marginalized communities. "
+     "This model was fine-tuned by Sathish and his team members at the University College of Engineering Dindigul. "
+     "The model has been trained on various legal topics. "
+     "Feel free to ask questions."
+ )
+
+ # Initialize the conversation history with the system prompt
+ conversation_history = [{"role": "system", "content": system_prompt}]
+
+ # Create a Flask application
+ app = Flask(__name__)
+
+ # Define the model function
+ def model(query):
+     global conversation_history  # Declared global so the history can be updated
+
+     # Add the user's query to the conversation history
+     conversation_history.append({"role": "user", "content": query})
+
+     # Approximate the total token count by splitting on whitespace
+     # (swap in the model's tokenizer for an exact count)
+     total_tokens = sum(len(message["content"].split()) for message in conversation_history)
+
+     # If the history exceeds the model's context window, trim it;
+     # adjust 512 to match the model's actual context window size.
+     context_window_size = 512
+     while total_tokens > context_window_size and len(conversation_history) > 2:
+         # Drop the oldest non-system message so the system prompt
+         # and the latest user query are always kept
+         conversation_history.pop(1)
+         # Recalculate the total number of tokens
+         total_tokens = sum(len(message["content"].split()) for message in conversation_history)
+
+     # Generate a chat completion from the conversation history
+     response = llm.create_chat_completion(messages=conversation_history, max_tokens=75)
+
+     # Extract the assistant's response from the completion dictionary
+     if response and "choices" in response and response["choices"]:
+         assistant_response = response["choices"][0]["message"]["content"].strip()
+
+         # Add the assistant's response to the conversation history
+         conversation_history.append({"role": "assistant", "content": assistant_response})
+
+         # Print and return the assistant's response
+         print("Assistant response:", assistant_response)
+         return assistant_response
+     else:
+         print("Error: Invalid response structure.")
+         return None
+
+
+ # Define the endpoint for the API
+ @app.route("/chat", methods=["GET"])
+ def chat_endpoint():
+     global conversation_history
+
+     # If the "refresh" parameter is set to "true", clear the conversation history
+     refresh = request.args.get("refresh")
+     if refresh and refresh.lower() == "true":
+         conversation_history = [{"role": "system", "content": system_prompt}]
+         return jsonify({"response": "Conversation history cleared."})
+
+     # A query is required for anything other than a refresh
+     query = request.args.get("query")
+     if not query:
+         return jsonify({"error": "Query parameter is required."}), 400
+
+     # Call the model function with the query and return its response as JSON
+     response = model(query)
+     return jsonify({"response": response})
+
+ # Run the Flask app
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=5000)
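The whitespace split above only approximates token usage. A minimal sketch of an exact count, assuming llama-cpp-python's Llama.tokenize method (which takes bytes), might look like the following; the count_tokens helper is hypothetical and not part of this commit:

def count_tokens(llm, messages):
    # Tokenize each message body with the model's own tokenizer and sum the
    # lengths; this still ignores the chat template's per-message overhead,
    # so treat the result as a lower bound when trimming the history.
    return sum(len(llm.tokenize(m["content"].encode("utf-8"))) for m in messages)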
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ blinker==1.8.2
+ click==8.1.7
+ colorama==0.4.6
+ diskcache==5.6.3
+ Flask==3.0.3
+ itsdangerous==2.2.0
+ Jinja2==3.1.4
+ llama_cpp_python==0.2.69
+ MarkupSafe==2.1.5
+ numpy==1.26.4
+ typing_extensions==4.11.0
+ Werkzeug==3.0.3
+
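With the dependencies installed (pip install -r requirements.txt) and the server started via python deployment.py, the endpoint can be exercised from the Python standard library alone; the question text below is only illustrative:

import json
from urllib.parse import urlencode
from urllib.request import urlopen

BASE = "http://localhost:5000/chat"  # matches app.run(host="0.0.0.0", port=5000)

# Ask a question
with urlopen(f"{BASE}?{urlencode({'query': 'What does Article 21 guarantee?'})}") as resp:
    print(json.load(resp)["response"])

# Start a fresh conversation
with urlopen(f"{BASE}?{urlencode({'refresh': 'true'})}") as resp:
    print(json.load(resp)["response"])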