vkrishnan569 committed on
Commit c2cecc5
1 Parent(s): 6c03bd4

Server Deployment

Files changed (2)
  1. deployment.py +88 -0
  2. requirements.txt +13 -0
deployment.py ADDED
@@ -0,0 +1,88 @@
+ from flask import Flask, request, jsonify
+ from llama_cpp import Llama
+
+ # Initialize the Llama model with the chat format set to "llama-2"
+ llm = Llama(model_path="E:/langchain-chat-gui-main/langchain-chat-gui-main/llama-2-7b-chat.Q2_K.gguf", chat_format="llama-2")
+
+ # Define the system prompt
+ system_prompt = (
+     "I am an Indian law chatbot designed to provide legal support to marginalized communities. "
+     "This model was fine-tuned by Sathish and his team members at the University College of Engineering Dindigul. "
+     "The model has been trained on various legal topics. "
+     "Feel free to ask questions."
+ )
+
+ # Initialize the conversation history with the system prompt
+ conversation_history = [{"role": "system", "content": system_prompt}]
+
+ # Create a Flask application
+ app = Flask(__name__)
+
+ # Define the model function
+ def model(query):
+     global conversation_history  # Declared global so the history can be updated
+
+     # Add the user's query to the conversation history
+     conversation_history.append({"role": "user", "content": query})
+
+     # Approximate the total token count by splitting on whitespace
+     # (swap in the model's tokenizer for an exact count)
+     total_tokens = sum(len(message["content"].split()) for message in conversation_history)
+
+     # If the history exceeds the model's context window, trim it;
+     # adjust 512 to match the model's actual context window size.
+     context_window_size = 512
+     while total_tokens > context_window_size and len(conversation_history) > 2:
+         # Drop the oldest non-system message so the system prompt
+         # and the latest user query are always kept
+         conversation_history.pop(1)
+         # Recalculate the total number of tokens
+         total_tokens = sum(len(message["content"].split()) for message in conversation_history)
+
+     # Generate a chat completion from the conversation history
+     response = llm.create_chat_completion(messages=conversation_history, max_tokens=75)
+
+     # Extract the assistant's response from the completion dictionary
+     if response and "choices" in response and response["choices"]:
+         assistant_response = response["choices"][0]["message"]["content"].strip()
+
+         # Add the assistant's response to the conversation history
+         conversation_history.append({"role": "assistant", "content": assistant_response})
+
+         # Print and return the assistant's response
+         print("Assistant response:", assistant_response)
+         return assistant_response
+     else:
+         print("Error: Invalid response structure.")
+         return None
+
+
+ # Define the endpoint for the API
+ @app.route("/chat", methods=["GET"])
+ def chat_endpoint():
+     global conversation_history
+
+     # If the "refresh" parameter is set to "true", clear the conversation history
+     refresh = request.args.get("refresh")
+     if refresh and refresh.lower() == "true":
+         conversation_history = [{"role": "system", "content": system_prompt}]
+         return jsonify({"response": "Conversation history cleared."})
+
+     # A query is required for anything other than a refresh
+     query = request.args.get("query")
+     if not query:
+         return jsonify({"error": "Query parameter is required."}), 400
+
+     # Call the model function with the query and return its response as JSON
+     response = model(query)
+     return jsonify({"response": response})
+
+ # Run the Flask app
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=5000)
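The whitespace split above only approximates token usage. A minimal sketch of an exact count, assuming llama-cpp-python's Llama.tokenize method (which takes bytes), might look like the following; the count_tokens helper is hypothetical and not part of this commit:

def count_tokens(llm, messages):
    # Tokenize each message body with the model's own tokenizer and sum the
    # lengths; this still ignores the chat template's per-message overhead,
    # so treat the result as a lower bound when trimming the history.
    return sum(len(llm.tokenize(m["content"].encode("utf-8"))) for m in messages)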
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ blinker==1.8.2
+ click==8.1.7
+ colorama==0.4.6
+ diskcache==5.6.3
+ Flask==3.0.3
+ itsdangerous==2.2.0
+ Jinja2==3.1.4
+ llama_cpp_python==0.2.69
+ MarkupSafe==2.1.5
+ numpy==1.26.4
+ typing_extensions==4.11.0
+ Werkzeug==3.0.3
+
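With the dependencies installed (pip install -r requirements.txt) and the server started via python deployment.py, the endpoint can be exercised from the Python standard library alone; the question text below is only illustrative:

import json
from urllib.parse import urlencode
from urllib.request import urlopen

BASE = "http://localhost:5000/chat"  # matches app.run(host="0.0.0.0", port=5000)

# Ask a question
with urlopen(f"{BASE}?{urlencode({'query': 'What does Article 21 guarantee?'})}") as resp:
    print(json.load(resp)["response"])

# Start a fresh conversation
with urlopen(f"{BASE}?{urlencode({'refresh': 'true'})}") as resp:
    print(json.load(resp)["response"])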