# aibsimilarityllm / main.py
# (header reconstructed from Hugging Face file-viewer chrome: commit 004a7b1,
#  "Update main.py" by aibmedia, 2.11 kB — the original scraped lines were not
#  valid Python and broke the file)
import asyncio
import json
import os
import threading
import urllib.request

from flask import Flask, render_template
from openai import OpenAI
# app = Flask(__name__)
# client = OpenAI(
# # This base_url points to the local Llamafile server running on port 8080
# base_url="http://127.0.0.1:8080/v1",
# api_key="sk-no-key-required"
# )
# Hugging Face Inference API endpoint for the sentence-similarity model.
API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2"

# Build the Authorization header from the TOKEN environment variable.
# os.getenv('TOKEN') returns None when unset, and "Bearer " + None raised a
# TypeError at import time; default to "" so startup succeeds and the API
# call fails with an auth error instead of a crash.
bearer = "Bearer " + os.getenv('TOKEN', '')
headers = {"Authorization": bearer}

print("headers")
# NOTE(review): the original printed the full headers dict, which wrote the
# API token to stdout/logs. Log only whether a token is configured.
print("Authorization token configured:", bool(os.getenv('TOKEN')))
app = Flask(__name__)
@app.route('/app')
def server_app():
    """Launch the local llamafile embedding server in a background thread."""
    print('This /app will start the llamafile server on thread')
    # Daemon-less worker thread: threadserver() blocks on os.system for the
    # lifetime of the llamafile process.
    worker = threading.Thread(target=threadserver)
    worker.start()
    return 'llamafile.start()'
@app.route('/')
def server_one():
    """Render the similarity page with placeholder 'Results' text."""
    # Both template slots start out showing the same placeholder string.
    placeholder = "Results"
    # NOTE(review): headertxt receives the raw bearer token, which embeds the
    # API credential in the page served to any visitor — confirm intentional.
    return render_template(
        "similarity_1.html",
        sourcetxt=placeholder,
        s1=placeholder,
        headertxt=bearer,
    )
# @app.route('/chat', methods=['POST'])
# def chat():
# try:
# user_message = request.json['message']
# completion = client.chat.completions.create(
# model="LLaMA_CPP",
# messages=[
# {"role": "system", "content": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests."},
# {"role": "user", "content": user_message}
# ]
# )
# ai_response = completion.choices[0].message.content
# ai_response = ai_response.replace('</s>', '').strip()
# return jsonify({'response': ai_response})
# except Exception as e:
# print(f"Error: {str(e)}")
# return jsonify({'response': f"Sorry, there was an error processing your request: {str(e)}"}), 500
def threadserver():
    """Blocking worker: start the local llamafile embedding server.

    Intended to run on a background thread (see the /app route); os.system
    blocks until the llamafile process exits.
    """
    print('hi')
    os.system(' ./mxbai-embed-large-v1-f16.llamafile --server --nobrowser')


async def query(data):
    """POST `data` as JSON to API_URL and return the parsed JSON response.

    The original awaited `requests.post`, but `requests` was never imported
    and is not awaitable; use stdlib urllib in a worker thread via
    asyncio.to_thread so the event loop is not blocked.
    """
    def _post():
        req = urllib.request.Request(
            API_URL,
            data=json.dumps(data).encode("utf-8"),
            headers={**headers, "Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)

    return await asyncio.to_thread(_post)


if __name__ == '__main__':
    # Run the dev server LAST: app.run() blocks, and in the original file it
    # preceded the function definitions, so threadserver/query were undefined
    # when the /app route was hit.
    app.run(debug=True)