"""Minimal Flask service that asks a local LlamaCpp model for the key skills in a CV.

POST a JSON object ``{"cv_body": "<cv text>"}`` to ``/``; the response is
``{"skills": "<model answer>"}``.
"""

from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser

app = Flask(__name__)

n_gpu_layers = 0
n_batch = 1024

# Streams generated tokens to stdout while the model is producing them.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# The model is loaded once at import time and shared by every request.
llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-fp16.gguf",
    temperature=0.1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=2048,
)

# Fixed question put to the model for every CV.
_SKILLS_QUESTION = "What are his best skills? write in points"

# Prompt text is runtime data sent to the model; it is kept verbatim.
_SKILLS_TEMPLATE = """ <|user|> I am analyzing cv this cv {cv_body} , {question} <|end|> <|assistant|> """

# input_variables must name the placeholders actually used in the template
# (the original declared "text", which the template never references).
_skills_prompt = PromptTemplate(
    template=_SKILLS_TEMPLATE,
    input_variables=["cv_body", "question"],
)

# The chain does not depend on the request, so it is built once, not per call.
_skills_chain = _skills_prompt | llm | StrOutputParser()


@app.route('/', methods=['POST'])
def get_skills():
    """Return the model's summary of the best skills found in the posted CV.

    Expects a JSON object with a non-empty ``cv_body`` field.

    Returns:
        ``{"skills": <answer>}`` on success, or ``{"error": <message>}`` with
        HTTP status 400 when the body is not a JSON object or ``cv_body`` is
        missing/empty.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so all bad-input cases share the single 400 path below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    cv_body = payload.get('cv_body')
    if not cv_body:
        # Without this guard the literal text "None" would be sent to the model.
        return jsonify({'error': "'cv_body' is required"}), 400

    ans = _skills_chain.invoke(
        {"question": _SKILLS_QUESTION, "cv_body": cv_body}
    )
    return jsonify({'skills': ans})


if __name__ == '__main__':
    app.run()