from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser

app = Flask(__name__)

# Inference configuration: CPU-only (no layers offloaded to GPU).
n_gpu_layers = 0
n_batch = 1024  # tokens per batch; should fit comfortably in available RAM

# Stream generated tokens to stdout as they are produced (debugging aid).
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Local quantized Phi-2 model served through the llama.cpp bindings.
llm = LlamaCpp(
    model_path="phi-2.Q4_K_M.gguf",
    temperature=0.1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=2048,
)


@app.route('/', methods=['POST'])
def get_skills():
    """Extract the candidate's best skills from a CV posted as JSON.

    Expects a JSON body with a ``cv_body`` key holding the CV text.
    Returns ``{"skills": <model answer>}`` on success, or a 400 error
    when ``cv_body`` is missing or empty.
    """
    payload = request.get_json(silent=True) or {}
    cv_body = payload.get('cv_body')
    if not cv_body:
        # Fail fast with a clean 400 instead of crashing on a None prompt.
        return jsonify({'error': "missing 'cv_body' in request body"}), 400

    # BUG FIX: the original template declared input_variables=["text"] — a
    # variable the template never used — and never interpolated {question},
    # so the question passed to chain.invoke() was silently dropped and the
    # model never saw it. Both variables are now declared and interpolated.
    template = (
        "Instruct: Read the following CV carefully:\n"
        "{cv_body}\n"
        "Then answer this question: {question}\n"
        "Output:"
    )
    prompt = PromptTemplate(
        template=template,
        input_variables=["cv_body", "question"],
    )

    # LCEL pipeline: fill the prompt, run the local LLM, strip to plain text.
    chain = prompt | llm | StrOutputParser()
    ans = chain.invoke({
        "question": "What are his best skills? write in points",
        "cv_body": cv_body,
    })
    return jsonify({'skills': ans})


if __name__ == '__main__':
    app.run()