import gradio as gr
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Download the model weights once if they are not already present locally:
# import requests
# url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"
# response = requests.get(url)
# with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
#     f.write(response.content)

local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"

# Load the model once at startup rather than on every request, which would
# otherwise reload the 13B weights for each question.
# Callbacks support token-wise streaming to stdout if desired:
# callbacks = [StreamingStdOutCallbackHandler()]
llm = LlamaCpp(model_path=local_path, n_ctx=2048)

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=llm)


def func(user):
    # Run the chain once and return the answer (the original called run()
    # twice, invoking the model twice per request).
    return llm_chain.run(user)


iface = gr.Interface(fn=func, inputs="text", outputs="text")
iface.launch()