File size: 2,515 Bytes
a35163f
 
 
 
 
683cf67
f1f9df6
a35163f
1e2ba54
a35163f
1e2ba54
a35163f
1e2ba54
a35163f
 
1e2ba54
f1f9df6
a35163f
683cf67
a35163f
448c406
a35163f
 
 
448c406
a35163f
448c406
a35163f
 
 
 
 
 
448c406
a35163f
 
 
 
 
 
 
 
 
 
 
 
 
666bc15
a35163f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448c406
a35163f
448c406
a35163f
 
ed9cd5d
909f50c
683cf67
448c406
 
 
a35163f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# import gradio as gr
# from langchain.llms import LlamaCpp
# from langchain import PromptTemplate, LLMChain
# from langchain.llms import GPT4All
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# # import requests

# # url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"

# # response = requests.get(url)

# # with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
# #     f.write(response.content)


# print("DONE")

# def func(user):
        
#     template = """
#     Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
#     Question: {question}
    
#     Answer: """
    
#     prompt = PromptTemplate(template=template, input_variables=["question"])
    
#     local_path = (
#         "./nous-hermes-13b.ggmlv3.q4_0.bin"
#     )

    
#     # # Callbacks support token-wise streaming
#     # callbacks = [StreamingStdOutCallbackHandler()]
    
#     # Verbose is required to pass to the callback manager
#     llm = LlamaCpp(model_path="./nous-hermes-13b.ggmlv3.q4_0.bin", n_ctx=2048)
#     llm_chain = LLMChain(prompt=prompt, llm=llm)
#     question = user
#     llm_chain.run(question)

#     return llm_chain.run(question)

# iface = gr.Interface(fn=func, inputs="text", outputs="text")
# iface.launch()

import gradio as gr
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Startup marker written to stdout once the imports above have completed.
# NOTE(review): looks like a leftover from the commented-out model download
# step earlier in this file — confirm whether it is still wanted.
print("DONE")

def func(user):
    """Answer one chat message with the local Nous-Hermes GGML model.

    Builds the "John" persona prompt, loads the model file
    ``./nous-hermes-13b.ggmlv3.q4_0.bin`` through ``LlamaCpp`` and runs the
    prompt/LLM chain a single time on the message.

    Args:
        user: Text typed by the user; it is substituted for ``{question}``
            in the prompt template.

    Returns:
        The value returned by ``LLMChain.run`` for the message, i.e. the
        generated reply.
    """
    template = """
    Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
    Question: {question}
    
    Answer: """

    prompt = PromptTemplate(template=template, input_variables=["question"])

    local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"

    # NOTE(review): the model is re-loaded from disk on every call, which is
    # slow for a 13B model; consider building the chain once at start-up if
    # requests are known to be served one at a time.
    llm = LlamaCpp(model_path=local_path, n_ctx=2048)

    # `streaming` is deliberately not passed to LLMChain: it is not a field of
    # LLMChain (which rejects unknown keyword arguments); token streaming is
    # configured on the LlamaCpp LLM itself.
    llm_chain = LLMChain(prompt=prompt, llm=llm)

    # Run the chain exactly once. The previous version called run() twice and
    # threw the first result away, doubling the inference time per request.
    return llm_chain.run(user)

# Text-in / text-out web UI around `func`; launch() starts serving it.
iface = gr.Interface(
    fn=func,
    inputs="text",
    outputs="text",
)
iface.launch()