zac committed on
Commit
3c6b95f
·
1 Parent(s): 601f061

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import time
3
  import ctypes #to run on C api directly
4
  import llama_cpp
@@ -19,16 +20,13 @@ def generate_text(input_text, history):
19
  input_text_with_history = history[-1][1]+ "\n"
20
  input_text_with_history += f"Q: {input_text} \n A:"
21
  print("new input", input_text_with_history)
22
- output = llm(input_text_with_history, max_tokens=1024, stop=["Q:", "\n"], echo=True)
23
  response = output['choices'][0]['text'] + "\n"
24
  print("response", response)
25
  history =["init",input_text_with_history]
26
- for character in history:
27
- time.sleep(0.05)
28
- yield history
29
-
30
-
31
-
32
 
33
  demo = gr.ChatInterface(generate_text)
34
  demo.queue(concurrency_count=1, max_size=5)
@@ -36,3 +34,4 @@ demo.launch()
36
 
37
 
38
 
 
 
1
  import gradio as gr
2
+ import jason
3
  import time
4
  import ctypes #to run on C api directly
5
  import llama_cpp
 
20
  input_text_with_history = history[-1][1]+ "\n"
21
  input_text_with_history += f"Q: {input_text} \n A:"
22
  print("new input", input_text_with_history)
23
+ output = llm(input_text_with_history, max_tokens=1024, stop=["Q:", "\n"], stream=True)
24
  response = output['choices'][0]['text'] + "\n"
25
  print("response", response)
26
  history =["init",input_text_with_history]
27
+ return response
28
+ for out in output
29
+ print(jason.dump(out, indent=2))
 
 
 
30
 
31
  demo = gr.ChatInterface(generate_text)
32
  demo.queue(concurrency_count=1, max_size=5)
 
34
 
35
 
36
 
37
+