File size: 675 Bytes
f7f4d28
c14813e
 
d88cce3
f7f4d28
c14813e
 
 
 
 
f45ab7a
 
544a0eb
d88cce3
544a0eb
d88cce3
 
9b3b808
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import streamlit as st
import subprocess
import sys
import time

def install(package):
    """Install *package* into the current interpreter's environment via pip.

    Args:
        package: pip requirement string, e.g. "llama-cpp-python".

    Raises:
        subprocess.CalledProcessError: if the pip command exits non-zero.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Streamlit re-executes this script top-to-bottom on every user interaction,
# so an unconditional pip install would run on each rerun. Install only when
# the package is actually missing.
try:
    import llama_cpp  # noqa: F401 -- presence probe only
except ImportError:
    install("llama-cpp-python")

from llama_cpp import Llama

@st.cache_resource
def _load_model(model_path: str) -> Llama:
    """Load the GGUF model once and reuse it across Streamlit reruns.

    st.cache_resource keeps the Llama instance alive between script
    re-executions, avoiding a multi-second model reload per prompt.
    """
    return Llama(model_path=model_path)

prompt = st.chat_input("Say something")
if prompt:
    llm = _load_model("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")
    start = time.time()
    # max_tokens is an upper bound, not a guaranteed generation length.
    r = llm(prompt, max_tokens=1000)
    end = time.time()
    print(f"Generation time (max_tokens=1000): {end - start:.2f}s")
    response_text = "Nothing"
    try:
        response_text = r["choices"][0]["text"]
    except (KeyError, IndexError) as e:
        # Malformed or empty completion: keep the "Nothing" placeholder.
        print(e)
    st.write(f"User has sent the following prompt: {prompt} with response: {response_text} ")