StreamForLLM / app.py
import streamlit as st
import subprocess
import sys
import time
# Install the dependency at runtime so the Space works without a requirements step.
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install("llama-cpp-python")
from llama_cpp import Llama
prompt = st.chat_input("Say something")
if prompt:
    # Load the quantized GGUF model; note it is reloaded on every prompt.
    llm = Llama(model_path="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")
    print(f"Started {time.time()}")
    start = time.time()
    r = llm(prompt, max_tokens=1000)  # max_tokens caps the completion length
    end = time.time()
    print(f"Generation time (up to 1000 tokens): {end - start:.2f}s")
    response_text = "Nothing"
    try:
        response_text = r["choices"][0]["text"]
    except Exception as e:
        print(e)
    st.write(f"User has sent the following prompt: {prompt} with response: {response_text}")
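
# A possible refinement, sketched here as an assumption rather than part of the
# committed file: cache the Llama instance with st.cache_resource so the
# multi-gigabyte GGUF loads once per process instead of once per prompt, and
# stream tokens back with stream=True plus st.write_stream (Streamlit 1.31+).
# load_llm is a hypothetical helper name, not from the original app.

import streamlit as st
from llama_cpp import Llama

@st.cache_resource  # load the model once and reuse it across reruns
def load_llm():
    return Llama(model_path="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")

prompt = st.chat_input("Say something")
if prompt:
    llm = load_llm()
    # stream=True yields completion chunks as they are generated,
    # so the reply appears incrementally instead of after the full run.
    chunks = llm(prompt, max_tokens=1000, stream=True)
    st.write_stream(chunk["choices"][0]["text"] for chunk in chunks)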