# StudioGPT2 / app.py
import os
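# Bootstrap dependencies at startup (a common pattern in Hugging Face Spaces
# that lack a requirements.txt).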
os.system("pip install ctransformers gradio")
import requests
from tqdm import tqdm
import ctransformers
import gradio as gr
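
# Fetch the 4-bit quantized GGML weights from TheBloke/Llama-2-7B-GGML on
# first run, streaming to disk with a tqdm progress bar.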
if not os.path.isfile('./llama-2-7b.ggmlv3.q4_K_S.bin'):
    print("Downloading model from Hugging Face")
    url = "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_K_S.bin"
    response = requests.get(url, stream=True)
    total_size_in_bytes = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 KiB chunks
    progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
    with open('llama-2-7b.ggmlv3.q4_K_S.bin', 'wb') as file:
        for data in response.iter_content(block_size):
            progress_bar.update(len(data))
            file.write(data)
    progress_bar.close()
    if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
        print("ERROR: download incomplete, received fewer bytes than expected")
config_obj = ctransformers.Config(stop=["\n", "User"])
config = ctransformers.AutoConfig(config=config_obj, model_type="llama")
config.config.stop = ["\n"]  # this override makes the effective stop list just ["\n"]
llm = ctransformers.AutoModelForCausalLM.from_pretrained('./llama-2-7b.ggmlv3.q4_K_S.bin', config=config)
print("Loaded model")
def complete(prompt, stop=["User", "Assistant"]):
    tokens = llm.tokenize(prompt)
    token_count = 0
    output = ''
    for token in llm.generate(tokens):
        token_count += 1
        output += llm.detokenize(token)
        # Check the accumulated text after every token and cut off at a stop word.
        for word in stop:
            if word in output:
                return output, token_count
    return output, token_count
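
# Gradio handler: wrap the user's question in a simple User/Assistant prompt.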
def greet(question):
    prompt = f'User: {question}. Can you please answer this as informatively but concisely as possible.\nAssistant: '
    output, token_count = complete(prompt)
    return f"Response: {output} | Tokens: {token_count}"
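
# Minimal text-in/text-out UI; share=True requests a temporary public
# gradio.live link in addition to the local server.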
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch(share=True)