# StudioGPT2 / app.py
import os

# Install runtime dependencies; a Hugging Face Spaces shortcut
# (a requirements.txt is the cleaner option).
os.system("pip install ctransformers gradio")

import requests
from tqdm import tqdm
import ctransformers
import gradio as gr
MODEL_PATH = './llama-2-7b.ggmlv3.q4_K_S.bin'

# Download the quantized GGML model once, streaming it to disk with a progress bar.
if not os.path.isfile(MODEL_PATH):
    print("Downloading model from Hugging Face")
    url = "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_K_S.bin"
    response = requests.get(url, stream=True)
    response.raise_for_status()  # fail fast on HTTP errors instead of saving an error page
    total_size_in_bytes = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 KiB per chunk
    progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
    with open(MODEL_PATH, 'wb') as file:
        for data in response.iter_content(block_size):
            progress_bar.update(len(data))
            file.write(data)
    progress_bar.close()
    if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
        print("ERROR: download was incomplete")
# Generation stops at a newline; complete() additionally watches for the
# 'User'/'Assistant' speaker tags on the Python side.
config = ctransformers.AutoConfig(
    config=ctransformers.Config(stop=["\n"]),
    model_type='llama',
)
llm = ctransformers.AutoModelForCausalLM.from_pretrained(MODEL_PATH, config=config)
print("Loaded model")
def complete(prompt, stop=("User", "Assistant")):
    """Generate a completion token by token, streaming it to stdout."""
    tokens = llm.tokenize(prompt)
    token_count = 0
    output = ''
    for token in llm.generate(tokens):
        token_count += 1
        result = llm.detokenize(token)
        output += result
        for word in stop:
            if word in output:
                print('\n')
                # Trim the stop word off the returned text.
                return [output[:output.find(word)], token_count]
        print(result, end='', flush=True)
    print('\n')
    return [output, token_count]
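# Example usage (illustrative only; invokes the model, so keep prompts short):
#   text, n_tokens = complete("User: What is GGML?\nAssistant: ")
#   print(text, n_tokens)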
def greet(question):
    print(question)
    output, token_count = complete(
        f'User: {question}. Can you please answer this as informatively but concisely as possible.\nAssistant: '
    )
    return f"Response: {output} | Tokens: {token_count}"
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch(share=True)
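# On Hugging Face Spaces, share=True is not supported (the Space is already
# served publicly); when running locally it creates a temporary public link.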