Studiobotxyz committed on
Commit
4ab0120
·
1 Parent(s): 67ee73b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -39
app.py CHANGED
@@ -1,14 +1,11 @@
1
  import os
2
- os.system("pip install flask ctransformers gradio")
3
  import time
4
  import requests
5
  from tqdm import tqdm
6
- from flask import Flask, request, jsonify
7
  import ctransformers
8
  import gradio as gr
9
 
10
- app = Flask(__name__)
11
-
12
  if not os.path.isfile('llama-2-7b.ggmlv3.q4_K_S.bin'):
13
  print("Downloading Model from HuggingFace")
14
  url = "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_K_S.bin"
@@ -31,16 +28,6 @@ config.config.stop = ["\n"]
31
  llm = ctransformers.AutoModelForCausalLM.from_pretrained('./llama-2-7b.ggmlv3.q4_K_S.bin', config=config)
32
  print("Loaded model")
33
 
34
- def time_it(func):
35
- def wrapper(*args, **kwargs):
36
- start_time = time.time()
37
- result = func(*args, **kwargs)
38
- end_time = time.time()
39
- execution_time = end_time - start_time
40
- print(f"Function '{func.__name__}' took {execution_time:.6f} seconds to execute.")
41
- return result
42
- return wrapper
43
-
44
  def complete(prompt, stop=["User", "Assistant"]):
45
  tokens = llm.tokenize(prompt)
46
  token_count = 0
@@ -58,31 +45,9 @@ def complete(prompt, stop=["User", "Assistant"]):
58
  print('\n')
59
  return [output, token_count]
60
 
61
- @app.route('/generate', methods=['POST'])
62
- def generate_response():
63
- data = request.get_json()
64
- question = data.get('question', '')
65
-
66
- start_time = time.time()
67
  output, token_count = complete(f'User: {question}. Can you please answer this as informatively but concisely as possible.\nAssistant: ')
68
- end_time = time.time()
69
- execution_time = end_time - start_time
70
-
71
- response = {
72
- 'output': output,
73
- 'token_count': token_count,
74
- 'execution_time': execution_time,
75
- 'tokens_per_second': token_count / execution_time
76
- }
77
 
78
- return jsonify(response)
79
-
80
- def greet(name):
81
- _, token_count = complete(f'User: {name}. Can you please answer this as informatively but concisely as possible.\nAssistant: ')
82
- return f"Response: {name} | Tokens: {token_count}"
83
-
84
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
85
  iface.launch()
86
-
87
- if __name__ == '__main__':
88
- app.run(debug=True)
 
1
  import os
2
+ os.system("pip install ctransformers gradio")
3
  import time
4
  import requests
5
  from tqdm import tqdm
 
6
  import ctransformers
7
  import gradio as gr
8
 
 
 
9
  if not os.path.isfile('llama-2-7b.ggmlv3.q4_K_S.bin'):
10
  print("Downloading Model from HuggingFace")
11
  url = "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_K_S.bin"
 
28
  llm = ctransformers.AutoModelForCausalLM.from_pretrained('./llama-2-7b.ggmlv3.q4_K_S.bin', config=config)
29
  print("Loaded model")
30
 
 
 
 
 
 
 
 
 
 
 
31
  def complete(prompt, stop=["User", "Assistant"]):
32
  tokens = llm.tokenize(prompt)
33
  token_count = 0
 
45
  print('\n')
46
  return [output, token_count]
47
 
48
def greet(question):
    """Answer a user question with the local Llama model.

    Builds a single-turn chat prompt around *question*, runs it through the
    module-level ``complete`` helper, and returns one display string of the
    form ``"Response: <answer> | Tokens: <count>"`` for the Gradio textbox.
    """
    prompt = f'User: {question}. Can you please answer this as informatively but concisely as possible.\nAssistant: '
    answer, used_tokens = complete(prompt)
    return f"Response: {answer} | Tokens: {used_tokens}"
 
 
 
 
 
 
 
 
51
 
52
# Minimal text-in/text-out UI over the model.
# NOTE(review): live=True makes Gradio re-invoke `greet` on every input
# change (effectively per keystroke), and each call runs a full LLM
# generation — confirm this is intended; without live=True the user would
# submit explicitly.
iface = gr.Interface(fn=greet, inputs="text", outputs="text", live=True)
# launch() blocks and serves the app (default host/port; no share link).
iface.launch()