Spaces: Sleeping
oscarwang2 committed
Commit 50f4808 • 1 Parent(s): 0b023c6
Create app.py
app.py
ADDED
@@ -0,0 +1,135 @@
import pandas as pd
from groq import Groq
import os
import gradio as gr
import threading
import time

client = Groq()
max_size = 1.1 * 1024 * 1024 * 1024  # 1.1 GB in bytes
file_index = 1
current_file = f'data{file_index}.csv'
file_paths = [current_file]
combined_tokens = 0

def get_file_size(filename):
    if os.path.isfile(filename):
        return os.path.getsize(filename)
    return 0

def generate_and_save_data():
    global file_index, current_file, file_paths, combined_tokens
    while True:
        try:
            # Generate a prompt
            completion = client.chat.completions.create(
                model="llama-3.1-70b-versatile",
                messages=[
                    {
                        "role": "user",
                        "content": "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. e.g., don't enclose the prompt in quotation marks or say 'Here is a single prompt that meets your requirements' or anything similar to that"
                    }
                ],
                temperature=1,
                max_tokens=1024,
                top_p=1,
                stream=True,
                stop=None,
            )

            prompt = ""
            prompt_tokens = 0
            for chunk in completion:
                prompt += chunk.choices[0].delta.content or ""
                prompt_tokens += len((chunk.choices[0].delta.content or "").split())  # Assuming tokens are words for simplicity

            # Use the generated prompt to query the model again
            second_completion = client.chat.completions.create(
                model="llama-3.1-70b-versatile",
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                temperature=1,
                max_tokens=8000,
                top_p=1,
                stream=True,
                stop=None,
            )

            response = ""
            response_tokens = 0
            for chunk in second_completion:
                response += chunk.choices[0].delta.content or ""
                response_tokens += len((chunk.choices[0].delta.content or "").split())  # Assuming tokens are words for simplicity

            # Update the combined token count
            combined_tokens += (prompt_tokens + response_tokens)

            # Print the generated prompt and the response
            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)

            # Create a DataFrame with the prompt and response
            data = pd.DataFrame({"prompt": [prompt], "response": [response]})

            # Check the size of the current file and roll over to a new one if needed
            if get_file_size(current_file) >= max_size:
                file_index += 1
                current_file = f'data{file_index}.csv'
                file_paths.append(current_file)

            # Check if the current file exists
            file_exists = os.path.isfile(current_file)

            # If the file exists, append without overwriting; otherwise create it with a header
            if file_exists:
                data.to_csv(current_file, mode='a', header=False, index=False)
            else:
                data.to_csv(current_file, mode='w', header=True, index=False)

        except Exception as e:
            print(f"An error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)

def get_available_files():
    return [f for f in file_paths if os.path.isfile(f)]

def update_file_list():
    return gr.Dropdown.update(choices=get_available_files())  # Gradio 3.x-style component update

def update_token_count():
    return combined_tokens

# Start the data generation in a separate daemon thread
thread = threading.Thread(target=generate_and_save_data)
thread.daemon = True
thread.start()

# Create the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")

    file_selector = gr.Dropdown(label="Select a data file to download", choices=get_available_files())
    download_button = gr.File(label="Download Selected File")

    def download_file(selected_file):
        return selected_file

    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)

    token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)

    def update_token_display():
        return str(update_token_count())

    # Button to refresh the displayed token count on demand
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)

app.launch()
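
For reference, a minimal sketch of the Space setup this file assumes: Groq() with no arguments reads the API key from the GROQ_API_KEY environment variable (typically added as a Space secret), and the three third-party imports correspond to a requirements.txt along these lines. The file contents below are illustrative, not part of this commit:

    # requirements.txt (illustrative; unpinned versions assumed)
    pandas
    groq
    gradio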