Spaces:
Sleeping
Sleeping
oscarwang2
committed on
Commit
•
ddecd6a
1
Parent(s):
601e197
Update app.py
Browse files
app.py
CHANGED
@@ -1,30 +1,39 @@
|
|
1 |
import pandas as pd
|
2 |
-
from groq import Groq
|
3 |
import os
|
4 |
import gradio as gr
|
5 |
import threading
|
6 |
import time
|
|
|
7 |
|
|
|
8 |
client = Groq()
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
file_index = 1
|
11 |
-
|
12 |
-
current_file = os.path.join(data_directory, f'data{file_index}.csv')
|
13 |
file_paths = [current_file]
|
14 |
combined_tokens = 0
|
15 |
-
update_interval = 1 # Update interval in seconds
|
16 |
-
|
17 |
-
# Ensure the data directory exists
|
18 |
-
if not os.path.exists(data_directory):
|
19 |
-
os.makedirs(data_directory)
|
20 |
|
|
|
21 |
def get_file_size(filename):
|
22 |
-
if os.path.isfile(filename)
|
23 |
-
return os.path.getsize(filename)
|
24 |
-
return 0
|
25 |
|
|
|
26 |
def generate_and_save_data():
|
27 |
global file_index, current_file, file_paths, combined_tokens
|
|
|
|
|
|
|
|
|
|
|
28 |
while True:
|
29 |
try:
|
30 |
# Generate a prompt
|
@@ -33,7 +42,7 @@ def generate_and_save_data():
|
|
33 |
messages=[
|
34 |
{
|
35 |
"role": "user",
|
36 |
-
"content": "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that
|
37 |
}
|
38 |
],
|
39 |
temperature=1,
|
@@ -47,9 +56,9 @@ def generate_and_save_data():
|
|
47 |
prompt_tokens = 0
|
48 |
for chunk in completion:
|
49 |
content = chunk.choices[0].delta.content
|
50 |
-
if content
|
51 |
prompt += content
|
52 |
-
prompt_tokens += len(content.split())
|
53 |
|
54 |
# Use the generated prompt to query the model again
|
55 |
second_completion = client.chat.completions.create(
|
@@ -61,7 +70,7 @@ def generate_and_save_data():
|
|
61 |
}
|
62 |
],
|
63 |
temperature=1,
|
64 |
-
max_tokens=
|
65 |
top_p=1,
|
66 |
stream=True,
|
67 |
stop=None,
|
@@ -71,9 +80,9 @@ def generate_and_save_data():
|
|
71 |
response_tokens = 0
|
72 |
for chunk in second_completion:
|
73 |
content = chunk.choices[0].delta.content
|
74 |
-
if content
|
75 |
response += content
|
76 |
-
response_tokens += len(content.split())
|
77 |
|
78 |
# Update the combined token count
|
79 |
combined_tokens += (prompt_tokens + response_tokens)
|
@@ -86,36 +95,43 @@ def generate_and_save_data():
|
|
86 |
data = pd.DataFrame({"prompt": [prompt], "response": [response]})
|
87 |
|
88 |
# Check the size of the current file
|
89 |
-
if get_file_size(current_file) >=
|
90 |
file_index += 1
|
91 |
-
current_file = os.path.join(
|
92 |
file_paths.append(current_file)
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
# If the file exists, append without overwriting
|
98 |
-
if file_exists:
|
99 |
-
data.to_csv(current_file, mode='a', header=False, index=False)
|
100 |
else:
|
101 |
-
data
|
|
|
|
|
102 |
|
103 |
# Wait for the next update interval
|
104 |
-
time.sleep(
|
105 |
|
106 |
except Exception as e:
|
107 |
print(f"An error occurred: {e}. Retrying in 5 seconds...")
|
108 |
time.sleep(5)
|
109 |
|
|
|
110 |
def get_available_files():
|
111 |
return [f for f in file_paths if os.path.isfile(f)]
|
112 |
|
|
|
113 |
def update_file_list():
|
114 |
return gr.update(choices=get_available_files())
|
115 |
|
|
|
116 |
def update_token_count():
|
117 |
return combined_tokens
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
# Start the data generation in a separate thread
|
120 |
thread = threading.Thread(target=generate_and_save_data)
|
121 |
thread.daemon = True
|
@@ -126,7 +142,8 @@ with gr.Blocks() as app:
|
|
126 |
gr.Markdown("## AI Prompt and Response Generator")
|
127 |
gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")
|
128 |
|
129 |
-
file_selector = gr.Dropdown(label="Select a data file to download", choices=get_available_files())
|
|
|
130 |
download_button = gr.File(label="Download Selected File")
|
131 |
|
132 |
def download_file(selected_file):
|
@@ -134,6 +151,7 @@ with gr.Blocks() as app:
|
|
134 |
|
135 |
refresh_button = gr.Button("Refresh File List")
|
136 |
refresh_button.click(update_file_list, outputs=file_selector)
|
|
|
137 |
file_selector.change(download_file, inputs=file_selector, outputs=download_button)
|
138 |
|
139 |
token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)
|
|
|
1 |
# Standard library
import os
import threading
import time

# Third-party
import gradio as gr
import pandas as pd
from groq import Groq

# Initialize Groq client
client = Groq()

# Constants
MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # 1.1GB in bytes
DATA_DIRECTORY = 'data'
UPDATE_INTERVAL = 1  # Update interval in seconds

# Ensure the data directory exists
os.makedirs(DATA_DIRECTORY, exist_ok=True)

# Initialize variables
file_index = 1
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]
combined_tokens = 0
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 if it does not exist."""
    if os.path.isfile(filename):
        return os.path.getsize(filename)
    return 0
|
|
|
|
28 |
|
29 |
+
# Data generation and saving function
|
30 |
def generate_and_save_data():
|
31 |
global file_index, current_file, file_paths, combined_tokens
|
32 |
+
|
33 |
+
# Create the initial file if it doesn't exist
|
34 |
+
if not os.path.isfile(current_file):
|
35 |
+
pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)
|
36 |
+
|
37 |
while True:
|
38 |
try:
|
39 |
# Generate a prompt
|
|
|
42 |
messages=[
|
43 |
{
|
44 |
"role": "user",
|
45 |
+
"content": "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that"
|
46 |
}
|
47 |
],
|
48 |
temperature=1,
|
|
|
56 |
prompt_tokens = 0
|
57 |
for chunk in completion:
|
58 |
content = chunk.choices[0].delta.content
|
59 |
+
if content:
|
60 |
prompt += content
|
61 |
+
prompt_tokens += len(content.split())
|
62 |
|
63 |
# Use the generated prompt to query the model again
|
64 |
second_completion = client.chat.completions.create(
|
|
|
70 |
}
|
71 |
],
|
72 |
temperature=1,
|
73 |
+
max_tokens=5000,
|
74 |
top_p=1,
|
75 |
stream=True,
|
76 |
stop=None,
|
|
|
80 |
response_tokens = 0
|
81 |
for chunk in second_completion:
|
82 |
content = chunk.choices[0].delta.content
|
83 |
+
if content:
|
84 |
response += content
|
85 |
+
response_tokens += len(content.split())
|
86 |
|
87 |
# Update the combined token count
|
88 |
combined_tokens += (prompt_tokens + response_tokens)
|
|
|
95 |
data = pd.DataFrame({"prompt": [prompt], "response": [response]})
|
96 |
|
97 |
# Check the size of the current file
|
98 |
+
if get_file_size(current_file) >= MAX_SIZE:
|
99 |
file_index += 1
|
100 |
+
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
|
101 |
file_paths.append(current_file)
|
102 |
+
# Create the new file with headers
|
103 |
+
with open(current_file, 'w') as f:
|
104 |
+
data.to_csv(f, header=True, index=False)
|
|
|
|
|
|
|
|
|
105 |
else:
|
106 |
+
# Append data to the current file
|
107 |
+
with open(current_file, 'a') as f:
|
108 |
+
data.to_csv(f, header=False, index=False)
|
109 |
|
110 |
# Wait for the next update interval
|
111 |
+
time.sleep(UPDATE_INTERVAL)
|
112 |
|
113 |
except Exception as e:
|
114 |
print(f"An error occurred: {e}. Retrying in 5 seconds...")
|
115 |
time.sleep(5)
|
116 |
|
117 |
+
# Get available files
def get_available_files():
    """Return the tracked data-file paths that currently exist on disk."""
    return list(filter(os.path.isfile, file_paths))
|
120 |
|
121 |
+
# Update file list
def update_file_list():
    """Build a Gradio update that refreshes the dropdown's file choices."""
    available = get_available_files()
    return gr.update(choices=available)
|
124 |
|
125 |
+
# Update token count
def update_token_count():
    """Return the running total of prompt + response tokens generated so far."""
    return combined_tokens
|
128 |
|
129 |
+
# Display file content
def display_file_content(selected_file):
    """Load the selected CSV into a DataFrame for the viewer.

    Parameters:
        selected_file: path chosen in the dropdown; may be empty/None.

    Returns:
        The file's contents as a DataFrame, or an empty DataFrame when
        nothing is selected or the file no longer exists (e.g. it was
        rotated away by the writer thread) — the UI callback must never
        raise FileNotFoundError.
    """
    if not selected_file or not os.path.isfile(selected_file):
        return pd.DataFrame()
    return pd.read_csv(selected_file)
|
134 |
+
|
135 |
# Start the data generation in a separate thread
|
136 |
thread = threading.Thread(target=generate_and_save_data)
|
137 |
thread.daemon = True
|
|
|
142 |
gr.Markdown("## AI Prompt and Response Generator")
|
143 |
gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")
|
144 |
|
145 |
+
file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
|
146 |
+
file_viewer = gr.DataFrame(label="CSV File Content")
|
147 |
download_button = gr.File(label="Download Selected File")
|
148 |
|
149 |
def download_file(selected_file):
|
|
|
151 |
|
152 |
refresh_button = gr.Button("Refresh File List")
|
153 |
refresh_button.click(update_file_list, outputs=file_selector)
|
154 |
+
file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
|
155 |
file_selector.change(download_file, inputs=file_selector, outputs=download_button)
|
156 |
|
157 |
token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)
|