oscarwang2 committed on
Commit
ddecd6a
1 Parent(s): 601e197

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -30
app.py CHANGED
@@ -1,30 +1,39 @@
1
  import pandas as pd
2
- from groq import Groq
3
  import os
4
  import gradio as gr
5
  import threading
6
  import time
 
7
 
 
8
  client = Groq()
9
- max_size = 1.1 * 1024 * 1024 * 1024 # 1.1GB in bytes
 
 
 
 
 
 
 
 
 
10
  file_index = 1
11
- data_directory = 'data'
12
- current_file = os.path.join(data_directory, f'data{file_index}.csv')
13
  file_paths = [current_file]
14
  combined_tokens = 0
15
- update_interval = 1 # Update interval in seconds
16
-
17
- # Ensure the data directory exists
18
- if not os.path.exists(data_directory):
19
- os.makedirs(data_directory)
20
 
 
21
  def get_file_size(filename):
22
- if os.path.isfile(filename):
23
- return os.path.getsize(filename)
24
- return 0
25
 
 
26
  def generate_and_save_data():
27
  global file_index, current_file, file_paths, combined_tokens
 
 
 
 
 
28
  while True:
29
  try:
30
  # Generate a prompt
@@ -33,7 +42,7 @@ def generate_and_save_data():
33
  messages=[
34
  {
35
  "role": "user",
36
- "content": "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that I is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that"
37
  }
38
  ],
39
  temperature=1,
@@ -47,9 +56,9 @@ def generate_and_save_data():
47
  prompt_tokens = 0
48
  for chunk in completion:
49
  content = chunk.choices[0].delta.content
50
- if content is not None:
51
  prompt += content
52
- prompt_tokens += len(content.split()) # Assuming tokens are words for simplicity
53
 
54
  # Use the generated prompt to query the model again
55
  second_completion = client.chat.completions.create(
@@ -61,7 +70,7 @@ def generate_and_save_data():
61
  }
62
  ],
63
  temperature=1,
64
- max_tokens=1024,
65
  top_p=1,
66
  stream=True,
67
  stop=None,
@@ -71,9 +80,9 @@ def generate_and_save_data():
71
  response_tokens = 0
72
  for chunk in second_completion:
73
  content = chunk.choices[0].delta.content
74
- if content is not None:
75
  response += content
76
- response_tokens += len(content.split()) # Assuming tokens are words for simplicity
77
 
78
  # Update the combined token count
79
  combined_tokens += (prompt_tokens + response_tokens)
@@ -86,36 +95,43 @@ def generate_and_save_data():
86
  data = pd.DataFrame({"prompt": [prompt], "response": [response]})
87
 
88
  # Check the size of the current file
89
- if get_file_size(current_file) >= max_size:
90
  file_index += 1
91
- current_file = os.path.join(data_directory, f'data{file_index}.csv')
92
  file_paths.append(current_file)
93
-
94
- # Check if the current file exists
95
- file_exists = os.path.isfile(current_file)
96
-
97
- # If the file exists, append without overwriting
98
- if file_exists:
99
- data.to_csv(current_file, mode='a', header=False, index=False)
100
  else:
101
- data.to_csv(current_file, mode='w', header=True, index=False)
 
 
102
 
103
  # Wait for the next update interval
104
- time.sleep(update_interval)
105
 
106
  except Exception as e:
107
  print(f"An error occurred: {e}. Retrying in 5 seconds...")
108
  time.sleep(5)
109
 
 
110
  def get_available_files():
111
  return [f for f in file_paths if os.path.isfile(f)]
112
 
 
113
  def update_file_list():
114
  return gr.update(choices=get_available_files())
115
 
 
116
  def update_token_count():
117
  return combined_tokens
118
 
 
 
 
 
 
 
119
  # Start the data generation in a separate thread
120
  thread = threading.Thread(target=generate_and_save_data)
121
  thread.daemon = True
@@ -126,7 +142,8 @@ with gr.Blocks() as app:
126
  gr.Markdown("## AI Prompt and Response Generator")
127
  gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")
128
 
129
- file_selector = gr.Dropdown(label="Select a data file to download", choices=get_available_files())
 
130
  download_button = gr.File(label="Download Selected File")
131
 
132
  def download_file(selected_file):
@@ -134,6 +151,7 @@ with gr.Blocks() as app:
134
 
135
  refresh_button = gr.Button("Refresh File List")
136
  refresh_button.click(update_file_list, outputs=file_selector)
 
137
  file_selector.change(download_file, inputs=file_selector, outputs=download_button)
138
 
139
  token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)
 
1
  import pandas as pd
 
2
  import os
3
  import gradio as gr
4
  import threading
5
  import time
6
+ from groq import Groq
7
 
8
+ # Initialize Groq client
9
  client = Groq()
10
+
11
+ # Constants
12
+ MAX_SIZE = 1.1 * 1024 * 1024 * 1024 # 1.1GB in bytes
13
+ DATA_DIRECTORY = 'data'
14
+ UPDATE_INTERVAL = 1 # Update interval in seconds
15
+
16
+ # Ensure the data directory exists
17
+ os.makedirs(DATA_DIRECTORY, exist_ok=True)
18
+
19
+ # Initialize variables
20
  file_index = 1
21
+ current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
 
22
  file_paths = [current_file]
23
  combined_tokens = 0
 
 
 
 
 
24
 
25
+ # Helper function to get file size
26
def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 if it is not a regular file."""
    if os.path.isfile(filename):
        return os.path.getsize(filename)
    return 0
 
 
28
 
29
+ # Data generation and saving function
30
  def generate_and_save_data():
31
  global file_index, current_file, file_paths, combined_tokens
32
+
33
+ # Create the initial file if it doesn't exist
34
+ if not os.path.isfile(current_file):
35
+ pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)
36
+
37
  while True:
38
  try:
39
  # Generate a prompt
 
42
  messages=[
43
  {
44
  "role": "user",
45
+ "content": "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that"
46
  }
47
  ],
48
  temperature=1,
 
56
  prompt_tokens = 0
57
  for chunk in completion:
58
  content = chunk.choices[0].delta.content
59
+ if content:
60
  prompt += content
61
+ prompt_tokens += len(content.split())
62
 
63
  # Use the generated prompt to query the model again
64
  second_completion = client.chat.completions.create(
 
70
  }
71
  ],
72
  temperature=1,
73
+ max_tokens=5000,
74
  top_p=1,
75
  stream=True,
76
  stop=None,
 
80
  response_tokens = 0
81
  for chunk in second_completion:
82
  content = chunk.choices[0].delta.content
83
+ if content:
84
  response += content
85
+ response_tokens += len(content.split())
86
 
87
  # Update the combined token count
88
  combined_tokens += (prompt_tokens + response_tokens)
 
95
  data = pd.DataFrame({"prompt": [prompt], "response": [response]})
96
 
97
  # Check the size of the current file
98
+ if get_file_size(current_file) >= MAX_SIZE:
99
  file_index += 1
100
+ current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
101
  file_paths.append(current_file)
102
+ # Create the new file with headers
103
+ with open(current_file, 'w') as f:
104
+ data.to_csv(f, header=True, index=False)
 
 
 
 
105
  else:
106
+ # Append data to the current file
107
+ with open(current_file, 'a') as f:
108
+ data.to_csv(f, header=False, index=False)
109
 
110
  # Wait for the next update interval
111
+ time.sleep(UPDATE_INTERVAL)
112
 
113
  except Exception as e:
114
  print(f"An error occurred: {e}. Retrying in 5 seconds...")
115
  time.sleep(5)
116
 
117
+ # Get available files
118
def get_available_files():
    """Return the tracked data-file paths that currently exist on disk."""
    existing = []
    for path in file_paths:
        if os.path.isfile(path):
            existing.append(path)
    return existing
120
 
121
+ # Update file list
122
def update_file_list():
    """Build a Gradio update that refreshes the dropdown with the files on disk."""
    current_choices = get_available_files()
    return gr.update(choices=current_choices)
124
 
125
+ # Update token count
126
def update_token_count():
    """Expose the running total of generated tokens for the UI display."""
    total = combined_tokens
    return total
128
 
129
+ # Display file content
130
def display_file_content(selected_file):
    """Load the selected CSV into a DataFrame for the Gradio file viewer.

    Parameters
    ----------
    selected_file : str or None
        Path chosen in the dropdown; may be falsy when nothing is selected.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, or an empty DataFrame when no file is selected,
        the file has been removed (stale dropdown choice), or it exists
        but contains no parseable rows yet — so the UI callback never
        raises and crashes the interface.
    """
    if not selected_file or not os.path.isfile(selected_file):
        return pd.DataFrame()
    try:
        return pd.read_csv(selected_file)
    except pd.errors.EmptyDataError:
        # File was just created by the writer thread and has no content yet.
        return pd.DataFrame()
134
+
135
  # Start the data generation in a separate thread
136
  thread = threading.Thread(target=generate_and_save_data)
137
  thread.daemon = True
 
142
  gr.Markdown("## AI Prompt and Response Generator")
143
  gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")
144
 
145
+ file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
146
+ file_viewer = gr.DataFrame(label="CSV File Content")
147
  download_button = gr.File(label="Download Selected File")
148
 
149
  def download_file(selected_file):
 
151
 
152
  refresh_button = gr.Button("Refresh File List")
153
  refresh_button.click(update_file_list, outputs=file_selector)
154
+ file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
155
  file_selector.change(download_file, inputs=file_selector, outputs=download_button)
156
 
157
  token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)