Spaces:
Sleeping
Sleeping
File size: 5,231 Bytes
50f4808 15939d8 50f4808 15939d8 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 15939d8 59eab61 15939d8 50f4808 15939d8 50f4808 15939d8 50f4808 15939d8 ddecd6a 15939d8 50f4808 15939d8 50f4808 15939d8 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 15939d8 50f4808 ddecd6a 15939d8 50f4808 59eab61 ddecd6a 59eab61 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 9825b5e 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import pandas as pd
import os
import gradio as gr
import threading
import time
from gradio_client import Client
# Initialize Gradio client
# NOTE(review): Client(...) connects to the remote Hugging Face Space at import
# time — it will block or raise if the Space is unreachable; confirm intended.
client = Client("Nymbo/Llama-3.1-405B-Instruct")

# Constants
MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # 1.1GB in bytes — roll-over threshold per CSV file
DATA_DIRECTORY = 'data'  # all generated CSVs live here
UPDATE_INTERVAL = 1  # Update interval in seconds between generation cycles

# Ensure the data directory exists
os.makedirs(DATA_DIRECTORY, exist_ok=True)

# Initialize module-level state (mutated by the generator thread)
file_index = 1  # numeric suffix of the CSV currently being written
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]  # every file created so far, shown in the UI dropdown
combined_tokens = 0  # running whitespace-token total across all generated pairs
# Helper: size of a file on disk
def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 if it is not a regular file."""
    if not os.path.isfile(filename):
        return 0
    return os.path.getsize(filename)
# Data generation and saving function
def generate_and_save_data():
    """Continuously generate (prompt, response) pairs and append them to CSVs.

    Runs forever; intended to be executed on a daemon thread.  Each iteration
    asks the remote model for a synthetic user prompt, feeds that prompt back
    to the model for a response, and appends the pair to the current CSV file,
    rolling over to a new file once the current one reaches MAX_SIZE.
    Mutates the module-level file_index, current_file, file_paths and
    combined_tokens.
    """
    global file_index, current_file, file_paths, combined_tokens
    # Create the initial file (with a header row) if it doesn't exist yet
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)
    while True:
        try:
            # Generate a prompt
            prompt_result = client.predict(
                message="give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that",
                system_message="",
                max_tokens=1024,
                temperature=1,
                top_p=1,
                api_name="/chat"
            )
            # NOTE(review): assumes the /chat endpoint returns a mapping with a
            # 'message' key — gradio_client endpoints often return a plain
            # string instead; confirm against this Space's API spec.
            prompt = prompt_result['message']
            # Whitespace word count used as a rough token estimate
            prompt_tokens = len(prompt.split())
            # Use the generated prompt to query the model again
            response_result = client.predict(
                message=prompt,
                system_message="",
                max_tokens=5000,
                temperature=1,
                top_p=1,
                api_name="/chat"
            )
            response = response_result['message']
            response_tokens = len(response.split())
            # Update the combined token count
            combined_tokens += (prompt_tokens + response_tokens)
            # Print the generated prompt and the response
            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)
            # Create a single-row DataFrame with the prompt and response
            data = pd.DataFrame({"prompt": [prompt], "response": [response]})
            # Roll over to a fresh file once the current one reaches MAX_SIZE
            if get_file_size(current_file) >= MAX_SIZE:
                file_index += 1
                current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                file_paths.append(current_file)
                # Create the new file with headers
                data.to_csv(current_file, index=False)
            else:
                # Append data to the current file (no header row)
                data.to_csv(current_file, mode='a', header=False, index=False)
            # Wait for the next update interval
            time.sleep(UPDATE_INTERVAL)
        except Exception as e:
            # Best-effort loop: log and retry so a transient API/network error
            # doesn't kill the generator thread
            print(f"An error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)
# Files tracked so far that actually exist on disk
def get_available_files():
    """Return the subset of tracked CSV paths that currently exist."""
    return list(filter(os.path.isfile, file_paths))
# Refresh the dropdown choices
def update_file_list():
    """Return a Gradio update that replaces the dropdown choices with the
    files that exist right now."""
    available = get_available_files()
    return gr.update(choices=available)
# Update token count
def update_token_count():
    # Plain snapshot read of the module-level counter maintained by the
    # generator thread; no synchronization is done here.
    return combined_tokens
# Preview a CSV file in the UI
def display_file_content(selected_file):
    """Load *selected_file* as a DataFrame; empty DataFrame when nothing is selected."""
    if not selected_file:
        return pd.DataFrame()
    return pd.read_csv(selected_file)
# Run the generator on a background daemon thread so it dies with the process
# and never blocks the Gradio UI below.
thread = threading.Thread(target=generate_and_save_data, daemon=True)
thread.start()
# Create Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")
    # Dropdown is populated once at build time; the Refresh button repopulates it
    file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
    file_viewer = gr.DataFrame(label="CSV File Content")
    download_button = gr.File(label="Download Selected File")
    def download_file(selected_file):
        # Pass the path straight through so gr.File serves it for download
        return selected_file
    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)
    # Selecting a file both previews it and points the download component at it
    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)
    # Initial value is a one-time snapshot; only the button below refreshes it
    token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)
    def update_token_display():
        return str(update_token_count())
    # Update the token count every second
    # NOTE(review): despite the comment above, no timer/polling is wired up —
    # the count only refreshes on button click; confirm whether a gr.Timer or
    # `every=`-style polling hook was intended.
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)
app.launch()
|