AiCoderv2 committed on
Commit
8b84056
·
verified ·
1 Parent(s): 52779c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -31
app.py CHANGED
@@ -1,31 +1,34 @@
1
- import os
 
2
  from datasets import load_dataset
3
- import pandas as pd
4
- import torch
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
6
  import gradio as gr
 
7
 
8
- # Step 1: Load dataset and save to CSV (auto-training data)
9
- def load_and_save_dataset():
10
- print("Loading dataset from Hugging Face...")
11
- dataset = load_dataset("HuggingFaceFW/fineweb", split="train")
12
- print("Saving dataset to data.csv...")
13
- dataset.to_csv("data.csv")
14
- print("Done! Data saved to data.csv.")
15
- return "Dataset loaded and saved to data.csv."
16
 
17
- # Run on startup
18
- load_and_save_dataset()
 
 
 
 
 
 
 
 
 
19
 
20
- # Step 2: Load GPT-2 model for inference
21
- model_name = "gpt2" # or "distilgpt2" for faster inference
 
 
 
22
  tokenizer = AutoTokenizer.from_pretrained(model_name)
23
  model = AutoModelForCausalLM.from_pretrained(model_name)
24
-
25
- # Create generator pipeline
26
  generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=-1)
27
 
28
- # Function to generate responses
29
  def generate_response(prompt):
30
  responses = generator(
31
  prompt,
@@ -38,24 +41,24 @@ def generate_response(prompt):
38
  )
39
  return responses[0]['generated_text'].strip()
40
 
41
- # Step 3: Build Gradio interface
42
  with gr.Blocks() as demo:
43
- gr.Markdown("## GPT-2 Based AI Assistant with Dataset Loaded from Hugging Face")
44
- gr.Textbox(value="Loading dataset...", interactive=False, lines=2)
45
- fetch_button = gr.Button("Load Dataset and Save CSV")
46
- output_message = gr.Textbox()
47
 
48
- def fetch_and_confirm():
49
- msg = load_and_save_dataset()
50
- return msg
51
 
52
- fetch_button.click(fetch_and_confirm, outputs=output_message)
 
 
53
 
54
- gr.Markdown("### Ask the AI Assistant")
55
- prompt_input = gr.Textbox(label="Enter your prompt", placeholder="Say something...")
56
- response_output = gr.Textbox(label="Response", lines=10)
57
 
58
  def respond(prompt):
 
59
  return generate_response(prompt)
60
 
61
  gr.Button("Ask").click(respond, inputs=prompt_input, outputs=response_output)
 
1
+ import threading
2
+ import time
3
  from datasets import load_dataset
 
 
 
4
  import gradio as gr
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
6
 
7
+ # Global variable to store dataset loading status
8
+ dataset_loaded = False
9
+ dataset_info = "Dataset not loaded yet."
 
 
 
 
 
10
 
11
+ def load_dataset_in_background():
12
+ global dataset_loaded, dataset_info
13
+ try:
14
+ dataset_info = "Loading dataset..."
15
+ dataset = load_dataset("HuggingFaceFW/fineweb", split="train")
16
+ # Save to CSV if needed
17
+ dataset.to_csv("data.csv")
18
+ dataset_info = "Dataset loaded successfully!"
19
+ dataset_loaded = True
20
+ except Exception as e:
21
+ dataset_info = f"Error loading dataset: {e}"
22
 
23
+ # Start dataset loading in background thread
24
+ threading.Thread(target=load_dataset_in_background, daemon=True).start()
25
+
26
+ # Load GPT-2 model for inference
27
+ model_name = "gpt2"
28
  tokenizer = AutoTokenizer.from_pretrained(model_name)
29
  model = AutoModelForCausalLM.from_pretrained(model_name)
 
 
30
  generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=-1)
31
 
 
32
  def generate_response(prompt):
33
  responses = generator(
34
  prompt,
 
41
  )
42
  return responses[0]['generated_text'].strip()
43
 
44
+ # Gradio Interface
45
  with gr.Blocks() as demo:
46
+ gr.Markdown("## AI Assistant with Background Dataset Loading")
47
+ dataset_status = gr.Textbox(value=dataset_info, label="Dataset Loading Status", interactive=False, lines=2)
 
 
48
 
49
+ def get_dataset_status():
50
+ return dataset_info
 
51
 
52
+ # Refresh status button (or auto-update)
53
+ refresh_btn = gr.Button("Check Dataset Status")
54
+ refresh_btn.click(get_dataset_status, outputs=dataset_status)
55
 
56
+ gr.Markdown("### Chat with the AI")
57
+ prompt_input = gr.Textbox(label="Your prompt", placeholder="Ask me anything...")
58
+ response_output = gr.Textbox(label="AI Response", lines=10)
59
 
60
  def respond(prompt):
61
+ # You can implement logic to use dataset info here if needed
62
  return generate_response(prompt)
63
 
64
  gr.Button("Ask").click(respond, inputs=prompt_input, outputs=response_output)