Create app.py
app.py
ADDED
@@ -0,0 +1,63 @@
import os
from datasets import load_dataset
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Step 1: Load dataset and save to CSV (auto-training data)
def load_and_save_dataset():
    print("Loading dataset from Hugging Face...")
    # The full fineweb train split is several terabytes; stream it and take a
    # small sample (first 1,000 rows here) so the demo can start on a CPU Space.
    dataset = load_dataset("HuggingFaceFW/fineweb", split="train", streaming=True)
    print("Saving dataset to data.csv...")
    pd.DataFrame(list(dataset.take(1000))).to_csv("data.csv", index=False)
    print("Done! Data saved to data.csv.")
    return "Dataset loaded and saved to data.csv."

# Run on startup
load_and_save_dataset()

# Step 2: Load GPT-2 model for inference
model_name = "gpt2"  # or "distilgpt2" for faster inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create generator pipeline (device=-1 runs on CPU)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)

# Function to generate responses
def generate_response(prompt):
    responses = generator(
        prompt,
        max_length=100,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1,
    )
    return responses[0]["generated_text"].strip()

# Step 3: Build Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## GPT-2 Based AI Assistant with Dataset Loaded from Hugging Face")
    gr.Textbox(value="Loading dataset...", interactive=False, lines=2)
    fetch_button = gr.Button("Load Dataset and Save CSV")
    output_message = gr.Textbox()

    def fetch_and_confirm():
        msg = load_and_save_dataset()
        return msg

    fetch_button.click(fetch_and_confirm, outputs=output_message)

    gr.Markdown("### Ask the AI Assistant")
    prompt_input = gr.Textbox(label="Enter your prompt", placeholder="Say something...")
    response_output = gr.Textbox(label="Response", lines=10)

    def respond(prompt):
        return generate_response(prompt)

    gr.Button("Ask").click(respond, inputs=prompt_input, outputs=response_output)

demo.launch()
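
Not part of the committed app.py, but as a quick local sanity check of Step 1's output, a minimal sketch of inspecting the saved data.csv (assuming it was written by load_and_save_dataset() above):

import pandas as pd

# Read back the CSV produced by load_and_save_dataset() and summarize it.
df = pd.read_csv("data.csv")
print(f"{len(df)} rows, columns: {list(df.columns)}")
print(df.head(3))  # preview the first few sampled fineweb records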