Kalyani8 committed on
Commit
87188d3
·
verified ·
1 Parent(s): 75cb8fa

Update app.py

Browse files

Load the first 12,000 English articles from the wiki40b Wikipedia dataset

Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -1,8 +1,8 @@
1
- import gradio as gr
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
-
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
8
 
 
 
1
+ from datasets import load_dataset
2
+ # Load a small subset (12,000 rows)
3
+ dataset = load_dataset("wiki40b", "en", split="train[:12000]")
4
 
5
+ # Extract only text
6
+ docs = [d["text"] for d in dataset]
 
 
 
7
 
8
+ print("Loaded dataset with", len(docs), "documents.")