Spaces:
Sleeping
Sleeping
import requests | |
import gradio as gr | |
def extract_text_from_url(url):
    """Fetch a JSON payload from ``url`` and return its 'text' fields.

    The response is expected to be a JSON object holding a "rows" list whose
    items look like ``{"row": {"text": ...}}``. Items that do not have that
    shape, or that carry no "text" value, are skipped instead of aborting
    the whole extraction.

    Args:
        url: Address to request with an HTTP GET.

    Returns:
        The collected texts joined with newlines (an empty string when there
        are none). On failure, a message starting with "An error occurred: "
        is returned instead of raising, so the problem can be shown as the
        function's output.
    """
    # Reject blank input up front: it can never succeed, and the library's
    # own message for it is not helpful to an end user.
    if not isinstance(url, str) or not url.strip():
        return "An error occurred: no URL provided"

    # NOTE(review): the URL is caller-supplied and fetched from this process;
    # consider restricting the allowed hosts if this is exposed publicly.
    try:
        # Without a timeout, requests waits forever on an unresponsive server.
        response = requests.get(url.strip(), timeout=30)
        response.raise_for_status()  # Turn 4xx/5xx responses into errors
        data = response.json()
    except Exception as e:
        # Deliberately broad: this is the user-facing boundary, and every
        # failure mode (network, HTTP status, invalid JSON) is reported the
        # same way.
        return f"An error occurred: {e}"

    if not isinstance(data, dict):
        return "An error occurred: expected a JSON object in the response"

    rows = data.get("rows", [])
    if not isinstance(rows, list):
        return "An error occurred: expected 'rows' to be a list"

    texts = []
    for item in rows:
        record = item.get("row") if isinstance(item, dict) else None
        if not isinstance(record, dict):
            continue
        text = record.get("text")
        if text is None:
            continue
        # str.join only accepts strings; coerce numbers and the like.
        texts.append(text if isinstance(text, str) else str(text))

    # Return as a single string with one text per line
    return "\n".join(texts)
# Gradio UI: a single text box takes the URL, and a second, taller text box
# shows whatever extract_text_from_url returns for it.
interface = gr.Interface(
    title="Extract Text from Hugging Face Dataset",
    description="Enter the URL of a Hugging Face dataset to extract and display the 'text' fields. https://datasets-server.huggingface.co/rows?dataset=pszemraj%2Fmidjourney-messages-cleaned&config=deduped&split=train&length=100&offset=0",
    fn=extract_text_from_url,
    inputs=gr.Textbox(
        label="Dataset URL",
        placeholder="Enter the dataset URL",
    ),
    outputs=gr.Textbox(
        label="Extracted Texts",
        lines=20,
        placeholder="Extracted texts will appear here",
    ),
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()