import requests
import gradio as gr

# Upper bound, in seconds, for a single dataset request. Without a timeout,
# requests.get() can wait forever on a server that never answers, which would
# leave the Gradio callback hanging.
REQUEST_TIMEOUT_SECONDS = 30


def extract_text_from_url(url):
    """Download a JSON document from *url* and return its rows' 'text' values.

    The response body is expected to be a JSON object with a "rows" list,
    where each entry holds a "row" mapping. Every "row" mapping that has a
    "text" key contributes that value to the output; rows without the key
    are skipped. A missing "rows" key is treated as an empty list.

    Args:
        url: Address to fetch. Leading/trailing whitespace is ignored.

    Returns:
        The collected texts joined with newlines, or a message starting with
        "An error occurred: " when the URL is blank, the request fails or
        times out, the body is not JSON, or the payload has another shape.
        Errors are reported as text rather than raised because this function
        is the UI callback and its return value is shown to the user.
    """
    url = (url or "").strip()
    if not url:
        return "An error occurred: no URL was provided"

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # turn HTTP 4xx/5xx into an exception
        data = response.json()

        rows = data.get("rows", [])
        texts = [row["row"]["text"] for row in rows if "text" in row["row"]]

        return "\n".join(texts)
    except Exception as e:
        # Deliberately broad: this is the boundary with the UI, so network,
        # decoding, and payload-shape failures are all surfaced as a message.
        return f"An error occurred: {e}"


# Gradio interface: one text input (the URL) and one multi-line text output.
interface = gr.Interface(
    fn=extract_text_from_url,
    inputs=gr.Textbox(label="Dataset URL", placeholder="Enter the dataset URL"),
    outputs=gr.Textbox(
        label="Extracted Texts",
        lines=20,
        placeholder="Extracted texts will appear here",
    ),
    title="Extract Text from Hugging Face Dataset",
    description=(
        "Enter the URL of a Hugging Face dataset to extract and display the 'text' fields. "
        "https://datasets-server.huggingface.co/rows?dataset=pszemraj%2Fmidjourney-messages-cleaned&config=deduped&split=train&length=100&offset=0"
    ),
)

if __name__ == "__main__":
    interface.launch()