# Spaces: Running
import gradio as gr | |
from transformers import pipeline | |
# Hugging Face model id of the checkpoint used for every conversion request.
model_name = "jinaai/ReaderLM-v2"

# Build the text-generation pipeline once at import time so that each call to
# convert_html() reuses the same pipeline object.
html_converter = pipeline(task="text-generation", model=model_name)
def convert_html(html_input: str, output_format: str) -> str:
    """Convert raw HTML into the requested format using the module pipeline.

    Args:
        html_input: Raw HTML source text to convert.
        output_format: Name of the target format; it is inserted verbatim
            into the prompt (the UI offers "Markdown" and "JSON").

    Returns:
        The text generated by the model with surrounding whitespace removed,
        or an empty string when ``html_input`` is empty, whitespace-only or
        ``None``.
    """
    # Nothing to convert: skip the model call entirely.
    if not html_input or not html_input.strip():
        return ""

    # NOTE(review): the prompt is sent as plain text; the model may expect its
    # chat template instead - confirm against the model card.
    prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}"

    # max_new_tokens limits only the generated continuation. The previous
    # max_length=500 also counted the prompt tokens, so realistic HTML input
    # used up the whole budget and left little or no room for output.
    #
    # return_full_text=False asks the pipeline for the continuation only,
    # which replaces the earlier str.replace(prompt, "") post-processing; that
    # approach relied on the prompt surviving tokenisation unchanged and would
    # also delete any later echo of the prompt inside the generated text.
    response = html_converter(
        prompt,
        max_new_tokens=500,
        num_return_sequences=1,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
# Input widgets, in the same order as convert_html's parameters
# (html_input, output_format).
input_components = [
    gr.Textbox(
        lines=10,
        placeholder="Paste your raw HTML here...",
        label="Raw HTML Input",
    ),
    gr.Radio(["Markdown", "JSON"], label="Output Format", value="Markdown"),
]

# Single text area that shows whatever convert_html returns.
output_component = gr.Textbox(lines=10, label="Converted Output")

# Example rows offered in the UI: [html_input, output_format].
example_rows = [
    ["<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>", "Markdown"],
    ["<ul><li>Item 1</li><li>Item 2</li></ul>", "JSON"],
]

# Assemble the app with the NoCrypt/miku theme.
interface = gr.Interface(
    fn=convert_html,
    inputs=input_components,
    outputs=output_component,
    title="HTML to Markdown/JSON Converter",
    description="Convert raw HTML into beautifully formatted Markdown or JSON using JinaAI ReaderLM-v2.",
    theme="NoCrypt/miku",
    examples=example_rows,
)

# Start serving the app.
interface.launch()