# ReaderLM-v2 / app.py
# Felguk's picture
# Create app.py
# 28def44 verified
import gradio as gr
from transformers import pipeline
# Hugging Face Hub identifier of the JinaAI ReaderLM-v2 checkpoint.
model_name = "jinaai/ReaderLM-v2"

# Build the text-generation pipeline once at import time so the model is
# not reloaded on each call.
html_converter = pipeline(task="text-generation", model=model_name)
# Function to convert HTML to Markdown or JSON
def convert_html(html_input, output_format, max_new_tokens=500):
    """Convert raw HTML into the requested format with the ReaderLM-v2 pipeline.

    Args:
        html_input: Raw HTML text to convert.
        output_format: Name of the target format; it is inserted verbatim
            into the prompt (the UI offers "Markdown" and "JSON").
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Defaults to 500.

    Returns:
        The generated text without the prompt, stripped of surrounding
        whitespace. An empty string is returned when ``html_input`` is
        empty, ``None``, or whitespace only.
    """
    # Nothing to convert: skip the model call instead of prompting it with
    # an empty document.
    if not html_input or not html_input.strip():
        return ""

    # Prepare the prompt for the model
    prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}"

    # Bound only the *generated* tokens. A total-length cap (max_length)
    # also counts the prompt, so a long HTML input would leave little or no
    # room for output.
    response = html_converter(
        prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        # Let the pipeline drop the prompt; a manual str.replace() can miss
        # when the decoded text differs slightly from the prompt, and it
        # would also delete any later repetition of the prompt text.
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
# Web UI definition: an HTML text box and a format selector feed
# convert_html, and the result is shown in a second text box.
interface = gr.Interface(
    # Presentation
    title="HTML to Markdown/JSON Converter",
    description="Convert raw HTML into beautifully formatted Markdown or JSON using JinaAI ReaderLM-v2.",
    theme="NoCrypt/miku",
    # Wiring: the order of the inputs matches convert_html's positional
    # parameters (html_input, output_format).
    fn=convert_html,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder="Paste your raw HTML here...",
            label="Raw HTML Input",
        ),
        gr.Radio(
            ["Markdown", "JSON"],
            label="Output Format",
            value="Markdown",
        ),
    ],
    outputs=gr.Textbox(lines=10, label="Converted Output"),
    # Sample rows, each one [html_input, output_format].
    examples=[
        ["<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>", "Markdown"],
        ["<ul><li>Item 1</li><li>Item 2</li></ul>", "JSON"],
    ],
)
# Launch the interface only when this file is executed as a script, so that
# importing the module (e.g. to reuse convert_html) does not start a server.
if __name__ == "__main__":
    interface.launch()