# Hugging Face Spaces page header (status: Paused)
import gradio as gr | |
import markdown | |
from markdown.extensions.tables import TableExtension | |
from markdown.extensions.fenced_code import FencedCodeExtension | |
from markdown.extensions.toc import TocExtension | |
from markdown.extensions.attr_list import AttrListExtension | |
from markdown.extensions.codehilite import CodeHiliteExtension | |
from transformers import pipeline | |
# Render Markdown source to HTML with the app's extension set
def render_markdown(md_text):
    """Convert Markdown text to HTML via ``markdown.markdown``.

    The following extensions are enabled, in this order: tables, fenced
    code, table of contents (``baselevel=2``), attribute lists, and
    CodeHilite (``linenums=False``, ``css_class="highlight"``).

    Args:
        md_text: Markdown source text.

    Returns:
        The value returned by ``markdown.markdown`` for ``md_text``.
    """
    # Fresh extension instances are created on every call, as before.
    enabled_extensions = [
        TableExtension(),
        FencedCodeExtension(),
        TocExtension(baselevel=2),
        AttrListExtension(),
        CodeHiliteExtension(linenums=False, css_class="highlight"),
    ]
    return markdown.markdown(md_text, extensions=enabled_extensions)
# Model identifier for JinaAI ReaderLM-v2
model_name = "jinaai/ReaderLM-v2"
# Text-generation pipeline built once at import time and used by convert_html
html_converter = pipeline(task="text-generation", model=model_name)
# Function to convert HTML to Markdown or JSON
def convert_html(html_input, output_format):
    """Convert raw HTML into the requested format with the shared pipeline.

    Args:
        html_input: Raw HTML text to convert. May be empty or ``None``.
        output_format: Name of the target format; it is interpolated into
            the prompt verbatim (the UI offers "Markdown" and "JSON").

    Returns:
        The generated text, without the prompt and with surrounding
        whitespace stripped. Returns an empty string when ``html_input``
        is ``None``, empty, or whitespace-only.
    """
    # Nothing to convert: skip the (expensive) model call entirely.
    if not html_input or not html_input.strip():
        return ""

    # Prepare the prompt for the model
    prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}"

    # max_new_tokens limits only the generated continuation. The previous
    # max_length=500 also counted the prompt tokens, so any sizeable HTML
    # input left little or no budget for the actual output.
    # return_full_text=False makes the pipeline return just the continuation,
    # which replaces the fragile str.replace(prompt, "") post-processing.
    response = html_converter(
        prompt,
        max_new_tokens=500,
        num_return_sequences=1,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
# Build the two-tab Gradio interface
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    # Tab 1: Markdown editor and rendered preview, one column each in a row
    with gr.Tab("Live Preview"):
        gr.Markdown("# Markdown Suite")
        with gr.Row():
            with gr.Column():
                md_input = gr.Textbox(
                    label="Markdown Input",
                    placeholder="Write your markdown here...",
                    lines=20,
                    elem_classes=["gr-textbox"],
                )
            with gr.Column():
                md_output = gr.HTML(
                    label="Rendered Output",
                    elem_classes=["gr-html"],
                )
        # Re-render the preview whenever the editor content changes
        md_input.change(fn=render_markdown, inputs=md_input, outputs=md_output)

    # Tab 2: model-backed HTML conversion
    with gr.Tab("HTML to Markdown/JSON"):
        gr.Markdown("# HTML to Markdown/JSON Converter")
        with gr.Row():
            html_input = gr.Textbox(
                label="Raw HTML Input",
                placeholder="Paste your raw HTML here...",
                lines=10,
            )
            output_format = gr.Radio(
                choices=["Markdown", "JSON"],
                label="Output Format",
                value="Markdown",
            )
        converted_output = gr.Textbox(
            label="Converted Output",
            lines=10,
        )
        # Run the conversion when the button is clicked
        convert_button = gr.Button("Convert")
        convert_button.click(
            fn=convert_html,
            inputs=[html_input, output_format],
            outputs=converted_output,
        )

# Start the Gradio server
demo.launch()