File size: 2,901 Bytes
3cf27bd
 
 
 
 
 
 
571f7e3
3cf27bd
 
 
 
 
 
 
 
 
 
 
 
 
 
571f7e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cf27bd
 
571f7e3
ca58a74
 
571f7e3
ca58a74
 
 
 
 
 
 
 
 
 
3cf27bd
ca58a74
3cf27bd
571f7e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cf27bd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
import markdown
from markdown.extensions.tables import TableExtension
from markdown.extensions.fenced_code import FencedCodeExtension
from markdown.extensions.toc import TocExtension
from markdown.extensions.attr_list import AttrListExtension
from markdown.extensions.codehilite import CodeHiliteExtension
from transformers import pipeline

def render_markdown(md_text):
    """Convert Markdown source text into an HTML string.

    The conversion runs with the table, fenced-code, table-of-contents
    (``baselevel=2``), attribute-list and code-highlighting extensions
    enabled; highlighted code is emitted without line numbers under the
    ``highlight`` CSS class.

    Args:
        md_text: Markdown source to render.

    Returns:
        The HTML produced by ``markdown.markdown`` for ``md_text``.
    """
    enabled_extensions = [
        TableExtension(),
        FencedCodeExtension(),
        TocExtension(baselevel=2),
        AttrListExtension(),
        CodeHiliteExtension(linenums=False, css_class="highlight"),
    ]
    rendered_html = markdown.markdown(md_text, extensions=enabled_extensions)
    return rendered_html

# Identifier of the JinaAI ReaderLM-v2 model to load
model_name = "jinaai/ReaderLM-v2"
# Text-generation pipeline used by the HTML conversion function
html_converter = pipeline(task="text-generation", model=model_name)

def convert_html(html_input, output_format, max_new_tokens=500):
    """Convert raw HTML into the requested format with the text-generation model.

    Args:
        html_input: Raw HTML text to convert.
        output_format: Name of the target format inserted into the prompt
            (the UI offers "Markdown" and "JSON").
        max_new_tokens: Upper bound on the number of tokens generated for
            the answer. Unlike ``max_length`` it does not count the prompt,
            so long HTML inputs still leave room for the converted output.

    Returns:
        The generated text without the prompt, stripped of surrounding
        whitespace. An empty string is returned when ``html_input`` is
        empty, blank, or ``None``.
    """
    # Nothing to convert: skip the (expensive) model call entirely.
    if not html_input or not html_input.strip():
        return ""

    # Prepare the prompt for the model
    prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}"

    # return_full_text=False makes the pipeline return only the newly
    # generated text, which is more reliable than removing the prompt with
    # str.replace (that fails when the decoded text differs from the prompt
    # and would also delete repeated occurrences inside the output).
    response = html_converter(
        prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()

# Build the two-tab Gradio interface
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:

    # Tab 1: Markdown editor on the left, rendered HTML on the right
    with gr.Tab("Live Preview"):
        gr.Markdown("# Markdown Suite")

        with gr.Row():
            with gr.Column():
                md_input = gr.Textbox(
                    label="Markdown Input",
                    placeholder="Write your markdown here...",
                    lines=20,
                    elem_classes=["gr-textbox"],
                )
            with gr.Column():
                md_output = gr.HTML(
                    label="Rendered Output",
                    elem_classes=["gr-html"],
                )

        # Re-render the preview every time the Markdown text changes
        md_input.change(fn=render_markdown, inputs=md_input, outputs=md_output)

    # Tab 2: model-backed conversion of raw HTML to Markdown or JSON
    with gr.Tab("HTML to Markdown/JSON"):
        gr.Markdown("# HTML to Markdown/JSON Converter")

        with gr.Row():
            html_input = gr.Textbox(
                label="Raw HTML Input",
                placeholder="Paste your raw HTML here...",
                lines=10,
            )

            output_format = gr.Radio(
                choices=["Markdown", "JSON"],
                value="Markdown",
                label="Output Format",
            )

        converted_output = gr.Textbox(label="Converted Output", lines=10)

        # Run the conversion only when the button is clicked
        convert_button = gr.Button("Convert")
        convert_button.click(
            fn=convert_html,
            inputs=[html_input, output_format],
            outputs=converted_output,
        )

# Start the app
demo.launch()