File size: 4,844 Bytes
3cf27bd
 
 
 
 
 
 
571f7e3
3cf27bd
 
 
8206fd5
 
 
3cf27bd
 
8206fd5
 
 
 
 
3cf27bd
 
8206fd5
 
3cf27bd
571f7e3
 
8206fd5
 
571f7e3
 
 
8206fd5
 
571f7e3
8206fd5
571f7e3
8206fd5
ee88838
8206fd5
 
571f7e3
8206fd5
571f7e3
8206fd5
571f7e3
 
3cf27bd
 
571f7e3
8206fd5
ca58a74
8206fd5
571f7e3
ca58a74
 
8206fd5
ca58a74
 
 
 
 
 
 
8206fd5
ca58a74
3cf27bd
8206fd5
ca58a74
3cf27bd
8206fd5
571f7e3
8206fd5
571f7e3
 
8206fd5
571f7e3
 
 
 
 
 
8206fd5
571f7e3
 
 
 
 
8206fd5
571f7e3
 
 
 
 
8206fd5
571f7e3
 
8206fd5
 
 
571f7e3
 
3cf27bd
8206fd5
3cf27bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
import markdown
from markdown.extensions.tables import TableExtension
from markdown.extensions.fenced_code import FencedCodeExtension
from markdown.extensions.toc import TocExtension
from markdown.extensions.attr_list import AttrListExtension
from markdown.extensions.codehilite import CodeHiliteExtension
from transformers import pipeline

# Render Markdown source into HTML for the live preview pane
def render_markdown(md_text):
    """Convert Markdown text to HTML using the app's fixed extension set.

    The enabled extensions are tables, fenced code blocks, table of
    contents (``baselevel=2``), attribute lists, and code highlighting
    (CSS class ``"highlight"``, line numbers disabled).

    Args:
        md_text: Markdown source text to render.

    Returns:
        The HTML string produced by ``markdown.markdown``.
    """
    print("[DEBUG] render_markdown called with input:", md_text)  # Trace the raw input

    # Fresh extension instances are built for each call, in the same order every time
    active_extensions = [
        TableExtension(),
        FencedCodeExtension(),
        TocExtension(baselevel=2),
        AttrListExtension(),
        CodeHiliteExtension(linenums=False, css_class="highlight"),
    ]
    html_fragment = markdown.markdown(md_text, extensions=active_extensions)

    print("[DEBUG] Rendered HTML output:", html_fragment)  # Trace the rendered result
    return html_fragment

# Build the shared text-generation pipeline backed by JinaAI ReaderLM-v2
model_name = "jinaai/ReaderLM-v2"
print("[DEBUG] Loading model:", model_name)  # Trace which checkpoint is being loaded
# Created once at module load so each conversion call reuses the same pipeline object
html_converter = pipeline(task="text-generation", model=model_name)

# Convert raw HTML to Markdown or JSON with the text-generation pipeline
def convert_html(html_input, output_format):
    """Convert raw HTML into the requested format using ``html_converter``.

    Args:
        html_input: Raw HTML text to convert.
        output_format: Name of the target format; it is interpolated
            verbatim into the prompt (the UI offers "Markdown" and "JSON").

    Returns:
        The generated text with surrounding whitespace stripped, or an
        empty string when ``html_input`` is ``None``, empty, or only
        whitespace.
    """
    print("[DEBUG] convert_html called with inputs:", html_input, output_format)  # Debug log for inputs

    # Nothing to convert: avoid an expensive model call on blank input
    if not html_input or not html_input.strip():
        print("[DEBUG] Empty HTML input; skipping model call")
        return ""

    # Prepare the prompt for the model, specifying the desired output format
    prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}"
    print("[DEBUG] Generated prompt:", prompt)  # Debug log for the prompt

    # return_full_text=False asks the pipeline to return only the newly
    # generated text, so the prompt never has to be removed by string
    # matching (str.replace would delete every occurrence of the prompt and
    # silently do nothing if the echoed text differed from it at all).
    response = html_converter(
        prompt,
        max_length=99999,
        num_return_sequences=1,
        return_full_text=False,
    )
    print("[DEBUG] Model response:", response)  # Debug log for model response

    # Only one sequence is requested, so take the first result
    converted_output = response[0]['generated_text'].strip()
    print("[DEBUG] Converted output:", converted_output)  # Debug log for the final output
    return converted_output

# Assemble the two-tab Gradio UI
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:

    # ---- Tab 1: Markdown editor beside its rendered HTML ----
    with gr.Tab("Live Preview"):
        gr.Markdown("# Markdown Suite")  # Tab heading

        with gr.Row():
            with gr.Column():
                # Left column: Markdown source editor
                md_input = gr.Textbox(
                    label="Markdown Input",
                    placeholder="Write your markdown here...",
                    lines=20,
                    elem_classes=["gr-textbox"],
                )
            with gr.Column():
                # Right column: HTML produced by render_markdown
                md_output = gr.HTML(elem_classes=["gr-html"], label="Rendered Output")

        # Re-render the preview each time the editor content changes
        md_input.change(fn=render_markdown, inputs=md_input, outputs=md_output)

    # ---- Tab 2: model-backed HTML conversion ----
    with gr.Tab("HTML to Markdown/JSON"):
        gr.Markdown("# HTML to Markdown/JSON Converter")  # Tab heading

        with gr.Row():
            # Raw HTML to be converted
            html_input = gr.Textbox(
                label="Raw HTML Input",
                placeholder="Paste your raw HTML here...",
                lines=10,
            )

            # Target format selector; "Markdown" is preselected
            output_format = gr.Radio(
                choices=["Markdown", "JSON"],
                value="Markdown",
                label="Output Format",
            )

        # Shows the text returned by convert_html
        converted_output = gr.Textbox(label="Converted Output", lines=10)

        # Conversion runs only when the button is clicked
        convert_button = gr.Button("Convert")
        convert_button.click(
            fn=convert_html,
            inputs=[html_input, output_format],
            outputs=converted_output,
        )

# Start the app
print("[DEBUG] Launching the app")  # Trace app launch
demo.launch()