Spaces:
Running
Running
File size: 3,510 Bytes
6d13e5a 99ab207 6d13e5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install Dependencies\n",
"# Each requirement is quoted so the shell does not treat '>=' as an output\n",
"# redirection (which would drop the version constraint and create stray files).\n",
"# %pip installs into the environment of the running kernel.\n",
"%pip install \"gradio>=3.0\" \"transformers>=4.25.0\" \"torch>=1.12.0\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import Libraries\n",
"import gradio as gr\n",
"from transformers import pipeline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build the text-generation pipeline around JinaAI ReaderLM-v2\n",
"model_name = \"jinaai/ReaderLM-v2\"\n",
"html_converter = pipeline(task=\"text-generation\", model=model_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Function to Convert HTML to Markdown or JSON\n",
"def convert_html(html_input, output_format):\n",
"    \"\"\"Convert raw HTML to the requested format using the loaded pipeline.\n",
"\n",
"    Args:\n",
"        html_input: Raw HTML text to convert.\n",
"        output_format: Name of the target format; it is inserted verbatim\n",
"            into the prompt (the interface offers \"Markdown\" and \"JSON\").\n",
"\n",
"    Returns:\n",
"        The generated text with surrounding whitespace stripped, or an\n",
"        empty string when no HTML was supplied.\n",
"    \"\"\"\n",
"    # Nothing to convert: skip the model call entirely.\n",
"    if not html_input or not html_input.strip():\n",
"        return \"\"\n",
"\n",
"    # Prepare the prompt for the model\n",
"    prompt = f\"Convert the following HTML into {output_format}:\\n\\n{html_input}\"\n",
"\n",
"    # max_new_tokens budgets only the generated text; max_length also counts\n",
"    # the prompt tokens, so longer HTML left little or no room for output.\n",
"    # return_full_text=False makes the pipeline return just the continuation,\n",
"    # so the prompt no longer has to be removed with str.replace afterwards.\n",
"    response = html_converter(\n",
"        prompt,\n",
"        max_new_tokens=500,\n",
"        num_return_sequences=1,\n",
"        return_full_text=False,\n",
"    )\n",
"    return response[0]['generated_text'].strip()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Assemble the Gradio UI around convert_html\n",
"iface = gr.Interface(\n",
"    title=\"HTML to Markdown/JSON Converter\",\n",
"    description=\"Convert raw HTML into beautifully formatted Markdown or JSON using JinaAI ReaderLM-v2.\",\n",
"    theme=\"NoCrypt/miku\",  # theme selected by name\n",
"    fn=convert_html,  # callback invoked with the two inputs below, in order\n",
"    inputs=[\n",
"        gr.Textbox(lines=10, placeholder=\"Paste your raw HTML here...\", label=\"Raw HTML Input\"),\n",
"        gr.Radio([\"Markdown\", \"JSON\"], label=\"Output Format\", value=\"Markdown\"),\n",
"    ],\n",
"    outputs=gr.Textbox(lines=10, label=\"Converted Output\"),\n",
"    # Each example row is [html_input, output_format]\n",
"    examples=[\n",
"        [\"<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>\", \"Markdown\"],\n",
"        [\"<ul><li>Item 1</li><li>Item 2</li></ul>\", \"JSON\"],\n",
"    ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Start the app; inline=True asks Gradio to embed the UI in the notebook output\n",
"iface.launch(inline=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
} |