File size: 1,211 Bytes
a9ae4d6
 
2d9a7bb
a9ae4d6
d6bb012
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89dee4c
 
d6bb012
 
 
 
89dee4c
d6bb012
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import streamlit as st
from annotated_text import annotated_text
from io import StringIO

import os
# Set in the process environment before the remaining imports below run.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen with some ML stacks -- confirm it is still required.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import plotly.express as px
from streamlit_option_menu import option_menu

# Configure the page with the "wide" layout. This is issued before any other
# Streamlit call in this script.
# NOTE(review): Streamlit has historically required set_page_config to be the
# first Streamlit command on a page -- keep it ahead of other st.* calls.
st. set_page_config(layout="wide")

from transformers import pipeline
import pandas as pd

@st.cache(allow_output_mutation=True)
def init_text_summarization_model():
    """Create the text-summarization pipeline.

    The result is wrapped by ``st.cache`` with ``allow_output_mutation=True``,
    so the cached pipeline object may be mutated by callers without
    invalidating the cache entry.

    Returns:
        The object produced by ``pipeline("summarization", ...)`` for the
        ``facebook/bart-large-cnn`` checkpoint.
    """
    # NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases in
    # favour of ``st.cache_resource`` -- confirm the pinned Streamlit version
    # before migrating.
    checkpoint = 'facebook/bart-large-cnn'
    return pipeline("summarization", model=checkpoint)

@st.cache(allow_output_mutation=True)
def init_zsl_topic_classification():
    """Create the zero-shot topic-classification pipeline and its template.

    The result is wrapped by ``st.cache`` with ``allow_output_mutation=True``,
    so the cached objects may be mutated by callers without invalidating the
    cache entry.

    Returns:
        A ``(pipe, template)`` tuple: the object produced by
        ``pipeline("zero-shot-classification", ...)`` for the
        ``facebook/bart-large-mnli`` checkpoint, and the template string
        ``"This text is about {}."`` with one ``{}`` placeholder.
    """
    # NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases in
    # favour of ``st.cache_resource`` -- confirm the pinned Streamlit version
    # before migrating.
    checkpoint = 'facebook/bart-large-mnli'
    classifier = pipeline("zero-shot-classification", model=checkpoint)
    hypothesis_template = "This text is about {}."
    return classifier, hypothesis_template

# Model initialization: both initializers are wrapped in st.cache, so the
# pipelines are obtained through Streamlit's cache rather than called bare.
pipeline_summarization = init_text_summarization_model()
pipeline_zsl, template = init_zsl_topic_classification()

# Page heading.
st.header("Intelligent Document Automation")

# File-upload widget; the script later checks this value against None
# before running the OCR step.
uploaded_file = st.file_uploader("Choose a file")

def get_text_from_ocr_engine(uploaded_file):
    """Return the text for ``uploaded_file``.

    Placeholder implementation: ``uploaded_file`` is not read, and the same
    fixed sample sentence is returned for every call.

    Args:
        uploaded_file: The uploaded file object (currently unused).

    Returns:
        The fixed sample sentence as a ``str``.
    """
    sample_text = "This is a sample text for named entity recognition and other tasks"
    return sample_text


# Only once a file has been provided: obtain its text via the OCR helper and
# write that text to the page.
if uploaded_file is not None:
    ocr_text  = get_text_from_ocr_engine(uploaded_file)
    st.write(ocr_text)