Spaces:

fuhsiao
/

Ext-Abs-StructuredSum

Runtime error

File size: 2,716 Bytes

9b6c439
4d21bee
 
89ea49b
46d02f4
dfcc660
 
 
89ea49b
dfcc660
 
 
89ea49b
46d02f4
dfcc660
 
 
89ea49b
46d02f4
dfcc660
 
 
 
 
 
 
89ea49b
4d21bee
411567e
 
de45564
411567e
4d21bee
411567e
4d21bee
 
411567e
4d21bee
 
99d8161
 
f321fd2
 
2280fc9
 
dfcc660
2280fc9
dfcc660
2280fc9
 
f321fd2
 
 
 
f6b2292
99d8161
f321fd2
4d21bee
 
76f1e3b
 
dfcc660
 
 
76f1e3b

from utils import *
import gradio as gr

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def download_model():
    """Pre-fetch every pretrained model so it is cached before first use.

    Each constructor / ``from_pretrained`` call below is made only for its
    download-and-cache side effect; the returned objects are discarded.

    Returns:
        Always ``True`` once every call has completed.
    """
    # Download and cache the model and tokenizer SentenceTransformer needs.
    SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    # Each seq2seq checkpoint needs both its tokenizer and its weights.
    # Order is kept as BioBART first, then BART.
    for checkpoint in (
        "fuhsiao/BioBART-PMC-EXT-Section",
        "fuhsiao/BART-PMC-EXT-Section",
    ):
        AutoTokenizer.from_pretrained(checkpoint)
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    return True






# Maps the "Abstractive - Field" choice to the seq2seq checkpoint used for it.
_ABSTR_MODEL_PATHS = {
    'non-specialized field': 'fuhsiao/BART-PMC-EXT-Section',
    'biomedical field': 'fuhsiao/BioBART-PMC-EXT-Section',
}

# Keys of the abstractive result paired with the heading printed for each.
_SECTION_HEADINGS = (
    ('I', 'Introduction'),
    ('M', 'Methods'),
    ('R', 'Results'),
    ('D', 'Discussion/Conclusion'),
)


def main(file, ext_threshold, article_type):
    """Produce a structured abstract for an uploaded paper.

    The paper is read and validated, passed through the extractive stage,
    then through the abstractive model selected by ``article_type``.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or the path itself as a string.
        ext_threshold: Threshold forwarded to ``extractive_method``.
        article_type: ``'non-specialized field'`` or ``'biomedical field'``;
            selects the abstractive checkpoint.

    Returns:
        The formatted abstract (one heading plus text per section), or a
        short message string when the inputs or the file format are invalid.
    """
    invalid_input = 'Please confirm that the file and settings are correct.'

    if file is None or ext_threshold is None or article_type is None:
        return invalid_input

    # Reject an unknown field before any heavy work is done; previously an
    # empty model path reached load_AbstrModel after the extractive stage.
    abstr_model_path = _ABSTR_MODEL_PATHS.get(article_type)
    if abstr_model_path is None:
        return invalid_input

    # Accept both a file-like object with ``.name`` and a bare path string.
    paper = read_text_to_json(getattr(file, 'name', file))

    if not is_valid_format(paper):
        return "invalid_format"

    sentJson = convert_to_sentence_json(paper)
    sentFeat = extract_sentence_features(sentJson)

    ExtModel = load_ExtModel('model/LGB_model_F10_S.pkl')
    ext = extractive_method(sentJson, sentFeat, ExtModel, threshold=ext_threshold, TGB=False)

    TOKENIZER, ABSTRMODEL = load_AbstrModel(abstr_model_path)
    abstr = abstractive_method(ext, tokenizer=TOKENIZER, model=ABSTRMODEL)

    # Every section, including the last, is followed by a blank line.
    return ''.join(
        f"{heading}\n{abstr[key]}\n\n" for key, heading in _SECTION_HEADINGS
    )
    

if __name__ == '__main__':

    # Fetch and cache all pretrained models before the UI is started.
    download_model()

    # Define the Gradio interface.
    # Components come from the top-level ``gr`` namespace and take ``value=``
    # for their initial value: the ``gr.inputs`` / ``gr.outputs`` namespaces
    # and the ``default=`` keyword are deprecated in Gradio 3.x and removed
    # in Gradio 4.
    iface = gr.Interface(
        fn=main,
        inputs=[
            gr.File(),
            gr.Slider(minimum=0.5, maximum=1, value=0.5, step=0.01, label="Extractive - Threshold"),
            gr.Dropdown(["non-specialized field", "biomedical field"], value="non-specialized field", label="Abstractive - Field"),
        ],
        outputs=gr.Textbox(label="Output - Structured Abstract"),
        title="Ext-Abs-StructuredSum",
        description="please upload a .txt file formatted in the form of the example.",
        # examples=[['text.txt']],
        allow_flagging='never'
    )

    # Launch the Gradio interface; share=False disables the public share link.
    iface.launch(share=False)