aditi2222 committed on
Commit
0530b74
·
1 Parent(s): 9e74e0f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import BartTokenizer, BartForConditionalGeneration
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
def fren_en(french_article):
    """Translate a French article to English in fixed-size chunks.

    Uses the Helsinki-NLP/opus-mt-fr-en MarianMT model. The input is split
    into 2100-character slices (at most three, i.e. the first 6300
    characters) because the model cannot translate very long inputs in a
    single pass.

    Args:
        french_article: French source text (str).

    Returns:
        str: the concatenated English translation of the processed chunks.
    """
    mname = 'Helsinki-NLP/opus-mt-fr-en'
    tokenizer = AutoTokenizer.from_pretrained(mname)
    model = AutoModelForSeq2SeqLM.from_pretrained(mname)

    # Bug fix: the original sliced an undefined name `article` instead of
    # the `french_article` parameter, raising NameError at runtime.
    translated_chunks = []
    for start in range(0, 6300, 2100):
        chunk = french_article[start:start + 2100]
        if not chunk:
            # Article shorter than 6300 chars: don't feed empty slices
            # through the model.
            continue
        input_ids = tokenizer.encode(chunk, return_tensors="pt")
        outputs = model.generate(input_ids)
        translated_chunks.append(
            tokenizer.decode(outputs[0], skip_special_tokens=True))

    return "".join(translated_chunks)
26
+
27
+
28
def article_summarization(french_article_for_summarization):
    """Translate a French article to English, then summarize it.

    Translation is delegated to :func:`fren_en`; summarization uses the
    facebook/bart-large-cnn model.

    Args:
        french_article_for_summarization: the French article text (str).

    Returns:
        str: an English summary of the article.
    """
    # Bug fix: the original called fren_en(french_article), an undefined
    # name — the function's own parameter is what must be translated.
    english_article = fren_en(french_article_for_summarization)

    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
    model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")

    # Generate the summary. NOTE(review): max_length=12000 is far above
    # BART's 1024-token positional limit, so the effective cap comes from
    # the model / early stopping rather than this value — confirm intent.
    input_ids = tokenizer.encode(english_article, return_tensors='pt')
    summary_ids = model.generate(input_ids,
                                 min_length=20,
                                 max_length=12000,
                                 num_beams=10,
                                 repetition_penalty=2.5,
                                 length_penalty=1.0,
                                 early_stopping=True,
                                 no_repeat_ngram_size=2,
                                 use_cache=True,
                                 do_sample=True,
                                 temperature=0.8,
                                 top_k=50,
                                 top_p=0.95)

    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text
51
+
52
# Wire the translate-then-summarize pipeline into a Gradio web UI.
# Bug fix: corrected user-visible typos "sumamrized" -> "summarized" and
# "newpaper" -> "newspaper" in the title and placeholder strings.
iface = gr.Interface(
    fn=article_summarization,
    title="French newspaper article summarized in English",
    description=("facebook/bart-large-cnn for summarization in English and "
                 "Helsinki-NLP/opus-mt-fr-en for translation from French to English "),
    inputs=gr.inputs.Textbox(lines=50, placeholder="Enter newspaper article in French"),
    outputs="text",
)

iface.launch()
55
+
56
+
57
+
58
+