girijareddy committed on
Commit 6175827 · 1 Parent(s): 1b49bcf

Delete app.py

Files changed (1)
  1. app.py +0 -77
app.py DELETED
@@ -1,77 +0,0 @@
- import spacy
- from spacy.lang.en.stop_words import STOP_WORDS
- from string import punctuation
- import streamlit as st
- from heapq import nlargest
- import subprocess
- subprocess.run("pip3 install PyPDF2".split())
- subprocess.run("python3 -m spacy download en_core_web_sm".split())
- import PyPDF2
- from utils import (
-     clean_text,
-     fetch_article_text,
-     preprocess_text_for_abstractive_summarization,
-     read_text_from_file,
- )
- # ---------------------Pre-Requisite------------------------#
- stopwords = STOP_WORDS
- punctuation = punctuation + '\n'
-
-
- if __name__ == "__main__":
-     st.title("Text Summarizer 📝")
-     st.subheader("Creator: Shreyas Dixit")
-
-     n = st.sidebar.slider('Summarization %', 10, 90, step=10)
-     n = n / 100
-     input_type = st.selectbox('Pick one', ['PDF', 'Text'])
-     if input_type == "PDF":
-         # Upload file
-         uploaded_file = st.file_uploader("Choose a file", type=['pdf', 'txt', 'docx'])
-         text = read_text_from_file(uploaded_file)
-         # FileName = uploaded_file.name
-         # if uploaded_file is not None:
-         #     pdfFileObj = open(FileName, 'rb')
-         #     pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
-         #     pageObj = pdfReader.getPage(0)
-         #     text = pageObj.extractText()
-         #     pdfFileObj.close()
-     elif input_type == "Text":
-         # Text input
-         text = st.text_area("Input text!")
-
-     if st.button('Summarize'):
-         nlp = spacy.load('en_core_web_sm')
-         doc = nlp(text)
-         # Word tokenization
-         tokens = [token.text for token in doc]
-         word_frequency = {}
-         for word in doc:
-             if word.text.lower() not in stopwords:
-                 if word.text.lower() not in punctuation:
-                     if word.text.lower() not in word_frequency:
-                         word_frequency[word.text.lower()] = 1
-                     else:
-                         word_frequency[word.text.lower()] += 1
-         # Normalize the values
-         max_frequency = max(word_frequency.values())
-         for word in word_frequency.keys():
-             word_frequency[word] = word_frequency[word] / max_frequency
-         # Sentence tokenization
-         sentence_token = [sent for sent in doc.sents]
-         sentence_score = {}
-         for sent in sentence_token:
-             for word in sent:
-                 if word.text.lower() in word_frequency:
-                     if sent not in sentence_score:
-                         sentence_score[sent] = word_frequency[word.text.lower()]
-                     else:
-                         sentence_score[sent] += word_frequency[word.text.lower()]
-         # Creating a summary from the highest-scoring sentences
-         select_length = int(len(sentence_token) * n)
-         summary = nlargest(select_length, sentence_score, key=sentence_score.get)
-         summary = [sent.text for sent in summary]
-         summary = ' '.join(summary)
-         st.markdown(summary)