Spaces:

mishrasahil934
/

Team_skulk

Running

App Files Files Community

mishrasahil934 committed on Jan 8

Commit

fe97d18

verified ·

1 Parent(s): 9293e13

Upload app.py (#6)

Browse files

- Upload app.py (6170869f0274c49c9e8da88c973faa02414c1826)

Files changed (1) hide show

app.py +116 -116

app.py CHANGED Viewed

@@ -1,116 +1,116 @@
-from dotenv import load_dotenv
-load_dotenv()
-import streamlit as st
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import PyPDFLoader,DirectoryLoader
-from langchain.chains.summarize import load_summarize_chain
-from transformers import pipeline
-import torch
-import base64
-# Load model directly
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
-base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
-#file loader and processing
-def file_preprocessing(file):
-    loader = PyPDFLoader(file)
-    pages = loader.load_and_split()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
-    texts = text_splitter.split_documents(pages)
-    final_texts = ""
-    for text in texts:
-        print(text)
-        final_texts = final_texts + text.page_content
-    return final_texts
-#lm pipeline
-def llm_pipleline(filepath):
-    pipe_sum = pipeline(
-        'summarization',
-        model = base_model,
-        tokenizer = tokenizer,
-        max_length = 500,
-        min_length = 50
-    )
-    input_text = file_preprocessing(filepath)
-    result = pipe_sum(input_text)
-    result = result[0]['summary_text']
-    return result
-def llm_pipleline1(ans):
-    pipe_sum = pipeline(
-        'summarization',
-        model = base_model,
-        tokenizer = tokenizer,
-        max_length = 500,
-        min_length = 50
-    )
-    input_text =""+ ans
-    result = pipe_sum(input_text)
-    result = result[0]['summary_text']
-    return result
-@st.cache_data
-# Function to display the PDF file
-def displayPDF(file):
-    # Opening file from file path
-    with open(file, "rb") as f:
-        base_pdf = base64.b64encode(f.read()).decode('utf-8')  # Corrected function name and variable
-    # Embedding PDF in HTML
-    pdf_display = f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
-    # Displaying the file
-    st.markdown(pdf_display, unsafe_allow_html=True)
-#streamlit code
-st.set_page_config(layout='wide')
-def main():
-    st.title('Content sumerizer')
-    uploaded_file = st.file_uploader("upload your pdf file", type=['pdf'])
-    if uploaded_file is not None:
-        if st.button("summarize"):
-            col1,col2 = st.columns(2)
-            filepath = uploaded_file.name
-            with open(filepath, 'wb') as temp_file:
-                temp_file.write(uploaded_file.read())
-            with col1:
-                st.info("uploaded PDF File")
-                pdf_viewer = displayPDF(filepath)
-            with col2:
-                st.info("Summarization is below")
-                summary = llm_pipleline(filepath)
-                st.success(summary)
-    else :
-        print("enter a valid pdf file")
-    if st.button("text"):
-        ans = input("enter your content")
-        if st.button("Enter"):
-            col1,col2 = st.columns(2)
-            with col1:
-                st.info("what you have entered")
-                print(ans)
-            with col2:
-                st.info("Summarization is below")
-                summary1=llm_pipleline1(ans)
-                st.success(summary1)
-if __name__ == '__main__':
-    main()

+from dotenv import load_dotenv
+load_dotenv()
+import streamlit as st
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import PyPDFLoader,DirectoryLoader
+from langchain.chains.summarize import load_summarize_chain
+from transformers import pipeline
+import torch
+import base64
+# Load model directly
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# Checkpoint id shared by the tokenizer and the model, named once so the two
+# can never drift apart.
+MODEL_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"
+
+
+# show_spinner=False: this runs before st.set_page_config() further down, so
+# the loader must not draw anything on the page itself.
+@st.cache_resource(show_spinner=False)
+def _load_summarizer_parts():
+    """Load the tokenizer and the seq2seq model a single time.
+
+    Streamlit re-executes this script on every widget interaction; caching
+    the loaded objects keeps those reruns from loading the checkpoint again.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple for ``MODEL_CHECKPOINT``.
+    """
+    return (
+        AutoTokenizer.from_pretrained(MODEL_CHECKPOINT),
+        AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT),
+    )
+
+
+# Module-level names kept as before; the summarization functions read them.
+tokenizer, base_model = _load_summarizer_parts()
+#file loader and processing
+def file_preprocessing(file):
+    """Extract the text of a PDF as a single string for summarization.
+
+    The previous version split the document into 300-character chunks with
+    a 50-character overlap and then concatenated them again, which repeated
+    the overlapping text in the result. It also printed every chunk to
+    stdout. The text is now taken page by page with no chunking, so nothing
+    is duplicated.
+
+    Args:
+        file: Path of the PDF; passed straight to ``PyPDFLoader``.
+
+    Returns:
+        The page texts joined by newlines; an empty string when the loader
+        yields no pages.
+    """
+    loader = PyPDFLoader(file)
+    # load() is expected to yield one document per page without re-splitting
+    # (TODO confirm against the installed langchain version).
+    pages = loader.load()
+    # The newline keeps the last word of one page from fusing with the first
+    # word of the next.
+    return "\n".join(page.page_content for page in pages)
+#lm pipeline
+def llm_pipleline(filepath):
+    """Summarize the PDF stored at ``filepath``.
+
+    The text is extracted with ``file_preprocessing`` and handed to
+    ``llm_pipleline1``. That function builds the same summarization pipeline
+    this one used to duplicate (same model, tokenizer, ``max_length=500``
+    and ``min_length=50``), so the settings now live in one place.
+
+    Args:
+        filepath: Path of the PDF to summarize.
+
+    Returns:
+        The summary text produced for the document.
+    """
+    return llm_pipleline1(file_preprocessing(filepath))
+def llm_pipleline1(ans):
+    """Summarize a piece of text with the module-level model and tokenizer.
+
+    Args:
+        ans: Text to summarize. It is concatenated onto an empty string, so
+            a non-string argument raises ``TypeError``.
+
+    Returns:
+        The ``summary_text`` field of the first result returned by the
+        summarization pipeline.
+    """
+    summarizer = pipeline(
+        'summarization',
+        model=base_model,
+        tokenizer=tokenizer,
+        max_length=500,
+        min_length=50,
+    )
+    source_text = "" + ans
+    outputs = summarizer(source_text)
+    return outputs[0]['summary_text']
+def displayPDF(file):
+    """Render the PDF stored at ``file`` inline on the page.
+
+    The file is read, base64-encoded and embedded as a ``data:`` URI inside
+    an ``<iframe>`` written with ``st.markdown``.
+
+    This function is deliberately not wrapped in ``st.cache_data``. The cache
+    key would be the path string alone, so a different PDF saved under the
+    same name would replay the previously rendered document.
+
+    Args:
+        file: Path of the PDF to show.
+
+    Returns:
+        None. The iframe is written to the current Streamlit container.
+    """
+    with open(file, "rb") as pdf_file:
+        encoded_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')
+    pdf_display = f'<iframe src="data:application/pdf;base64,{encoded_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
+    # unsafe_allow_html is needed for the raw <iframe> markup to be emitted.
+    st.markdown(pdf_display, unsafe_allow_html=True)
+# Streamlit page setup: use the wide layout. This statement sits at module
+# level, so it runs when the script is loaded, before main() is called.
+st.set_page_config(layout='wide')
+def main():
+    """Build the page: a PDF summarizer and a free-text summarizer.
+
+    PDF flow: the uploaded file is saved under ``data/``, shown in the left
+    column and summarized in the right column when "summarize" is pressed.
+
+    Text flow: the content typed into the text area is echoed in the left
+    column and summarized in the right column when "text" is pressed.
+    """
+    import os  # local import: only the upload handling below needs it
+
+    st.title('Content sumerizer')
+    uploaded_file = st.file_uploader("upload your pdf file", type=['pdf'])
+    if uploaded_file is not None:
+        if st.button("summarize"):
+            col1, col2 = st.columns(2)
+            # The target directory is not guaranteed to exist on a fresh
+            # deployment; without this the open() below fails.
+            os.makedirs("data", exist_ok=True)
+            # basename() drops any directory components from the
+            # client-supplied name so the file always lands inside data/.
+            filepath = os.path.join("data", os.path.basename(uploaded_file.name))
+            with open(filepath, 'wb') as temp_file:
+                temp_file.write(uploaded_file.getvalue())
+            with col1:
+                st.info("uploaded PDF File")
+                displayPDF(filepath)
+            with col2:
+                st.info("Summarization is below")
+                summary = llm_pipleline(filepath)
+                st.success(summary)
+    else:
+        # print() only reaches the server log; show the hint on the page.
+        st.info("enter a valid pdf file")
+
+    # A widget replaces input(), which would block on the server's stdin.
+    # One button replaces the nested pair: the inner button could never fire
+    # because the outer one is False again on the rerun its click triggers.
+    ans = st.text_area("enter your content")
+    if st.button("text"):
+        if ans.strip():
+            col1, col2 = st.columns(2)
+            with col1:
+                st.info("what you have entered")
+                st.write(ans)
+            with col2:
+                st.info("Summarization is below")
+                summary1 = llm_pipleline1(ans)
+                st.success(summary1)
+        else:
+            st.warning("enter your content")
+if __name__ == '__main__':
+    main()            error is NameError: name 'base64_pdf' is not defined