shamimjony1000 committed on
Commit
fecd026
·
verified ·
1 Parent(s): 905e6a1

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +100 -0
  2. llm_part.py +81 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end: summarise a research PDF as a table, then as a paragraph."""
import html
import os
import tempfile

import streamlit as st
from langchain_groq import ChatGroq  # noqa: F401  (not used below; kept from the original import list)

import llm_part

# Only the first part of the document is sent to the model, to bound prompt size.
MAX_DOCUMENT_CHARS = 20000

# Ordered phrase rewrites applied to the joined table details before paraphrasing.
# Order matters: later entries match text produced by earlier ones.
# NOTE(review): several of these look tuned to one specific paper (CNN /
# Federated Learning) -- confirm they are still wanted for arbitrary PDFs.
_PHRASE_REWRITES = (
    (" ,", ","),
    (" .", "."),
    (". CNN", ". In this approach, CNN"),
    ("Federated learning (FL)", "The use of Federated Learning (FL)"),
    ("The use of Federated Learning", "The study explores the use of Federated Learning"),
    ("In this approach, CNN", "In this approach, a combination of CNN models was used to enhance performance"),
    ("achieved", "yielded results indicating"),
    ("slightly lower", "only marginally lower"),
)


def _extract_text_via_temp_file(write_pdf):
    """Materialise a PDF in a private temp file and return its extracted text.

    ``write_pdf`` is called with the temp file path and must write the PDF
    bytes there. A unique temp file is used (instead of a fixed name in the
    working directory) so concurrent sessions cannot overwrite each other,
    and the file is removed even when writing or extraction fails.
    """
    handle = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    handle.close()  # closed first so it can be reopened by path on every OS
    try:
        write_pdf(handle.name)
        return llm_part.extract_text_from_pdf(handle.name)
    finally:
        os.remove(handle.name)


def _save_upload(uploaded, path):
    """Write the bytes of a Streamlit uploaded file to ``path``."""
    with open(path, "wb") as f:
        f.write(uploaded.getvalue())


def _table_to_paragraph(table_markdown):
    """Join the "Details" cells of a two-column Markdown table into one paragraph.

    Header and separator rows are recognised by content rather than by
    position, so a preamble or blank line before the table does not leak the
    header into the paragraph. Rows whose details say "Not specified" or
    "Not mentioned" are dropped. Returns an empty string when no usable
    detail cell is found.
    """
    details = []
    for line in table_markdown.split("\n"):
        if "|" not in line:
            continue
        cells = [cell.strip() for cell in line.split("|") if cell.strip()]
        if len(cells) != 2:
            continue
        aspect, detail = cells
        if all(set(cell) <= set("-: ") for cell in cells):
            continue  # |-----|-----| separator row
        if aspect.strip("* ").lower() == "aspect":
            continue  # | **Aspect** | **Details** | header row
        if "Not specified" in detail or "Not mentioned" in detail:
            continue
        details.append(llm_part.clean_html_tags(detail))

    if not details:
        return ""

    paragraph = ". ".join(details) + "."
    for old, new in _PHRASE_REWRITES:
        paragraph = paragraph.replace(old, new)
    return paragraph


# Sidebar to select the LLM model
st.sidebar.title("LLM Model Selector")
llm_model = st.sidebar.selectbox("Select LLM Model", ("Google Gemini", "Llama"))

# Pick the chat model that matches the sidebar choice.
llm = llm_part.llm_1 if llm_model == "Google Gemini" else llm_part.llm_2

# Main app
st.title("Jony's Custom Research Notes Extracted from PDFs Using " + llm_model)
option = st.selectbox("Select PDF Source:", ("Enter URL", "Upload Local File"))

document_text = ""

if option == "Enter URL":
    pdf_url = st.text_input("Enter the PDF URL:")

    if pdf_url:
        try:
            with st.spinner("Processing PDF from URL..."):
                document_text = _extract_text_via_temp_file(
                    lambda path: llm_part.download_pdf_from_url(pdf_url, path)
                )
        except Exception as e:  # UI boundary: report any failure to the user
            st.error(f"Error processing PDF from URL: {e}")

elif option == "Upload Local File":
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    if uploaded_file is not None:
        try:
            with st.spinner("Processing uploaded PDF..."):
                document_text = _extract_text_via_temp_file(
                    lambda path: _save_upload(uploaded_file, path)
                )
        except Exception as e:  # UI boundary: report any failure to the user
            st.error(f"Error processing uploaded PDF: {e}")

if document_text:
    with st.spinner("Generating the summary..."):
        query = llm_part.prompt.format(document_text=document_text[:MAX_DOCUMENT_CHARS])
        result = llm.invoke(query)
        st.write("### Summary in Table Format:")
        st.write(result.content)

        paragraph_output = _table_to_paragraph(result.content)

        if not paragraph_output:
            # Nothing to paraphrase; avoid sending a lone "." to the model.
            st.warning("No table details could be extracted for the paragraph summary.")
        else:
            query2 = llm_part.prompt2.format(paragraph=paragraph_output)
            result2 = llm.invoke(query2)
            st.write("### Answer in Paragraph Style:")
            st.markdown("""
<style>
.justified-text {
text-align: justify;
}
</style>
""", unsafe_allow_html=True)

            # The model output is untrusted text: escape it before embedding it
            # in raw HTML so it cannot inject markup into the page.
            safe_paragraph = html.escape(result2.content)
            st.markdown(f"<div class='justified-text'>{safe_paragraph}</div>", unsafe_allow_html=True)


# Install dependencies with: pip install -r requirements.txt
llm_part.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import streamlit as st
4
+ from PyPDF2 import PdfReader
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain_google_genai import ChatGoogleGenerativeAI
7
+ import re
8
+ from langchain_groq import ChatGroq
9
+ from secret_key import gemeni_key,llama_key
10
+
# Google Gemini chat model; its key is read from the "Gemini_api_key" environment variable.
api_key = os.environ.get("Gemini_api_key")
llm_1 = ChatGoogleGenerativeAI(
    model="gemini-pro",
    api_key=api_key,
)

# Llama chat model accessed through ChatGroq; its key is read from "Llama_api_key".
api_key2 = os.environ.get("Llama_api_key")
MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"
llm_2 = ChatGroq(
    model=MODEL_ID,
    temperature=0,
    groq_api_key=api_key2,
)
+
18
+
19
+
def download_pdf_from_url(url, local_file_path):
    """Download ``url`` and save the response body to ``local_file_path``.

    Args:
        url: Address of the PDF to fetch.
        local_file_path: Destination path; overwritten if it exists.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status. Nothing is written in that case, so an
            HTML error page is never saved as if it were a PDF.
    """
    # A timeout keeps the app from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    with open(local_file_path, "wb") as f:
        f.write(response.content)
24
+
25
+
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of all pages in the PDF at ``pdf_file_path``.

    Pages that yield no text are skipped; the remaining page texts are
    separated by newlines and the result is stripped of surrounding whitespace.
    """
    pages = PdfReader(pdf_file_path).pages
    page_texts = (page.extract_text() for page in pages)
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
34
+
def clean_html_tags(text):
    """Strip HTML tags from ``text`` and normalise whitespace.

    Each tag is replaced by a space (not by nothing) so that adjacent
    elements such as ``<li>A</li><li>B</li>`` or ``a<br>b`` do not fuse into
    one word. Only real tags are matched -- ``<`` or ``</`` followed by a
    letter -- so prose comparisons like ``p < 0.05 and acc > 90%`` survive.

    Args:
        text: String that may contain HTML markup.

    Returns:
        The text without tags, with whitespace runs collapsed to single
        spaces and no leading or trailing whitespace.
    """
    without_tags = re.sub(r"</?[A-Za-z][^<>]*>", " ", text)
    return re.sub(r"\s+", " ", without_tags).strip()
39
+
# Summarisation prompt: asks the model for a two-column Markdown table
# (Aspect / Details) covering a fixed list of aspects of the paper.
template = """
Based on the following document:

{document_text}

Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:

| **Aspect** | **Details** |
|--------------------------|---------------------------------------------------------------------|
| What did they do? | Briefly describe the main task, objective, or experiment. |
| Contributions | Highlight the main contributions of the paper. |
| Hardware | Name, model, price (if available), link (if available), function. |
| Software | Type (commercial/free/custom-developed), version, availability, features. |
| Dataset | Type (public/private), type of data (image, text, video, log), duration, size. |
| Algorithms | List the algorithms or models used. |
| Place of Experiment | Where was the experiment conducted (institution/lab)? |
| Claimed Results | Summarize the key results and findings. |
| Limitations | Identify limitations or shortcomings. |
| Solutions | Suggest possible solutions for overcoming limitations. |
| Improvements | Suggest potential improvements or additions. |

Ensure each section is concise but informative.
"""

# Template object; its single placeholder is the document text.
prompt = PromptTemplate(
    input_variables=["document_text"],
    template=template,
)
67
+
# Paraphrasing prompt: asks the model to rewrite the assembled paragraph
# as a single academic-style paragraph.
template2 = """
Paraphrase the following paragraph in academic research format:
#NO PREAMBLE #
#DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#


{paragraph}
"""

# Template object; its single placeholder is the paragraph to rewrite.
prompt2 = PromptTemplate(
    input_variables=["paragraph"],
    template=template2,
)
79
+
80
+
81
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+
2
+ requests
3
+ streamlit
4
+ PyPDF2
5
+ langchain
6
+ langchain-google-genai
7
+ langchain-groq
8
+ langchain_community