shamimjony1000 committed
Commit 40efbd9 · verified · 1 Parent(s): fecd026

Update llm_part.py

Files changed (1)
  1. llm_part.py +81 -81
llm_part.py CHANGED
@@ -1,81 +1,81 @@
- import os
- import requests
- import streamlit as st
- from PyPDF2 import PdfReader
- from langchain.prompts import PromptTemplate
- from langchain_google_genai import ChatGoogleGenerativeAI
- import re
- from langchain_groq import ChatGroq
- from secret_key import gemeni_key,llama_key
-
- api_key = os.getenv("Gemini_api_key")
- llm_1 = ChatGoogleGenerativeAI(model="gemini-pro", api_key=api_key)
-
- api_key2=os.getenv("Llama_api_key")
- MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"
- llm_2=ChatGroq(model=MODEL_ID, temperature=0, groq_api_key=api_key2)
-
-
-
- def download_pdf_from_url(url, local_file_path):
-     response = requests.get(url)
-     with open(local_file_path, 'wb') as f:
-         f.write(response.content)
-
-
- def extract_text_from_pdf(pdf_file_path):
-     reader = PdfReader(pdf_file_path)
-     text = ""
-     for page in reader.pages:
-         extracted_text = page.extract_text()
-         if extracted_text:
-             text += extracted_text + "\n"
-     return text.strip()
-
- def clean_html_tags(text):
-     clean_text = re.sub(r"<ul>|</ul>|<li>|</li>", "", text)
-     clean_text = re.sub(r"<.*?>", "", clean_text)
-     return clean_text.strip()
-
- # Define the template for summarization
- template = """
- Based on the following document:
-
- {document_text}
-
- Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:
-
- | **Aspect** | **Details** |
- |--------------------------|---------------------------------------------------------------------|
- | What did they do? | Briefly describe the main task, objective, or experiment. |
- | Contributions | Highlight the main contributions of the paper. |
- | Hardware | Name, model, price (if available), link (if available), function. |
- | Software | Type (commercial/free/custom-developed), version, availability, features. |
- | Dataset | Type (public/private), type of data (image, text, video, log), duration, size. |
- | Algorithms | List the algorithms or models used. |
- | Place of Experiment | Where was the experiment conducted (institution/lab)? |
- | Claimed Results | Summarize the key results and findings. |
- | Limitations | Identify limitations or shortcomings. |
- | Solutions | Suggest possible solutions for overcoming limitations. |
- | Improvements | Suggest potential improvements or additions. |
-
- Ensure each section is concise but informative.
- """
-
- # Prompt Template
- prompt = PromptTemplate(template=template, input_variables=["document_text"])
-
- template2 = """
- Paraphrase the following paragraph in academic research format:
- #NO PREAMBLE #
- #DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#
-
-
- {paragraph}
- """
-
- # Prompt Template
- prompt2 = PromptTemplate(template=template2, input_variables=["paragraph"])
-
-
-
 
+ import os
+ import requests
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ from langchain.prompts import PromptTemplate
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import re
+ from langchain_groq import ChatGroq
+
+
+ api_key = os.getenv("Gemini_api_key")
+ llm_1 = ChatGoogleGenerativeAI(model="gemini-pro", api_key=api_key)
+
+ api_key2=os.getenv("Llama_api_key")
+ MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"
+ llm_2=ChatGroq(model=MODEL_ID, temperature=0, groq_api_key=api_key2)
+
+
+
+ def download_pdf_from_url(url, local_file_path):
+     response = requests.get(url)
+     with open(local_file_path, 'wb') as f:
+         f.write(response.content)
+
+
+ def extract_text_from_pdf(pdf_file_path):
+     reader = PdfReader(pdf_file_path)
+     text = ""
+     for page in reader.pages:
+         extracted_text = page.extract_text()
+         if extracted_text:
+             text += extracted_text + "\n"
+     return text.strip()
+
+ def clean_html_tags(text):
+     clean_text = re.sub(r"<ul>|</ul>|<li>|</li>", "", text)
+     clean_text = re.sub(r"<.*?>", "", clean_text)
+     return clean_text.strip()
+
+ # Define the template for summarization
+ template = """
+ Based on the following document:
+
+ {document_text}
+
+ Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:
+
+ | **Aspect** | **Details** |
+ |--------------------------|---------------------------------------------------------------------|
+ | What did they do? | Briefly describe the main task, objective, or experiment. |
+ | Contributions | Highlight the main contributions of the paper. |
+ | Hardware | Name, model, price (if available), link (if available), function. |
+ | Software | Type (commercial/free/custom-developed), version, availability, features. |
+ | Dataset | Type (public/private), type of data (image, text, video, log), duration, size. |
+ | Algorithms | List the algorithms or models used. |
+ | Place of Experiment | Where was the experiment conducted (institution/lab)? |
+ | Claimed Results | Summarize the key results and findings. |
+ | Limitations | Identify limitations or shortcomings. |
+ | Solutions | Suggest possible solutions for overcoming limitations. |
+ | Improvements | Suggest potential improvements or additions. |
+
+ Ensure each section is concise but informative.
+ """
+
+ # Prompt Template
+ prompt = PromptTemplate(template=template, input_variables=["document_text"])
+
+ template2 = """
+ Paraphrase the following paragraph in academic research format:
+ #NO PREAMBLE #
+ #DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#
+
+
+ {paragraph}
+ """
+
+ # Prompt Template
+ prompt2 = PromptTemplate(template=template2, input_variables=["paragraph"])
+
+
+
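
For reference, a minimal usage sketch of the updated llm_part.py, assuming the Gemini_api_key and Llama_api_key environment variables are set before import. The PDF URL and local path are placeholders, and the prompt | llm_1 piping is one conventional LangChain way to wire the templates to the models; it is not defined in the committed file itself.

# usage_sketch.py -- rough illustration, not part of this commit
from llm_part import (
    download_pdf_from_url,
    extract_text_from_pdf,
    clean_html_tags,
    prompt,
    prompt2,
    llm_1,
    llm_2,
)

PDF_URL = "https://example.com/paper.pdf"   # hypothetical URL
LOCAL_PATH = "paper.pdf"                    # hypothetical local path

# Fetch the paper and pull its text out of the PDF.
download_pdf_from_url(PDF_URL, LOCAL_PATH)
document_text = extract_text_from_pdf(LOCAL_PATH)

# Pipe the summarization template into the Gemini model and print the table,
# stripping any stray HTML tags the model emits.
summary_chain = prompt | llm_1
summary = summary_chain.invoke({"document_text": document_text})
print(clean_html_tags(summary.content))

# Paraphrase an excerpt with the Groq-hosted Llama model using the second template.
paraphrase_chain = prompt2 | llm_2
rewritten = paraphrase_chain.invoke({"paragraph": document_text[:1500]})
print(rewritten.content)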