File size: 3,167 Bytes
fecd026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import requests
import streamlit as st
from PyPDF2 import PdfReader
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
import re
from langchain_groq import ChatGroq
from secret_key import gemeni_key,llama_key

# Module-level LLM clients, created once at import time.
# NOTE(review): `gemeni_key` and `llama_key` are imported from secret_key
# above, but the keys used here come from environment variables instead --
# confirm which source is intended.

# Gemini client: the key is read from the "Gemini_api_key" environment
# variable (os.getenv returns None when the variable is not set).
api_key = os.getenv("Gemini_api_key")
llm_1 = ChatGoogleGenerativeAI(model="gemini-pro", api_key=api_key)

# Groq client: the key is read from the "Llama_api_key" environment variable.
# temperature=0 is passed explicitly -- presumably to keep output as
# deterministic as possible; confirm against the intended use.
api_key2=os.getenv("Llama_api_key")
MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"
llm_2=ChatGroq(model=MODEL_ID, temperature=0, groq_api_key=api_key2)



def download_pdf_from_url(url, local_file_path, timeout=30):
    """Download the resource at *url* and save it to *local_file_path*.

    Args:
        url: Address of the file to fetch with an HTTP GET request.
        local_file_path: Destination path; the file is opened in binary
            write mode, so existing content is overwritten.
        timeout: Value passed to ``requests.get(timeout=...)`` (seconds the
            client waits on the server before giving up). Defaults to 30 so
            an unresponsive server cannot block the caller forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection errors and timeouts.
        OSError: If the destination file cannot be written.
    """
    response = requests.get(url, timeout=timeout)
    # Fail before touching the disk: otherwise an HTML error page would be
    # saved under a PDF file name and only break later during parsing.
    response.raise_for_status()
    with open(local_file_path, 'wb') as f:
        f.write(response.content)


def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of a PDF as one string.

    Each page is read with ``PdfReader`` in document order. Pages for which
    ``extract_text()`` yields nothing (``None`` or an empty string) are
    skipped; every remaining page's text is followed by a newline. Leading
    and trailing whitespace is stripped from the combined result.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_file_path).pages)
    combined = "".join(content + "\n" for content in page_texts if content)
    return combined.strip()

def clean_html_tags(text):
    """Strip HTML-like tags from *text* and trim surrounding whitespace.

    Two passes are applied in order: first the literal list tags
    (``<ul>``, ``</ul>``, ``<li>``, ``</li>``) are removed, then any
    remaining ``<...>`` span that sits on a single line. The tag contents
    are deleted without inserting a replacement character.
    """
    # Order matters: removing the list tags first can expose a new "<...>"
    # pair (e.g. "<<ul>>" -> "<>") that the generic pass then removes.
    for pattern in (r"<ul>|</ul>|<li>|</li>", r"<.*?>"):
        text = re.sub(pattern, "", text)
    return text.strip()

# Summarization prompt text. It has a single placeholder, {document_text},
# for the document to summarize, and asks the model to answer as a two-column
# Markdown table (Aspect / Details) with one row per aspect listed below.
# The row descriptions are instructions to the model, not output.
template = """

Based on the following document:



{document_text}



Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:



| **Aspect**               | **Details**                                                         |

|--------------------------|---------------------------------------------------------------------|

| What did they do?         | Briefly describe the main task, objective, or experiment.           |

| Contributions             | Highlight the main contributions of the paper.                     |

| Hardware                  | Name, model, price (if available), link (if available), function.   |

| Software                  | Type (commercial/free/custom-developed), version, availability, features. |

| Dataset                   | Type (public/private), type of data (image, text, video, log), duration, size. |

| Algorithms                | List the algorithms or models used.                                |

| Place of Experiment       | Where was the experiment conducted (institution/lab)?               |

| Claimed Results           | Summarize the key results and findings.                            |

| Limitations               | Identify limitations or shortcomings.                              |

| Solutions                 | Suggest possible solutions for overcoming limitations.             |

| Improvements              | Suggest potential improvements or additions.                       |



Ensure each section is concise but informative.

"""

# Wrap the summarization text in a PromptTemplate whose only input variable
# is "document_text".
prompt = PromptTemplate(template=template, input_variables=["document_text"])

# Paraphrasing prompt text. It has a single placeholder, {paragraph}, for the
# text to rewrite. The "#...#" lines inside the string are instructions sent
# to the model (no preamble, single paragraph, no bullets), not Python
# comments.
template2 = """

Paraphrase the following paragraph in academic research format:

#NO PREAMBLE #

#DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#





{paragraph}

"""

# Wrap the paraphrasing text in a PromptTemplate whose only input variable
# is "paragraph".
prompt2 = PromptTemplate(template=template2, input_variables=["paragraph"])