File size: 3,213 Bytes
40efbd9
 
 
 
 
 
 
 
1dd1822
40efbd9
 
 
 
 
 
c64be81
40efbd9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import requests
import streamlit as st
from PyPDF2 import PdfReader
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
import re
from langchain_groq import ChatGroq
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI


# Gemini chat model; its key comes from the `Gemini_api_key` environment variable.
api_key = os.environ.get("Gemini_api_key")
llm_1 = ChatGoogleGenerativeAI(model="gemini-pro", api_key=api_key)

# Groq-hosted Llama chat model; its key comes from the `Llama_api_key`
# environment variable.
# Alternative model id noted alongside the current one: "llama-3.2-90b-text-preview"
api_key2 = os.environ.get("Llama_api_key")
MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"
llm_2 = ChatGroq(
    model=MODEL_ID,
    temperature=0,
    groq_api_key=api_key2,
)



def download_pdf_from_url(url, local_file_path, timeout=30):
    """Download the resource at ``url`` and save its bytes to ``local_file_path``.

    Args:
        url: Address of the file to fetch.
        local_file_path: Destination path; overwritten if it already exists.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled server.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection failures or
            timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of saving an error page under a
    # PDF filename, which would only surface later when the file is parsed.
    response.raise_for_status()
    with open(local_file_path, 'wb') as f:
        f.write(response.content)


def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of all pages in the PDF at ``pdf_file_path``.

    Pages whose extraction yields nothing are skipped. Each remaining page's
    text is followed by a newline, and leading/trailing whitespace is trimmed
    from the combined result.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_file_path).pages)
    return "".join(chunk + "\n" for chunk in page_texts if chunk).strip()

def clean_html_tags(text):
    """Strip HTML tags from ``text`` and trim surrounding whitespace.

    Two passes run in order: the first drops ``<ul>``/``<li>`` open and close
    tags, the second drops any remaining ``<...>`` span that sits on a single
    line (``.`` does not match newlines here).
    """
    stripped = text
    # Order matters: removing list tags first can expose a new `<...>` span
    # for the generic pass to pick up.
    for pattern in (r"<ul>|</ul>|<li>|</li>", r"<.*?>"):
        stripped = re.sub(pattern, "", stripped)
    return stripped.strip()

# Summarization prompt. It embeds the document text via the `{document_text}`
# placeholder and asks the model to answer as a pipe-delimited table with one
# row per aspect (task, contributions, hardware, software, dataset,
# algorithms, place of experiment, results, limitations, solutions,
# improvements).
template = """
Based on the following document:

{document_text}

Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:

| **Aspect**               | **Details**                                                         |
|--------------------------|---------------------------------------------------------------------|
| What did they do?         | Briefly describe the main task, objective, or experiment.           |
| Contributions             | Highlight the main contributions of the paper.                     |
| Hardware                  | Name, model, price (if available), link (if available), function.   |
| Software                  | Type (commercial/free/custom-developed), version, availability, features. |
| Dataset                   | Type (public/private), type of data (image, text, video, log), duration, size. |
| Algorithms                | List the algorithms or models used.                                |
| Place of Experiment       | Where was the experiment conducted (institution/lab)?               |
| Claimed Results           | Summarize the key results and findings.                            |
| Limitations               | Identify limitations or shortcomings.                              |
| Solutions                 | Suggest possible solutions for overcoming limitations.             |
| Improvements              | Suggest potential improvements or additions.                       |

Ensure each section is concise but informative.
"""

# Prompt object for the summarization template; `document_text` is its only
# input variable.
prompt = PromptTemplate(template=template, input_variables=["document_text"])

# Paraphrasing prompt. It asks for an academic-style rewrite of `{paragraph}`
# as a single paragraph with no preamble and no bullet points. The `#...#`
# lines inside the string are prompt text, not Python comments.
template2 = """
Paraphrase the following paragraph in academic research format:
#NO PREAMBLE #
#DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#


{paragraph}
"""

# Prompt object for the paraphrasing template; `paragraph` is its only input
# variable.
prompt2 = PromptTemplate(template=template2, input_variables=["paragraph"])