import os
import requests
import streamlit as st
from PyPDF2 import PdfReader
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
import re
from langchain_groq import ChatGroq
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
# Gemini chat model. The key comes from the `Gemini_api_key` environment
# variable; os.getenv returns None when the variable is not set.
api_key = os.getenv("Gemini_api_key")
llm_1 = ChatGoogleGenerativeAI(model="gemini-pro", api_key=api_key)
# Groq chat model. The key comes from the `Llama_api_key` environment variable.
api_key2=os.getenv("Llama_api_key")
# Model id handed to ChatGroq below.
# "llama-3.2-90b-text-preview" was noted here as an alternative id.
MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"
llm_2=ChatGroq(model=MODEL_ID, temperature=0, groq_api_key=api_key2)
def download_pdf_from_url(url, local_file_path, timeout=30):
    """Download the resource at *url* and save it to *local_file_path*.

    Args:
        url: Address of the PDF to fetch.
        local_file_path: Destination path; the file is opened in ``"wb"``
            mode, so any existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the caller forever.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of saving an error page as a PDF.
    response.raise_for_status()
    with open(local_file_path, "wb") as f:
        f.write(response.content)
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of the PDF at *pdf_file_path*.

    Each page's ``extract_text()`` result is taken in page order; pages that
    yield nothing (``None`` or an empty string) are skipped. The remaining
    page texts are separated by newlines, and leading/trailing whitespace is
    stripped from the combined result.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_file_path).pages)
    # Joining with "\n" and then stripping gives the same string as appending
    # "\n" after every page and stripping at the end.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
def clean_html_tags(text):
    """Remove HTML tags from *text* and strip surrounding whitespace.

    Args:
        text: String that may contain HTML markup.

    Returns:
        The text with line-break tags and any other ``<...>`` tags removed,
        and with leading/trailing whitespace stripped.
    """
    # NOTE(review): the original first pattern was corrupted into an
    # unterminated string literal (a syntax error). It appears to have listed
    # line-break tag variants, reconstructed here as <br>, <br/> and <br />;
    # confirm against the upstream file.
    clean_text = re.sub(r"<br\s*/?>", "", text)
    # Non-greedy match so each tag is removed on its own, not the text between tags.
    clean_text = re.sub(r"<.*?>", "", clean_text)
    return clean_text.strip()
# Summarization prompt: asks the model to summarize the supplied document as a
# two-column Markdown table (Aspect / Details), one row per listed aspect.
# `{document_text}` is the only placeholder in the template.
template = """
Based on the following document:
{document_text}
Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:
| **Aspect** | **Details** |
|--------------------------|---------------------------------------------------------------------|
| What did they do? | Briefly describe the main task, objective, or experiment. |
| Contributions | Highlight the main contributions of the paper. |
| Hardware | Name, model, price (if available), link (if available), function. |
| Software | Type (commercial/free/custom-developed), version, availability, features. |
| Dataset | Type (public/private), type of data (image, text, video, log), duration, size. |
| Algorithms | List the algorithms or models used. |
| Place of Experiment | Where was the experiment conducted (institution/lab)? |
| Claimed Results | Summarize the key results and findings. |
| Limitations | Identify limitations or shortcomings. |
| Solutions | Suggest possible solutions for overcoming limitations. |
| Improvements | Suggest potential improvements or additions. |
Ensure each section is concise but informative.
"""
# Wrap the summarization template, declaring `document_text` as its input variable.
prompt = PromptTemplate(template=template, input_variables=["document_text"])
# Paraphrasing prompt: asks the model to rewrite `{paragraph}` in academic
# research format. The '#'-delimited lines inside the string are part of the
# prompt text itself (instructions to the model), not Python comments.
template2 = """
Paraphrase the following paragraph in academic research format:
#NO PREAMBLE #
#DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#
{paragraph}
"""
# Wrap the paraphrasing template, declaring `paragraph` as its input variable.
prompt2 = PromptTemplate(template=template2, input_variables=["paragraph"])