import os
import re

import requests
import streamlit as st
from PyPDF2 import PdfReader
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq

# Both API keys are read from the environment; os.getenv returns None when
# the variable is not set.
api_key = os.getenv("Gemini_api_key")
llm_1 = ChatGoogleGenerativeAI(model="gemini-pro", api_key=api_key)

api_key2 = os.getenv("Llama_api_key")
# Alternative model ids that were noted next to this setting:
#   "llama3-groq-70b-8192-tool-use-preview", "llama-3.2-90b-text-preview"
MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"
llm_2 = ChatGroq(model=MODEL_ID, temperature=0, groq_api_key=api_key2)


def download_pdf_from_url(url, local_file_path, timeout=30):
    """Download the resource at *url* and write its bytes to *local_file_path*.

    Args:
        url: Address of the PDF to fetch.
        local_file_path: Destination path; the file is opened in binary
            write mode, so an existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the caller forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail before touching the disk: otherwise an HTML error page would be
    # saved under a PDF name and only blow up later inside the PDF parser.
    response.raise_for_status()
    with open(local_file_path, "wb") as f:
        f.write(response.content)


def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of the PDF at *pdf_file_path*.

    Pages for which ``extract_text()`` returns a falsy value are skipped.
    The remaining page texts are joined with newlines, and leading and
    trailing whitespace is stripped from the combined result.
    """
    reader = PdfReader(pdf_file_path)
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(chunk for chunk in page_texts if chunk).strip()


def clean_html_tags(text):
    """Remove HTML tags from *text* and strip surrounding whitespace.

    A tag is any ``<`` followed by the shortest run of non-newline
    characters up to the next ``>``.
    """
    # NOTE(review): the original had an earlier substitution whose pattern
    # was garbled (its alternatives were rendered as bullet glyphs and the
    # raw string was split across lines, which does not parse). It presumably
    # listed specific tags - confirm. Any such tag is already removed by the
    # generic pattern below, so a single pass is used.
    clean_text = re.sub(r"<.*?>", "", text)
    return clean_text.strip()


# Define the template for summarization
template = """
Based on the following document:
{document_text}

Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:

| **Aspect** | **Details** |
|--------------------------|---------------------------------------------------------------------|
| What did they do? | Briefly describe the main task, objective, or experiment. |
| Contributions | Highlight the main contributions of the paper. |
| Hardware | Name, model, price (if available), link (if available), function. |
| Software | Type (commercial/free/custom-developed), version, availability, features. |
| Dataset | Type (public/private), type of data (image, text, video, log), duration, size. |
| Algorithms | List the algorithms or models used. |
| Place of Experiment | Where was the experiment conducted (institution/lab)? |
| Claimed Results | Summarize the key results and findings. |
| Limitations | Identify limitations or shortcomings. |
| Solutions | Suggest possible solutions for overcoming limitations. |
| Improvements | Suggest potential improvements or additions. |

Ensure each section is concise but informative.
"""

# Prompt Template
prompt = PromptTemplate(template=template, input_variables=["document_text"])

template2 = """
Paraphrase the following paragraph in academic research format:
#NO PREAMBLE #
#DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#
{paragraph}
"""

# Prompt Template
prompt2 = PromptTemplate(template=template2, input_variables=["paragraph"])