Spaces:
Runtime error
Runtime error
import os | |
import requests | |
import streamlit as st | |
from PyPDF2 import PdfReader | |
from langchain.prompts import PromptTemplate | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
import re | |
from langchain_groq import ChatGroq | |
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI | |
# --- Model configuration ---------------------------------------------------
# Both API keys are read from environment variables; os.getenv yields None
# when a variable is not set.
api_key = os.getenv("Gemini_api_key")
api_key2 = os.getenv("Llama_api_key")

# Identifier of the model requested through ChatGroq. Other identifiers noted
# alongside the original assignment:
#   "llama3-groq-70b-8192-tool-use-preview", "llama-3.2-90b-text-preview"
MODEL_ID = "llama3-groq-70b-8192-tool-use-preview"

# First chat model: Google Generative AI client for "gemini-pro".
llm_1 = ChatGoogleGenerativeAI(model="gemini-pro", api_key=api_key)

# Second chat model: Groq client for MODEL_ID, created with temperature 0.
llm_2 = ChatGroq(
    model=MODEL_ID,
    temperature=0,
    groq_api_key=api_key2,
)
def download_pdf_from_url(url, local_file_path, timeout=30):
    """Download the resource at *url* and save it to *local_file_path*.

    Args:
        url: Address fetched with an HTTP GET request.
        local_file_path: Destination path. It is opened in binary write
            mode, so an existing file at that path is overwritten.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Check the status before touching the disk so that an HTTP error page is
    # never saved under a PDF file name.
    response.raise_for_status()
    with open(local_file_path, "wb") as f:
        f.write(response.content)
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of all pages of the PDF at *pdf_file_path*.

    Pages whose extraction yields nothing (``None`` or an empty string) are
    skipped. Each remaining page's text is followed by a newline, and
    leading/trailing whitespace is stripped from the combined result.
    """
    # Lazily extract page by page, in document order.
    page_texts = (page.extract_text() for page in PdfReader(pdf_file_path).pages)
    combined = "".join(chunk + "\n" for chunk in page_texts if chunk)
    return combined.strip()
def clean_html_tags(text):
    """Strip HTML-style tags from *text* and trim surrounding whitespace.

    Two substitution passes run in order: the first deletes list markup
    (``<ul>``, ``</ul>``, ``<li>``, ``</li>``); the second deletes any
    remaining ``<...>`` span. The second pattern's ``.`` does not match a
    newline, so a tag split across lines is left in place.
    """
    passes = (
        r"<ul>|</ul>|<li>|</li>",  # list markup first
        r"<.*?>",                  # then any other single-line tag
    )
    stripped = text
    for pattern in passes:
        stripped = re.sub(pattern, "", stripped)
    return stripped.strip()
# Summarization prompt. It asks the model to answer with a two-column
# Markdown table (Aspect / Details) whose rows are listed in the template
# itself. The single placeholder {document_text} is substituted when the
# prompt is formatted -- presumably with the text extracted from the PDF;
# confirm against the caller.
template = """
Based on the following document:
{document_text}
Please provide the summary in a **table format**. Each point should be in its own row, with the following columns:
| **Aspect** | **Details** |
|--------------------------|---------------------------------------------------------------------|
| What did they do? | Briefly describe the main task, objective, or experiment. |
| Contributions | Highlight the main contributions of the paper. |
| Hardware | Name, model, price (if available), link (if available), function. |
| Software | Type (commercial/free/custom-developed), version, availability, features. |
| Dataset | Type (public/private), type of data (image, text, video, log), duration, size. |
| Algorithms | List the algorithms or models used. |
| Place of Experiment | Where was the experiment conducted (institution/lab)? |
| Claimed Results | Summarize the key results and findings. |
| Limitations | Identify limitations or shortcomings. |
| Solutions | Suggest possible solutions for overcoming limitations. |
| Improvements | Suggest potential improvements or additions. |
Ensure each section is concise but informative.
"""
# Wrap the raw string in a PromptTemplate; "document_text" is declared as its
# only input variable.
prompt = PromptTemplate(template=template, input_variables=["document_text"])
# Paraphrasing prompt. It instructs the model to rewrite one paragraph in
# academic style, with no preamble and no bullet points. The text between the
# '#' marks is part of the prompt sent to the model, not a Python comment.
# The single placeholder is {paragraph}.
template2 = """
Paraphrase the following paragraph in academic research format:
#NO PREAMBLE #
#DONT INCLUDE ANY BULLET POINTS WRITE IN SINGLE PARAGRAPH#
{paragraph}
"""
# Wrap the raw string in a PromptTemplate; "paragraph" is declared as its only
# input variable.
prompt2 = PromptTemplate(template=template2, input_variables=["paragraph"])