import os
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import PyPDF2
# Groq API key (read from an environment variable rather than hard-coding the secret)
groq_api_key = os.environ.get("GROQ_API_KEY")

# Initialize the Groq API client
client = Groq(api_key=groq_api_key)
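# (For local runs, e.g.: export GROQ_API_KEY="..."; on Hugging Face Spaces the
# key can be stored as a repository secret.)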
# Path to the already uploaded book
book_path = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'

# Check if the file exists
if os.path.exists(book_path):
    print(f"Book found at: {book_path}")
else:
    print("Book not found!")
# Function to read the full text of a PDF file
def read_pdf(file_path):
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() may return None on image-only pages
            text += page.extract_text() or ""
    return text
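# Note: PyPDF2 is in maintenance mode; its successor "pypdf" exposes the same
# PdfReader API, so swapping the import would be a near drop-in change.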
# Read the PDF content
book_text = read_pdf(book_path)
print(book_text[:1000]) # Print first 1000 characters of the book for verification
# Load the sentence-embedding model once so indexing and querying share it
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Vectorization of the extracted PDF content
def vectorize_text(text):
    try:
        # Split the text into non-empty lines for embedding (a simple chunking scheme)
        sentences = [line for line in text.split('\n') if line.strip()]
        embeddings = embedding_model.encode(sentences, show_progress_bar=True)

        # Create a FAISS index (L2 distance) for similarity search
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        print(f"Added {len(sentences)} sentences to the vector store.")
        return index, sentences
    except Exception as e:
        print(f"Error during vectorization: {str(e)}")
        return None, None
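# Optional sketch: the index could be persisted so it is not rebuilt on every
# restart (the "book_index.faiss" filename here is only an assumption):
#
#   faiss.write_index(index, "book_index.faiss")
#   index = faiss.read_index("book_index.faiss")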
# Vectorize the extracted PDF text
vector_index, sentences = vectorize_text(book_text)

# Check whether the vectorization succeeded
if vector_index is not None:
    print("Vectorization complete.")
else:
    print("Vectorization failed.")
# Function to generate an embedding for the query using the shared sentence transformer
def generate_query_embedding(query):
    return embedding_model.encode([query])
# Function to generate answers using the Groq API with a Llama model
def generate_answer_with_groq(query, vector_index, sentences):
    try:
        # Embed the query with the same model used to build the index
        query_embedding = generate_query_embedding(query)

        # Similarity search on the vector index: find the top 5 similar sentences
        D, I = vector_index.search(np.array(query_embedding), k=5)
        relevant_sentences = [sentences[i] for i in I[0]]

        # Combine the retrieved sentences into a context block
        combined_text = " ".join(relevant_sentences)

        # Send both the retrieved context and the user's question to the Groq API
        chat_completion = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": f"Answer the question using the following context.\n\n"
                           f"Context: {combined_text}\n\nQuestion: {query}",
            }],
            model="llama3-8b-8192",
        )

        # Extract and return the response content from the Groq API
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"Error during answer generation with Groq API: {str(e)}"
# Gradio app function
def gradio_interface(query):
    if vector_index is None or sentences is None:
        return "Vector index or sentences not initialized properly."

    # Generate the answer using the Groq API and the Llama model
    return generate_answer_with_groq(query, vector_index, sentences)
# Create the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Generative AI Foundations in Python: PDF-based Query Answering",
    description="Ask any question about the content of the uploaded PDF and receive answers generated by the Groq API with a Llama model.",
)
# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
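    # Note: iface.launch(share=True) would additionally create a temporary
    # public URL, which can help when running outside Hugging Face Spaces.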