import os

import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import PyPDF2

grog_api_key = "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" |
|
|
|
|
|
client = Groq(api_key=grog_api_key) |
|
|
|
|
|
book_path = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'

if os.path.exists(book_path):
    print(f"Book found at: {book_path}")
else:
    print("Book not found!")


def read_pdf(file_path):
    """Return the concatenated text of all pages in a PDF."""
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() may return None on pages with no extractable text.
            text += page.extract_text() or ""
    return text


book_text = read_pdf(book_path)
print(book_text[:1000])


def vectorize_text(text):
    """Embed the text line by line and index the embeddings with FAISS."""
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        # Split on newlines and drop blanks so empty strings are not embedded.
        sentences = [line.strip() for line in text.split('\n') if line.strip()]
        embeddings = model.encode(sentences, show_progress_bar=True)

        # Build a flat L2 (exact-search) index sized to the embedding dimension.
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        print(f"Added {len(sentences)} sentences to the vector store.")

        return index, sentences
    except Exception as e:
        print(f"Error during vectorization: {str(e)}")
        return None, None


vector_index, sentences = vectorize_text(book_text)

if vector_index is not None:
    print("Vectorization complete.")
else:
    print("Vectorization failed.")
def generate_query_embedding(query, sentence_transformer_model):
    """Embed a single query string for nearest-neighbour search."""
    return sentence_transformer_model.encode([query])


def generate_answer_with_groq(query, vector_index, sentences, sentence_transformer_model):
    """Retrieve the sentences most relevant to the query and ask the Groq LLM."""
    try:
        query_embedding = generate_query_embedding(query, sentence_transformer_model)

        # Find the five nearest sentences; D holds distances, I holds indices.
        D, I = vector_index.search(np.array(query_embedding), k=5)

        relevant_sentences = [sentences[i] for i in I[0]]
        combined_text = " ".join(relevant_sentences)

        # Send the retrieved context together with the question, so the model
        # answers the query rather than just seeing raw context.
        prompt = (
            "Answer the question using the context below.\n\n"
            f"Context: {combined_text}\n\n"
            f"Question: {query}"
        )
        chat_completion = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": prompt,
            }],
            model="llama3-8b-8192",
        )

        response = chat_completion.choices[0].message.content
        return response
    except Exception as e:
        return f"Error during answer generation with Groq API: {str(e)}"
def gradio_interface(query):
    global vector_index, sentences

    # Note: reloading the embedding model on every query is slow; loading it
    # once at module level would be faster.
    sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')

    if vector_index is None or sentences is None:
        return "Vector index or sentences not initialized properly."

    answer = generate_answer_with_groq(query, vector_index, sentences, sentence_transformer_model)
    return answer


iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Generative AI Foundations in Python: PDF-based Query Answering",
    description="Ask any question about the content of the PDF and receive answers generated by the Groq API with a Llama model."
)


if __name__ == "__main__":
    iface.launch()
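
# To run locally (assumes this file is saved as app.py, the PDF is in the
# working directory, and the GROQ_API_KEY environment variable is set):
#   GROQ_API_KEY=gsk_... python app.py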