import os
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import PyPDF2

# Groq API key (read from an environment variable; never hardcode secrets in source files)
groq_api_key = os.environ.get("GROQ_API_KEY")

# Initialize the Groq API client
client = Groq(api_key=groq_api_key)
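# The key can be supplied from the shell before launching the app, e.g.:
#   export GROQ_API_KEY="gsk_..."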

# Path to the already uploaded book
book_path = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'

# Fail fast if the file is missing, rather than crashing later in read_pdf
if os.path.exists(book_path):
    print(f"Book found at: {book_path}")
else:
    raise FileNotFoundError(f"Book not found at: {book_path}")

# Function to read the PDF file
def read_pdf(file_path):
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() can return None for pages without a text layer
            text += page.extract_text() or ""
        return text
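
# Note: PyPDF2's development has since moved into the pypdf package; the same
# reader API is available there via `from pypdf import PdfReader` if you prefer
# the maintained library.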

# Read the PDF content
book_text = read_pdf(book_path)
print(book_text[:1000])  # Print first 1000 characters of the book for verification

# Load the embedding model once so it can be reused for indexing and queries
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Vectorization of the extracted PDF content
def vectorize_text(text):
    try:
        # Split the text into non-empty lines and embed each one
        sentences = [line for line in text.split('\n') if line.strip()]
        embeddings = embedding_model.encode(sentences, show_progress_bar=True)

        # Create FAISS index for similarity search
        index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance index
        index.add(np.array(embeddings))  # Add embeddings to the index
        print(f"Added {len(sentences)} sentences to the vector store.")

        return index, sentences
    except Exception as e:
        print(f"Error during vectorization: {str(e)}")
        return None, None
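
# Splitting on newlines yields PDF-layout lines, which can be very short for
# retrieval. A minimal alternative sketch that chunks the text into overlapping
# word windows (the window and overlap sizes are illustrative assumptions, not
# tuned values):
def chunk_text(text, window=120, overlap=20):
    words = text.split()
    step = window - overlap
    return [" ".join(words[i:i + window]) for i in range(0, len(words), step)]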

# Vectorize the extracted PDF text
vector_index, sentences = vectorize_text(book_text)

# Check if the vectorization was successful
if vector_index is not None:
    print("Vectorization complete.")
else:
    print("Vectorization failed.")

# Function to generate embeddings for the query using the SentenceTransformer
def generate_query_embedding(query, sentence_transformer_model):
    return sentence_transformer_model.encode([query])

# Function to generate answers using the Groq API with a Llama model
def generate_answer_with_groq(query, vector_index, sentences, sentence_transformer_model):
    try:
        # Get the query embedding using the sentence transformer
        query_embedding = generate_query_embedding(query, sentence_transformer_model)

        # Perform similarity search on the vector store (vector index)
        distances, indices = vector_index.search(np.array(query_embedding), k=5)  # Top 5 most similar sentences

        # Retrieve the most relevant sentences
        relevant_sentences = [sentences[i] for i in indices[0]]

        # Combine the retrieved context with the user's question, so the model
        # answers the query instead of merely continuing the context
        combined_text = " ".join(relevant_sentences)
        prompt = (
            "Answer the question using only the context below.\n\n"
            f"Context: {combined_text}\n\n"
            f"Question: {query}"
        )

        # Use the Groq API to generate the response
        chat_completion = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": prompt,
            }],
            model="llama3-8b-8192",
        )

        # Extract and return the response content from the Groq API
        response = chat_completion.choices[0].message.content
        return response
    except Exception as e:
        return f"Error during answer generation with Groq API: {str(e)}"

# Gradio app function
def gradio_interface(query):
    global vector_index, sentences

    if vector_index is None or sentences is None:
        return "Vector index or sentences not initialized properly."

    # Generate the answer using the Groq API and Llama model, reusing the
    # embedding model loaded at startup instead of reloading it per request
    answer = generate_answer_with_groq(query, vector_index, sentences, embedding_model)
    return answer

# Create the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Generative AI Foundations in Python: PDF-based Query Answering",
    description="Ask any question about the content of the uploaded PDF and receive answers generated via the Groq API with a Llama model."
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
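    # To expose the app beyond localhost (e.g. inside a container), launch()
    # also accepts host/port arguments, for example:
    # iface.launch(server_name="0.0.0.0", server_port=7860)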