Spaces:

2001muhammadumair
/

Generative_Ai_Foundation_in_Python

Sleeping

App Files Files Community

2001muhammadumair committed on Oct 24, 2024

Commit

78549c1

verified ·

1 Parent(s): fcfb623

Create app.py

Browse files

Files changed (1) hide show

app.py +125 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import os
+import gradio as gr
+from groq import Groq
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+import PyPDF2
+# Groq API key: read it from the environment so the secret never lives in source control.
+# On Hugging Face Spaces, define GROQ_API_KEY as a Space secret.
+# NOTE(review): a key was previously hard-coded on this line; treat it as leaked and revoke it.
+grog_api_key = os.environ.get("GROQ_API_KEY")
+if not grog_api_key:
+    # Fail early with an actionable message instead of failing later on the first request
+    raise RuntimeError(
+        "GROQ_API_KEY environment variable is not set; "
+        "configure it before starting the app."
+    )
+# Initialize the Groq API client
+client = Groq(api_key=grog_api_key)
+# Path to the already uploaded book
+book_path = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'
+# Report whether the book is present (the read below still fails if it is missing)
+if os.path.exists(book_path):
+    print(f"Book found at: {book_path}")
+else:
+    print("Book not found!")
+# Function to read the PDF file
+def read_pdf(file_path):
+    """Extract the text of every page of a PDF file.
+
+    Args:
+        file_path: Path of the PDF file to open.
+
+    Returns:
+        The text of all pages, in page order, joined with newlines.
+    """
+    with open(file_path, 'rb') as file:
+        reader = PyPDF2.PdfReader(file)
+        # Extraction must happen while the file is still open.
+        # `or ""` guards pages for which no text comes back.
+        page_texts = [page.extract_text() or "" for page in reader.pages]
+    # Join with a newline so the last line of one page does not fuse with
+    # the first line of the next (the caller splits the text on '\n').
+    return "\n".join(page_texts)
+# Load the whole book into memory once, at import time
+book_text = read_pdf(book_path)
+# Echo the first 1000 characters so the extraction can be checked in the logs
+_book_preview = book_text[:1000]
+print(_book_preview)
+# Vectorization of the extracted PDF content
+def vectorize_text(text):
+    """Embed the non-blank lines of *text* and index them for similarity search.
+
+    Args:
+        text: Raw text; it is split on newlines and each non-blank line
+            becomes one indexed entry.
+
+    Returns:
+        A ``(index, sentences)`` tuple, where ``index`` is a FAISS
+        ``IndexFlatL2`` and ``sentences[i]`` is the text behind the i-th
+        indexed vector. Returns ``(None, None)`` when there is nothing to
+        index or when vectorization fails.
+    """
+    try:
+        # Blank lines carry no content; indexing them only pollutes the search results
+        stripped_lines = (line.strip() for line in text.split('\n'))
+        sentences = [line for line in stripped_lines if line]
+        if not sentences:
+            print("Error during vectorization: no text to vectorize")
+            return None, None
+        # Use Sentence Transformer to create embeddings
+        model = SentenceTransformer('all-MiniLM-L6-v2')
+        embeddings = model.encode(sentences, show_progress_bar=True)
+        # FAISS works on float32 matrices
+        embeddings = np.asarray(embeddings, dtype='float32')
+        # Create FAISS index for similarity search (exact L2 distance)
+        index = faiss.IndexFlatL2(embeddings.shape[1])
+        index.add(embeddings)
+        print(f"Added {len(sentences)} sentences to the vector store.")
+        return index, sentences
+    except Exception as e:
+        # Deliberately broad: callers rely on (None, None) to detect any failure
+        print(f"Error during vectorization: {str(e)}")
+        return None, None
+# Vectorize the extracted PDF text
+vector_index, sentences = vectorize_text(book_text)
+# vectorize_text signals failure with None, so test for None explicitly
+# rather than relying on the truthiness of the index object
+if vector_index is not None:
+    print("Vectorization complete.")
+else:
+    print("Vectorization failed.")
+# Function to generate embeddings for the query using the SentenceTransformer
+def generate_query_embedding(query, sentence_transformer_model):
+    """Encode one query with the given model.
+
+    Args:
+        query: The query text.
+        sentence_transformer_model: Object exposing ``encode(list_of_texts)``.
+
+    Returns:
+        Whatever ``encode`` returns for the one-element list ``[query]``.
+    """
+    single_query_batch = [query]
+    return sentence_transformer_model.encode(single_query_batch)
+# Function to generate answers using the Groq API with a Llama model
+def generate_answer_with_grog(query, vector_index, sentences, sentence_transformer_model):
+    """Answer *query* from the indexed text using the Groq chat API.
+
+    The query is embedded, the five nearest indexed sentences are retrieved,
+    and both the retrieved context and the question are sent to the model.
+
+    Args:
+        query: The user's question.
+        vector_index: Index exposing ``search(vectors, k=...)``.
+        sentences: Texts aligned with the vectors stored in ``vector_index``.
+        sentence_transformer_model: Model used to embed the query.
+
+    Returns:
+        The model's reply, or an error message string if any step fails.
+    """
+    try:
+        # Get the query embedding using the sentence transformer
+        query_embedding = generate_query_embedding(query, sentence_transformer_model)
+        # Find the top 5 most similar sentences in the vector store
+        _distances, indices = vector_index.search(np.array(query_embedding), k=5)
+        # FAISS pads missing neighbours with -1, which would otherwise
+        # silently select the last sentence via negative indexing
+        relevant_sentences = [sentences[i] for i in indices[0] if i >= 0]
+        combined_text = " ".join(relevant_sentences)
+        # Send the question together with the context; the context alone
+        # gives the model nothing to answer
+        prompt = (
+            "Answer the question using only the context below.\n\n"
+            f"Context:\n{combined_text}\n\n"
+            f"Question: {query}"
+        )
+        chat_completion = client.chat.completions.create(
+            messages=[{
+                "role": "user",
+                "content": prompt,
+            }],
+            model="llama3-8b-8192",
+        )
+        # Extract and return the response content from the Groq API
+        return chat_completion.choices[0].message.content
+    except Exception as e:
+        # UI boundary: report the failure as text instead of crashing the app
+        return f"Error during answer generation with grog API: {str(e)}"
+# Gradio app function
+# Query-embedding model, created on first use and reused by later requests
+_sentence_transformer_model = None
+def gradio_interface(query):
+    """Gradio callback: answer *query* from the indexed book text.
+
+    Args:
+        query: The text typed by the user.
+
+    Returns:
+        The generated answer, or a message explaining why none is available.
+    """
+    global _sentence_transformer_model
+    # Cheap check first: without an index there is no point loading the model
+    if vector_index is None or sentences is None:
+        return "Vector index or sentences not initialized properly."
+    # Load the model once instead of on every request
+    if _sentence_transformer_model is None:
+        _sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
+    # Generate the answer using the Groq API and Llama model
+    return generate_answer_with_grog(
+        query, vector_index, sentences, _sentence_transformer_model
+    )
+# Build the Gradio UI: one text box in, one text box out
+_INTERFACE_TITLE = "Generative_AI_Foundations_in_Python PDF-based Query Answering"
+_INTERFACE_DESCRIPTION = "Ask any question about the content in the uploaded PDF and receive answers generated by Grog API with Llama model."
+iface = gr.Interface(
+    title=_INTERFACE_TITLE,
+    description=_INTERFACE_DESCRIPTION,
+    fn=gradio_interface,
+    inputs="text",
+    outputs="text",
+)
+# Start the web app only when this file is run as a script
+if __name__ == "__main__":
+    iface.launch()