2001muhammadumair committed on
Commit 78549c1 · verified · 1 Parent(s): fcfb623

Create app.py

Files changed (1)
  1. app.py +125 -0
app.py ADDED
@@ -0,0 +1,125 @@
+ import os
+ import gradio as gr
+ from groq import Groq
+ from sentence_transformers import SentenceTransformer
+ import faiss
+ import numpy as np
+ import PyPDF2
+
+ # Groq API key (read from an environment variable rather than hard-coding a secret)
+ groq_api_key = os.environ.get("GROQ_API_KEY")
+
+ # Initialize the Groq API client
+ client = Groq(api_key=groq_api_key)
+
+ # Load the sentence-embedding model once so it can be reused for indexing and queries
+ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+ # Path to the already uploaded book
+ book_path = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'
+
+ # Check if the file exists
+ if os.path.exists(book_path):
+     print(f"Book found at: {book_path}")
+ else:
+     print("Book not found!")
+
+ # Function to read the PDF file and return its full text
+ def read_pdf(file_path):
+     with open(file_path, 'rb') as file:
+         reader = PyPDF2.PdfReader(file)
+         text = ""
+         for page in reader.pages:
+             # extract_text() can return None for pages with no extractable text
+             text += page.extract_text() or ""
+     return text
+
+ # Read the PDF content
+ book_text = read_pdf(book_path)
+ print(book_text[:1000])  # Print the first 1000 characters of the book for verification
+
+ # Vectorization of the extracted PDF content
+ def vectorize_text(text):
+     try:
+         # Split the text into lines (a rough proxy for sentences) and embed them
+         sentences = text.split('\n')
+         embeddings = embedding_model.encode(sentences, show_progress_bar=True)
+
+         # Create a FAISS index (exact L2 distance) for similarity search
+         index = faiss.IndexFlatL2(embeddings.shape[1])
+         index.add(np.array(embeddings))  # Add the embeddings to the index
+         print(f"Added {len(sentences)} sentences to the vector store.")
+
+         return index, sentences
+     except Exception as e:
+         print(f"Error during vectorization: {str(e)}")
+         return None, None
+
+ # Vectorize the extracted PDF text
+ vector_index, sentences = vectorize_text(book_text)
+
+ # Check whether the vectorization was successful
+ if vector_index is not None:
+     print("Vectorization complete.")
+ else:
+     print("Vectorization failed.")
+
+ # Function to generate an embedding for the query using the shared SentenceTransformer
+ def generate_query_embedding(query):
+     return embedding_model.encode([query])
+
+ # Function to generate answers using the Groq API with a Llama model
+ def generate_answer_with_groq(query, vector_index, sentences):
+     try:
+         # Get the query embedding using the sentence transformer
+         query_embedding = generate_query_embedding(query)
+
+         # Perform similarity search on the vector index (top 5 closest sentences)
+         D, I = vector_index.search(np.array(query_embedding), k=5)
+
+         # Retrieve the most relevant sentences
+         relevant_sentences = [sentences[i] for i in I[0]]
+
+         # Combine the retrieved context with the question for the final prompt
+         combined_text = " ".join(relevant_sentences)
+         prompt = (
+             "Answer the question using the following context from the book.\n\n"
+             f"Context: {combined_text}\n\nQuestion: {query}"
+         )
+
+         # Use the Groq API to generate the response
+         chat_completion = client.chat.completions.create(
+             messages=[{
+                 "role": "user",
+                 "content": prompt,
+             }],
+             model="llama3-8b-8192",
+         )
+
+         # Extract and return the response content from the Groq API
+         response = chat_completion.choices[0].message.content
+         return response
+     except Exception as e:
+         return f"Error during answer generation with the Groq API: {str(e)}"
+
+ # Gradio app function
+ def gradio_interface(query):
+     if vector_index is None or sentences is None:
+         return "Vector index or sentences not initialized properly."
+
+     # Generate the answer using the Groq API and the Llama model
+     answer = generate_answer_with_groq(query, vector_index, sentences)
+     return answer
+
+ # Create the Gradio interface
+ iface = gr.Interface(
+     fn=gradio_interface,
+     inputs="text",
+     outputs="text",
+     title="Generative_AI_Foundations_in_Python PDF-based Query Answering",
+     description="Ask any question about the content of the uploaded PDF and receive answers generated by the Groq API with a Llama model."
+ )
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     iface.launch()
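
A quick way to exercise the running app from Python is Gradio's client library. This is a minimal sketch, not part of the commit: it assumes the app was started locally with `python app.py` (with GROQ_API_KEY exported), that it serves on Gradio's default port 7860, and that the `gradio_client` package is installed; the example question is only an illustration.

from gradio_client import Client

# Connect to the locally running Gradio app (default host and port assumed)
client = Client("http://127.0.0.1:7860/")

# A single-input gr.Interface exposes its endpoint as "/predict" by default
result = client.predict("What does the book cover about fine-tuning?", api_name="/predict")
print(result)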