Spaces:

2001muhammadumair
/

Generative_Ai_Foundation_in_Python

Sleeping

2001muhammadumair committed on Oct 25, 2024

Commit

c836f81

verified ·

1 Parent(s): 74b836e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import PyPDF2
 from sklearn.metrics.pairwise import cosine_similarity
 from collections import Counter
-# ---------------------- Setup ---------------------
 logging.basicConfig(
     filename='query_logs.log',
@@ -23,6 +23,21 @@ PDF_PATH = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'
 sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
 cache = {}
 # --------------------- PDF Processing ---------------------
 def read_pdf(file_path):
@@ -41,21 +56,10 @@ def read_pdf(file_path):
                     sentences_with_pages.append({'sentence': sentence, 'page_number': page_num + 1})
     return sentences_with_pages
 sentences_with_pages = read_pdf(PDF_PATH)
 vector_index, sentences_with_pages = vectorize_text(sentences_with_pages)
-def vectorize_text(sentences_with_pages):
-    try:
-        sentences = [item['sentence'] for item in sentences_with_pages]
-        embeddings = sentence_transformer_model.encode(sentences, show_progress_bar=True)
-        index = faiss.IndexFlatL2(embeddings.shape[1])
-        index.add(np.array(embeddings))
-        logging.info(f"Added {len(sentences)} sentences to the vector store.")
-        return index, sentences_with_pages
-    except Exception as e:
-        logging.error(f"Error during vectorization: {str(e)}")
-        return None, None
 # --------------------- Query Handling ---------------------
 def generate_query_embedding(query):

 from sklearn.metrics.pairwise import cosine_similarity
 from collections import Counter
+# --------------------- Setup ---------------------
 logging.basicConfig(
     filename='query_logs.log',
 sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
 cache = {}
+# --------------------- Vectorization Function ---------------------
+def vectorize_text(sentences_with_pages):
+    """Vectorize sentences using SentenceTransformer and create a FAISS index."""
+    try:
+        sentences = [item['sentence'] for item in sentences_with_pages]
+        embeddings = sentence_transformer_model.encode(sentences, show_progress_bar=True)
+        index = faiss.IndexFlatL2(embeddings.shape[1])
+        index.add(np.array(embeddings))
+        logging.info(f"Added {len(sentences)} sentences to the vector store.")
+        return index, sentences_with_pages
+    except Exception as e:
+        logging.error(f"Error during vectorization: {str(e)}")
+        return None, None
 # --------------------- PDF Processing ---------------------
 def read_pdf(file_path):
                     sentences_with_pages.append({'sentence': sentence, 'page_number': page_num + 1})
     return sentences_with_pages
+# Read and Vectorize PDF Content
 sentences_with_pages = read_pdf(PDF_PATH)
 vector_index, sentences_with_pages = vectorize_text(sentences_with_pages)
 # --------------------- Query Handling ---------------------
 def generate_query_embedding(query):