ajoy0071998 committed on
Commit
33889d7
·
verified ·
1 Parent(s): 08219dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -10,18 +10,28 @@ from summa import keywords
10
  from nltk.tokenize import sent_tokenize, word_tokenize
11
  from sentence_transformers import SentenceTransformer, util
12
  import time
 
13
 
14
  # Download required NLTK data
15
  nltk.download('punkt_tab', quiet=True)
16
  nltk.download('stopwords', quiet=True)
17
 
18
- # Load models
19
- nlp = spacy.load("en_core_web_sm")
 
 
 
 
 
 
 
 
 
20
  sbert_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
21
 
22
  CHARS_TO_REMOVE = "(){},;-'\":‘’“”"
23
 
24
- # Text processing functions (unchanged from your code)
25
  def clean_text(text):
26
  text = "".join(char if char not in CHARS_TO_REMOVE else " " for char in text)
27
  text = re.sub(r'\s+', ' ', text).strip()
@@ -115,7 +125,7 @@ def correct_keywords(query_keywords, stored_keywords, threshold=2):
115
  corrected_keywords.add(qk)
116
  return corrected_keywords
117
 
118
- # Bit Vector-based search and retrieval (adapted for multiple PDFs)
119
  def process_pdf(pdf_file):
120
  text = extract_text_from_pdf(pdf_file)
121
  text = clean_text(text)
@@ -210,8 +220,8 @@ if uploaded_files:
210
  # Query input
211
  query = st.text_input("Enter your query:")
212
 
213
- # Mistral API key (you may want to secure this differently in production)
214
- MISTRAL_API_KEY = "S3vzsvK7rP5in24joHgL55dVCjqYSi1F"
215
 
216
  if st.button("Search") and query and st.session_state.processed_pdfs:
217
  with st.spinner("Searching..."):
 
10
  from nltk.tokenize import sent_tokenize, word_tokenize
11
  from sentence_transformers import SentenceTransformer, util
12
  import time
13
+ import os
14
 
15
  # Download required NLTK data
16
  nltk.download('punkt_tab', quiet=True)
17
  nltk.download('stopwords', quiet=True)
18
 
19
+ # Load SpaCy model with a check to download if missing
20
+ def load_spacy_model():
21
+ model_name = "en_core_web_sm"
22
+ try:
23
+ return spacy.load(model_name)
24
+ except OSError:
25
+ st.warning(f"Model '{model_name}' not found. Downloading now...")
26
+ subprocess.run(["python", "-m", "spacy", "download", model_name], check=True)
27
+ return spacy.load(model_name)
28
+
29
+ nlp = load_spacy_model()
30
  sbert_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
31
 
32
  CHARS_TO_REMOVE = "(){},;-'\":‘’“”"
33
 
34
+ # Text processing functions (unchanged)
35
  def clean_text(text):
36
  text = "".join(char if char not in CHARS_TO_REMOVE else " " for char in text)
37
  text = re.sub(r'\s+', ' ', text).strip()
 
125
  corrected_keywords.add(qk)
126
  return corrected_keywords
127
 
128
+ # Bit Vector-based search and retrieval (unchanged)
129
  def process_pdf(pdf_file):
130
  text = extract_text_from_pdf(pdf_file)
131
  text = clean_text(text)
 
220
  # Query input
221
  query = st.text_input("Enter your query:")
222
 
223
+ # Mistral API key from environment variable (recommended for security)
224
+ MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY", "S3vzsvK7rP5in24joHgL55dVCjqYSi1F") # Fallback for local testing
225
 
226
  if st.button("Search") and query and st.session_state.processed_pdfs:
227
  with st.spinner("Searching..."):