mojad121 committed
Commit 23ee1af · verified · 1 Parent(s): 311e164

Upload 2 files

Files changed (2):
  1. app.py +18 -12
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,15 +1,18 @@
+
+import nltk
 import os
 import json
-import re
 import math
-import nltk
+import re
+import gradio as gr
 from collections import defaultdict, Counter
 from nltk.tokenize import word_tokenize
 from nltk.stem import PorterStemmer, WordNetLemmatizer
-import gradio as gr
-nltk.data.path.append("nltk_data")
 
-
-
+nltk.download("punkt")
+nltk.download("wordnet")
+
+stop_words = {"a", "is", "the", "of", "all", "and", "to", "can", "be", "as", "once", "for", "at", "am", "are", "has", "have", "had", "up", "his", "her", "in", "on", "no", "we", "do"}
+
 with open("docs.json", "r", encoding="utf-8") as f:
     docs_ds = json.load(f)
@@ -20,8 +23,6 @@ with open("queries.json", "r", encoding="utf-8") as f:
 documents = {int(doc["doc_id"]): doc["text"] for doc in docs_ds}
 queries = {int(q["query_id"]): q["text"] for q in queries_ds}
 
-stop_words = {"a", "is", "the", "of", "all", "and", "to", "can", "be", "as", "once", "for", "at", "am", "are", "has", "have", "had", "up", "his", "her", "in", "on", "no", "we", "do"}
-
 inverted_index = defaultdict(set)
 positional_index = defaultdict(lambda: defaultdict(list))
 tf_idf_vectors = defaultdict(dict)
@@ -121,18 +122,23 @@ def execute_vsm_query(user_input_query, alpha=0.001):
         scores[doc_id] = sim
     return sorted(scores, key=scores.get, reverse=True)
 
-def chat(query, method):
+process_documents(documents)
+
+def chatbot_fn(query, method):
     if not query:
         return "Query cannot be empty"
     if method == "Boolean":
         result = execute_boolean_query(query, documents)
     elif method == "Proximity":
         result = execute_proximity_query(query)
-    else:
+    elif method == "Vector Space Model":
         result = execute_vsm_query(query)
     return f"Result-set: {result}"
 
-process_documents(documents)
-
-demo = gr.Interface(fn=chat, inputs=["text", gr.Radio(["Boolean", "Proximity", "Vector Space Model"], label="Model")], outputs="text")
-demo.launch()
+iface = gr.Interface(
+    fn=chatbot_fn,
+    inputs=["text", gr.Radio(["Boolean", "Proximity", "Vector Space Model"], label="Method")],
+    outputs="text",
+    title="Information Retrieval Chatbot",
+)
+iface.launch()
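
Note on the hunks above: the new app.py calls process_documents(documents) before building the Gradio interface, but that function, together with execute_boolean_query, execute_proximity_query, and execute_vsm_query, lies outside the changed lines. The following is only a minimal sketch of how such a function might populate the three index structures declared in the diff (inverted_index, positional_index, tf_idf_vectors), assuming the module-level stop_words set and NLTK imports shown above; it is not the repository's actual implementation.

def process_documents_sketch(documents):
    # Illustrative sketch only, not the commit's code: fill the module-level
    # inverted_index, positional_index and tf_idf_vectors shown in the hunks above.
    stemmer = PorterStemmer()
    doc_terms = {}
    for doc_id, text in documents.items():
        # Tokenize, lowercase, drop punctuation and stop words, then stem.
        terms = [stemmer.stem(tok) for tok in word_tokenize(text.lower())
                 if tok.isalnum() and tok not in stop_words]
        doc_terms[doc_id] = terms
        for position, term in enumerate(terms):
            inverted_index[term].add(doc_id)                 # term -> doc ids
            positional_index[term][doc_id].append(position)  # term -> doc id -> positions
    # Weight each document's terms with raw tf * idf.
    total_docs = len(documents)
    for doc_id, terms in doc_terms.items():
        for term, tf in Counter(terms).items():
            idf = math.log(total_docs / len(inverted_index[term]))
            tf_idf_vectors[doc_id][term] = tf * idf
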
requirements.txt CHANGED
@@ -1,3 +1,3 @@
-huggingface_hub==0.25.2
+
 gradio
 nltk
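
With the pinned huggingface_hub entry dropped, the Space declares only gradio and nltk, and the tokenizer and WordNet data now come from the nltk.download("punkt") and nltk.download("wordnet") calls added at the top of app.py instead of a bundled nltk_data directory. Purely as a hedged sketch of a common variant (not what this commit does), those downloads can be guarded so restarts skip the network call when the data is already cached:

import nltk

def ensure_nltk_resource(lookup_path, package):
    # Illustrative helper, not part of the commit: download an NLTK package
    # only if nltk.data cannot already locate it on nltk.data.path.
    try:
        nltk.data.find(lookup_path)
    except LookupError:
        nltk.download(package)

ensure_nltk_resource("tokenizers/punkt", "punkt")    # used by word_tokenize
ensure_nltk_resource("corpora/wordnet", "wordnet")   # used by WordNetLemmatizer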