milyiyo committed on
Commit
8c77fdd
·
1 Parent(s): d474737

Add the documentation to some functions

Browse files
Files changed (1) hide show
  1. functions.py +79 -3
functions.py CHANGED
@@ -30,6 +30,16 @@ text_splitter = CharacterTextSplitter()
30
 
31
 
32
  def get_nearest_examples(question: str, k: int):
 
 
 
 
 
 
 
 
 
 
33
  print(['get_nearest_examples', 'start'])
34
  question_embedding = get_embeddings([question]).cpu().detach().numpy()
35
  embeddings_dataset = shared['embeddings_dataset']
@@ -56,6 +66,15 @@ def get_embeddings(text):
56
 
57
 
58
  def build_faiss_index(text):
 
 
 
 
 
 
 
 
 
59
  print(['build_faiss_index', 'start'])
60
  text_list = split_text(text)
61
  emb_list = []
@@ -71,6 +90,15 @@ def build_faiss_index(text):
71
 
72
 
73
  def extract_text(url: str):
 
 
 
 
 
 
 
 
 
74
  print(['extract_text', 'start'])
75
  if url is None or url.strip() == '':
76
  return ''
@@ -83,20 +111,50 @@ def extract_text(url: str):
83
 
84
 
85
  def split_text(text: str):
 
 
 
 
 
 
 
 
 
86
  lines = text.split('\n')
87
  lines = [line.strip() for line in lines if line.strip()]
88
  return lines
89
 
90
 
91
  def remove_prompt(text: str) -> str:
 
 
 
 
 
 
 
 
 
92
  output_prompt = 'Output: '
93
- idx = text.index(output_prompt)
94
- res = text[idx + len(output_prompt):].strip()
95
- res = res.replace('Input: ', '')
 
 
 
96
  return res
97
 
98
 
99
  def summarize_text(text: str) -> str:
 
 
 
 
 
 
 
 
 
100
  print(['summarize_text', 'start'])
101
 
102
  print(['summarize_text', 'splitting text'])
@@ -132,6 +190,15 @@ def summarize_text_v1(text: str):
132
 
133
 
134
  def generate_question(text: str):
 
 
 
 
 
 
 
 
 
135
  print(['generate_question', 'start'])
136
  # Get a random section of the whole text to generate a question
137
  fragments = split_text(text)
@@ -156,6 +223,15 @@ def get_answer_context():
156
 
157
 
158
  def answer_question(question: str):
 
 
 
 
 
 
 
 
 
159
  print(['answer_question', 'start'])
160
  full_text = shared['full_text']
161
 
 
30
 
31
 
32
  def get_nearest_examples(question: str, k: int):
33
+ """
34
+ Returns the k nearest examples to a given question.
35
+
36
+ Args:
37
+ question (str): The input question to find nearest examples for.
38
+ k (int): The number of nearest examples to retrieve.
39
+
40
+ Returns:
41
+ The k nearest examples to the given question.
42
+ """
43
  print(['get_nearest_examples', 'start'])
44
  question_embedding = get_embeddings([question]).cpu().detach().numpy()
45
  embeddings_dataset = shared['embeddings_dataset']
 
66
 
67
 
68
  def build_faiss_index(text):
69
+ """
70
+ Builds a FAISS index for the given text.
71
+
72
+ Args:
73
+ text (str): The input text to build a FAISS index for.
74
+
75
+ Returns:
76
+ None.
77
+ """
78
  print(['build_faiss_index', 'start'])
79
  text_list = split_text(text)
80
  emb_list = []
 
90
 
91
 
92
  def extract_text(url: str):
93
+ """
94
+ Extracts the text content from a given URL and returns it as a string.
95
+
96
+ Args:
97
+ url (str): The URL to extract text content from.
98
+
99
+ Returns:
100
+ str: The text content extracted from the URL, or an empty string if the URL is invalid.
101
+ """
102
  print(['extract_text', 'start'])
103
  if url is None or url.strip() == '':
104
  return ''
 
111
 
112
 
113
  def split_text(text: str):
114
+ """
115
+ Splits a given text into a list of individual lines.
116
+
117
+ Args:
118
+ text (str): The input text to split into lines.
119
+
120
+ Returns:
121
+ List[str]: The non-empty lines of the input text, each stripped of leading and trailing whitespace.
122
+ """
123
  lines = text.split('\n')
124
  lines = [line.strip() for line in lines if line.strip()]
125
  return lines
126
 
127
 
128
  def remove_prompt(text: str) -> str:
129
+ """
130
+ Removes the prompt from a given text and returns the resulting text.
131
+
132
+ Args:
133
+ text (str): The input text to remove the prompt from.
134
+
135
+ Returns:
136
+ str: The text that follows the first 'Output: ' marker, stripped and with every 'Input: ' removed, or the original text if the marker is not found.
137
+ """
138
  output_prompt = 'Output: '
139
+ try:
140
+ idx = text.index(output_prompt)
141
+ res = text[idx + len(output_prompt):].strip()
142
+ res = res.replace('Input: ', '')
143
+ except ValueError:
144
+ res = text
145
  return res
146
 
147
 
148
  def summarize_text(text: str) -> str:
149
+ """
150
+ Generates a summary of the given text using a pre-trained language model.
151
+
152
+ Args:
153
+ text (str): The input text to generate a summary for.
154
+
155
+ Returns:
156
+ str: The generated summary for the input text.
157
+ """
158
  print(['summarize_text', 'start'])
159
 
160
  print(['summarize_text', 'splitting text'])
 
190
 
191
 
192
  def generate_question(text: str):
193
+ """
194
+ Generates a question based on a random section of the input text using a pre-trained language model.
195
+
196
+ Args:
197
+ text (str): The input text to generate a question for.
198
+
199
+ Returns:
200
+ str: The generated question for the input text.
201
+ """
202
  print(['generate_question', 'start'])
203
  # Get a random section of the whole text to generate a question
204
  fragments = split_text(text)
 
223
 
224
 
225
  def answer_question(question: str):
226
+ """
227
+ Generates an answer to the given question based on a pre-trained language model and a pre-built FAISS index.
228
+
229
+ Args:
230
+ question (str): The question to generate an answer for.
231
+
232
+ Returns:
233
+ str: The generated answer for the question.
234
+ """
235
  print(['answer_question', 'start'])
236
  full_text = shared['full_text']
237