chaouch committed
Commit 798614c · 1 parent: 8007797
__pycache__/chatbot_module.cpython-310.pyc ADDED
Binary file (826 Bytes).
 
__pycache__/recite_module.cpython-310.pyc CHANGED
Binary files a/__pycache__/recite_module.cpython-310.pyc and b/__pycache__/recite_module.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,20 +1,32 @@
 import gradio as gr
 from recite_module import run
 from chatbot_module import respond
+from doc_bot import Qa
 demo = gr.Blocks()
 
 
+title = "El_Professor"
+description = """
+Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in English. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and Microsoft's
+[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech:
+![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech-to-speech translation")
+"""
+
 demo1 = gr.Interface(
     run,
     [gr.Audio(sources=["microphone"], type="numpy"), gr.Image(
         type="filepath", label="Image")],
     gr.Image(type="pil", label="output Image"),
+    title=title,
+    description=description
 )
 demo2 = gr.Interface(
     run,
     [gr.Audio(sources=["upload"]), gr.Image(
         type="filepath", label="Image")],
-    [gr.Image(type="pil", label="output Image")]
+    [gr.Image(type="pil", label="output Image")],
+    title=title,
+    description=description
 )
 demo3 = gr.ChatInterface(
     respond,
@@ -34,8 +46,17 @@ demo3 = gr.ChatInterface(
         ),
     ],
 )
+demo4 = gr.Interface(fn=Qa,
+                     inputs=[gr.Image(
+                         type="filepath", label="Upload Image"),
+                         gr.Textbox(label="Question"),
+                         gr.Checkbox(label="Internet access")],
+                     outputs=[gr.Textbox(label="Answer"),
+                              gr.Textbox(label="Conversations", type="text")],
+                     title="Chatbot",
+                     description="")
 with demo:
-    gr.TabbedInterface([demo1, demo2, demo3], [
-        "Microphone", "Audio File", "Chatbot"])
+    gr.TabbedInterface([demo1, demo2, demo3, demo4], [
+        "Microphone", "Audio File", "general_Chatbot", "Document_Chatbot"])
 if __name__ == "__main__":
     demo.launch()
chatbot_module.py CHANGED
@@ -7,14 +7,7 @@ For more information on `huggingface_hub` Inference API support, please check th
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
@@ -38,26 +31,3 @@ def respond(
 
         response += token
         yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
-if __name__ == "__main__":
-    demo.launch()
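Note that the refactor above keeps `respond` as a streaming generator; only the signature is collapsed onto one line and the module-level demo is removed (its interface now lives in app.py). A minimal sketch of driving the generator directly, with illustrative values mirroring the removed defaults (512 tokens, temperature 0.7, top-p 0.95); this driver is a sketch, not part of the commit:

from chatbot_module import respond

history = []  # prior (user, assistant) turns
final = ""
for partial in respond("What is OCR?", history,
                       "You are a friendly Chatbot.",  # system_message
                       512, 0.7, 0.95):                # max_tokens, temperature, top_p
    final = partial  # each yield is the response accumulated so far
print(final)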
doc_bot.py ADDED
@@ -0,0 +1,201 @@
+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+import requests
+import tqdm as t
+import re
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+import pytesseract
+from PIL import Image
+from collections import deque
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
+model = AutoModelForTokenClassification.from_pretrained(
+    "dslim/bert-base-NER")
+summarizer = pipeline(
+    "summarization", model="facebook/bart-large-cnn", device=device)
+
+qa = pipeline("question-answering",
+              model="deepset/roberta-base-squad2", device=device)
+
+
+def extract_text(image):
+    """
+    Extracts text from an image using OCR.
+    Args:
+        image (PIL.Image.Image): Input image.
+    Returns:
+        dict: Extracted text with confidence and coordinates.
+    Raises:
+        ValueError: If the input image is not a PIL Image object.
+    """
+
+    result = pytesseract.image_to_data(image, output_type='dict')
+    n_boxes = len(result['level'])
+    data = {}
+    k = 0
+    for i in range(n_boxes):
+        if result['conf'][i] >= 0.3 and result['text'][i] != '' and result['conf'][i] != -1:
+            data[k] = {}
+            (x, y, w, h) = (result['left'][i], result['top']
+                            [i], result['width'][i], result['height'][i])
+            data[k]["coordinates"] = (x, y, w, h)
+            text, conf = result['text'][i], result['conf'][i]  # values of the current box
+            data[k]["text"] = text
+            data[k]["conf"] = conf
+            k += 1
+    return data
+
+
+def strong_entities(question):  # high-confidence NER tokens used as search terms
+    nlp = pipeline("ner", model=model, tokenizer=tokenizer)
+    ner_results = nlp(question)
+    search_terms = []
+    current_term = ""
+    for token in ner_results:
+        if token["score"] >= 0.99:
+            current_term += " " + token["word"]
+        else:
+            if current_term:
+                search_terms.append(current_term.strip())
+                current_term = ""
+            search_terms.append(token["word"])
+    if current_term:
+        search_terms.append(current_term.strip())
+    print(search_terms[0].split())
+    return search_terms[0].split()
+
+
+def wiki_search(question):  # fetch Wikipedia extracts for the detected entities
+    search_terms = strong_entities(question)
+    URL = "https://en.wikipedia.org/w/api.php"
+    corpus = []
+
+    for term in set(search_terms):  # Removing duplicates
+        SEARCHPAGE = term
+        params = {
+            "action": "query",
+            "format": "json",
+            "titles": SEARCHPAGE,
+            "prop": "extracts",
+            "explaintext": True
+        }
+
+        response = requests.get(URL, params=params)
+        try:
+            if response.status_code == 200:
+                data = response.json()
+                for page_id, page_data in t.tqdm(data["query"]["pages"].items()):
+                    if "extract" in page_data:  # Check if extract exists
+                        corpus.append(page_data["extract"])
+            else:
+                print("Failed to retrieve data:", response.status_code)
+        except Exception as e:
+            print("Failed to retrieve data:", e)
+
+    final_corpus = []
+    for text in corpus:
+        sections = re.split("\n\n\n== |==\n\n", text)
+        for section in sections:
+            if len(section.split()) >= 5:
+                final_corpus.append(section)
+    return " ".join(final_corpus[0:1])
+
+
+def semantic_search(corpus, question):  # return the document most similar to the question
+    model = SentenceTransformer("all-MiniLM-L6-v2")
+    question_embedding = model.encode(question)
+
+    max_similarity = -1
+    most_similar_doc = None
+    print(type(corpus[0]))
+    print(corpus)
+    for doc in t.tqdm(corpus):
+        if len(doc.split()) >= 130:
+            doc_summary = summarizer(
+                doc, max_length=130, min_length=30, do_sample=False)
+            if len(doc_summary) > 0 and "summary_text" in doc_summary[0]:
+                summarized_doc = doc_summary[0]["summary_text"]
+            else:
+                summarized_doc = doc
+        else:
+            summarized_doc = doc
+
+        doc_embedding = model.encode(summarized_doc)
+        similarity = cosine_similarity(
+            [question_embedding], [doc_embedding])[0][0]
+
+        if similarity > max_similarity:
+            max_similarity = similarity
+            most_similar_doc = summarized_doc
+
+    return most_similar_doc, max_similarity
+
+
+def dm(q, a, corpus, new_q, max_history_size=5):  # pick the stored corpus most relevant to the new question
+
+    history = deque(maxlen=max_history_size)
+    history.append({"question": q, "answer": a, "corpus": corpus})
+
+    best_corpus_index = None
+    max_similarity = -1
+
+    for i in range(len(history)):
+        _, q_similarity = semantic_search([history[i]["corpus"]], new_q)
+        _, a_similarity = semantic_search(
+            [history[i]["corpus"]], history[i]["answer"])
+        similarity = max(q_similarity, a_similarity)
+        if similarity > max_similarity:
+            max_similarity = similarity
+            best_corpus_index = i
+
+    if best_corpus_index is not None:
+        return history[best_corpus_index]["corpus"]
+    else:
+        return corpus
+
+
+def first_corp(data, question, botton=False):  # build the initial corpus from OCR text (plus Wikipedia if requested)
+
+    if botton:
+        corpus = [wiki_search(question)]  # wiki_search returns a single joined string
+        texts = [data[i]["text"] for i in range(len(data))]
+        text = " ".join(texts)
+        corpus = [cp + " " + text for cp in corpus]
+    else:
+        texts = [data[i]["text"] for i in range(len(data))]
+        text = " ".join(texts)
+        corpus = [text]
+    return " ".join(corpus)
+
+
+def Qa(image, new_q, internet_access=False):  # answer a question about an uploaded document image
+    old_q = ["how are you?"]
+    old_a = ["I am fine, thank you."]
+    im_text = extract_text(image)
+    if im_text:  # Check if text is extracted
+        old_corpus = [first_corp(im_text, old_q[-1], botton=internet_access)]
+    else:
+        old_corpus = []
+
+    if internet_access:
+        if not old_corpus:
+            # No OCR text available; rely on the question/answer history alone
+            corpus = dm(old_q[-1], old_a[-1], None, new_q)
+        else:
+            # Pass the OCR (and Wikipedia) corpus through the dialogue manager
+            corpus = dm(old_q[-1], old_a[-1], old_corpus[0], new_q)
+    else:
+        corpus = old_corpus[0] if old_corpus else None
+
+    a = qa(question=new_q, context=corpus)
+    old_q.append(new_q)
+    old_a.append(a["answer"])
+    old_corpus.append(corpus)
+
+    old_conversations = "\n".join(
+        f"Q: {q}\nA: {a}" for q, a in zip(old_q, old_a))
+
+    return a["answer"], old_conversations
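`Qa` above is the function wired into the new Document_Chatbot tab; because the tab uses gr.Image(type="filepath"), it receives a plain file path. A minimal local sketch of calling it directly, assuming the Tesseract binary is installed and "sample_page.png" is a placeholder scan; this sketch is not part of the commit:

from doc_bot import Qa

# Placeholder inputs: any scanned page with readable text and any question about it.
answer, conversations = Qa("sample_page.png",
                           "Who is mentioned in the document?",
                           internet_access=False)  # True additionally pulls Wikipedia extracts
print(answer)
print(conversations)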
recite_module.py CHANGED
@@ -251,7 +251,6 @@ def run(stream, image):
     im_array = np.array(Image.open(image))
     data2 = None
     for i in range(len(chunks)):
-        print(match(chunks[i], trns_text))
         if match(chunks[i], trns_text) >= 0.10:
             data2 = reindex_data(data, index[i], l)
             break