Tayel committed on
Commit
3aedc1b
·
verified ·
1 Parent(s): 95ee4c0

Upload Final_Revision_Project_DEPI.ipynb

Browse files
Files changed (1) hide show
  1. Final_Revision_Project_DEPI.ipynb +296 -0
Final_Revision_Project_DEPI.ipynb ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "source": [
20
+ "%pip install gtts==2.5.3\n",
21
+ "%pip install gradio==4.44.0\n"
22
+ ],
23
+ "metadata": {
24
+ "colab": {
25
+ "base_uri": "https://localhost:8080/"
26
+ },
27
+ "id": "ufQmdSNceI4n",
28
+ "outputId": "2c7a47dc-d12e-4913-fe64-4d227fd71a5b"
29
+ },
30
+ "execution_count": 2,
31
+ "outputs": [
32
+ {
33
+ "output_type": "stream",
34
+ "name": "stdout",
35
+ "text": [
36
+ "Collecting gtts\n",
37
+ " Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)\n",
38
+ "Requirement already satisfied: requests<3,>=2.27 in /usr/local/lib/python3.10/dist-packages (from gtts) (2.32.3)\n",
39
+ "Requirement already satisfied: click<8.2,>=7.1 in /usr/local/lib/python3.10/dist-packages (from gtts) (8.1.7)\n",
40
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.3.2)\n",
41
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.10)\n",
42
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2.2.3)\n",
43
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2024.8.30)\n",
44
+ "Downloading gTTS-2.5.3-py3-none-any.whl (29 kB)\n",
45
+ "Installing collected packages: gtts\n",
46
+ "Successfully installed gtts-2.5.3\n",
47
+ "Collecting gradio\n",
48
+ " Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)\n",
49
+ "Collecting aiofiles<24.0,>=22.0 (from gradio)\n",
50
+ " Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n",
51
+ "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n",
52
+ "Collecting fastapi<1.0 (from gradio)\n",
53
+ " Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)\n",
54
+ "Collecting ffmpy (from gradio)\n",
55
+ " Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)\n",
56
+ "Collecting gradio-client==1.3.0 (from gradio)\n",
57
+ " Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)\n",
58
+ "Collecting httpx>=0.24.1 (from gradio)\n",
59
+ " Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n",
60
+ "Requirement already satisfied: huggingface-hub>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.24.7)\n",
61
+ "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.4.5)\n",
62
+ "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.4)\n",
63
+ "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.5)\n",
64
+ "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n",
65
+ "Requirement already satisfied: numpy<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.26.4)\n",
66
+ "Collecting orjson~=3.0 (from gradio)\n",
67
+ " Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n",
68
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
69
+ "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (24.1)\n",
70
+ "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.4)\n",
71
+ "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (10.4.0)\n",
72
+ "Requirement already satisfied: pydantic>=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.9.2)\n",
73
+ "Collecting pydub (from gradio)\n",
74
+ " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
75
+ "Collecting python-multipart>=0.0.9 (from gradio)\n",
76
+ " Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)\n",
77
+ "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.2)\n",
78
+ "Collecting ruff>=0.2.2 (from gradio)\n",
79
+ " Downloading ruff-0.6.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n",
80
+ "Collecting semantic-version~=2.0 (from gradio)\n",
81
+ " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n",
82
+ "Collecting tomlkit==0.12.0 (from gradio)\n",
83
+ " Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n",
84
+ "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.12.5)\n",
85
+ "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.12.2)\n",
86
+ "Requirement already satisfied: urllib3~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.2.3)\n",
87
+ "Collecting uvicorn>=0.14.0 (from gradio)\n",
88
+ " Downloading uvicorn-0.31.0-py3-none-any.whl.metadata (6.6 kB)\n",
89
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==1.3.0->gradio) (2024.6.1)\n",
90
+ "Collecting websockets<13.0,>=10.0 (from gradio-client==1.3.0->gradio)\n",
91
+ " Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
92
+ "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n",
93
+ "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n",
94
+ "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (1.2.2)\n",
95
+ "Collecting starlette<0.39.0,>=0.37.2 (from fastapi<1.0->gradio)\n",
96
+ " Downloading starlette-0.38.6-py3-none-any.whl.metadata (6.0 kB)\n",
97
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (2024.8.30)\n",
98
+ "Collecting httpcore==1.* (from httpx>=0.24.1->gradio)\n",
99
+ " Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n",
100
+ "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.24.1->gradio)\n",
101
+ " Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
102
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (3.16.1)\n",
103
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (2.32.3)\n",
104
+ "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (4.66.5)\n",
105
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.3.0)\n",
106
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.12.1)\n",
107
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.53.1)\n",
108
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.7)\n",
109
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.4)\n",
110
+ "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n",
111
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.2)\n",
112
+ "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
113
+ "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (0.7.0)\n",
114
+ "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (2.23.4)\n",
115
+ "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n",
116
+ "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n",
117
+ "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (13.8.1)\n",
118
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n",
119
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n",
120
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.18.0)\n",
121
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.3->gradio) (3.3.2)\n",
122
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n",
123
+ "Downloading gradio-4.44.0-py3-none-any.whl (18.1 MB)\n",
124
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.1/18.1 MB\u001b[0m \u001b[31m49.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
125
+ "\u001b[?25hDownloading gradio_client-1.3.0-py3-none-any.whl (318 kB)\n",
126
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.7/318.7 kB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
127
+ "\u001b[?25hDownloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n",
128
+ "Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
129
+ "Downloading fastapi-0.115.0-py3-none-any.whl (94 kB)\n",
130
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.6/94.6 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
131
+ "\u001b[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)\n",
132
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
133
+ "\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
134
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
135
+ "\u001b[?25hDownloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n",
136
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
137
+ "\u001b[?25hDownloading python_multipart-0.0.12-py3-none-any.whl (23 kB)\n",
138
+ "Downloading ruff-0.6.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)\n",
139
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.9/10.9 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
140
+ "\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
141
+ "Downloading uvicorn-0.31.0-py3-none-any.whl (63 kB)\n",
142
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.7/63.7 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
143
+ "\u001b[?25hDownloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)\n",
144
+ "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
145
+ "Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
146
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
147
+ "\u001b[?25hDownloading starlette-0.38.6-py3-none-any.whl (71 kB)\n",
148
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
149
+ "\u001b[?25hDownloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
150
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
151
+ "\u001b[?25hInstalling collected packages: pydub, websockets, tomlkit, semantic-version, ruff, python-multipart, orjson, h11, ffmpy, aiofiles, uvicorn, starlette, httpcore, httpx, fastapi, gradio-client, gradio\n",
152
+ "Successfully installed aiofiles-23.2.1 fastapi-0.115.0 ffmpy-0.4.0 gradio-4.44.0 gradio-client-1.3.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.2 orjson-3.10.7 pydub-0.25.1 python-multipart-0.0.12 ruff-0.6.8 semantic-version-2.10.0 starlette-0.38.6 tomlkit-0.12.0 uvicorn-0.31.0 websockets-12.0\n"
153
+ ]
154
+ }
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 3,
160
+ "metadata": {
161
+ "colab": {
162
+ "base_uri": "https://localhost:8080/",
163
+ "height": 626
164
+ },
165
+ "id": "s-QPCL4neAAp",
166
+ "outputId": "dea5531e-c00a-455b-fab0-3639b94b39e6"
167
+ },
168
+ "outputs": [
169
+ {
170
+ "output_type": "stream",
171
+ "name": "stdout",
172
+ "text": [
173
+ "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
174
+ "\n",
175
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
176
+ "Running on public URL: https://051fe2f8995655b4cd.gradio.live\n",
177
+ "\n",
178
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
179
+ ]
180
+ },
181
+ {
182
+ "output_type": "display_data",
183
+ "data": {
184
+ "text/plain": [
185
+ "<IPython.core.display.HTML object>"
186
+ ],
187
+ "text/html": [
188
+ "<div><iframe src=\"https://051fe2f8995655b4cd.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
189
+ ]
190
+ },
191
+ "metadata": {}
192
+ },
193
+ {
194
+ "output_type": "execute_result",
195
+ "data": {
196
+ "text/plain": []
197
+ },
198
+ "metadata": {},
199
+ "execution_count": 3
200
+ }
201
+ ],
202
+ "source": [
203
+ "# Import necessary libraries and modules\n",
204
+ "from transformers import BlipProcessor, BlipForConditionalGeneration, MBartForConditionalGeneration, MBart50Tokenizer\n",
205
+ "from gtts import gTTS\n",
206
+ "from PIL import Image\n",
207
+ "import gradio as gr\n",
208
+ "\n",
209
+ "# Pipeline Component 1: Image Captioning Model\n",
210
+ "class ImageToText:\n",
211
+ "    \"\"\"Image captioner backed by the Salesforce BLIP base captioning checkpoint.\"\"\"\n",
212
+ "    def __init__(self):\n",
213
+ "        checkpoint = \"Salesforce/blip-image-captioning-base\"  # processor and model share one checkpoint\n",
214
+ "        self.processor = BlipProcessor.from_pretrained(checkpoint)\n",
215
+ "        self.model = BlipForConditionalGeneration.from_pretrained(checkpoint)\n",
216
+ "        print(\"BLIP Image Captioning Model Loaded\")\n",
217
+ "\n",
218
+ "    def generate_caption(self, img):\n",
219
+ "        \"\"\"Return the caption text that the model generates for `img`.\"\"\"\n",
220
+ "        model_inputs = self.processor(images=img, return_tensors=\"pt\")\n",
221
+ "        token_ids = self.model.generate(**model_inputs)\n",
222
+ "        return self.processor.decode(token_ids[0], skip_special_tokens=True)\n",
223
+ "\n",
224
+ "# Pipeline Component 2: Arabic Translation Model (mBART)\n",
225
+ "class ArabicTranslator:\n",
226
+ "    \"\"\"English-to-Arabic translator built on the mBART-50 many-to-many checkpoint.\"\"\"\n",
227
+ "    def __init__(self):\n",
228
+ "        checkpoint = \"facebook/mbart-large-50-many-to-many-mmt\"\n",
229
+ "        self.tokenizer = MBart50Tokenizer.from_pretrained(checkpoint, src_lang=\"en_XX\")  # source language lives on the tokenizer\n",
230
+ "        self.model = MBartForConditionalGeneration.from_pretrained(checkpoint)\n",
231
+ "        print(\"mBART Arabic Translation Model Loaded\")\n",
232
+ "\n",
233
+ "    def translate(self, text):\n",
234
+ "        \"\"\"Translate English `text` to Arabic and return the translated string.\"\"\"\n",
235
+ "        inputs = self.tokenizer(text, return_tensors=\"pt\")\n",
236
+ "        translated = self.model.generate(**inputs, forced_bos_token_id=self.tokenizer.lang_code_to_id[\"ar_AR\"])  # pass the attention mask too; force Arabic as the first token\n",
237
+ "        return self.tokenizer.batch_decode(translated, skip_special_tokens=True)[0]\n",
238
+ "\n",
239
+ "# Pipeline Component 3: Text-to-Speech Model (gTTS)\n",
240
+ "class TextToSpeech:\n",
241
+ "    \"\"\"gTTS-based speech synthesizer; `lang` is the gTTS language code (Arabic by default).\"\"\"\n",
242
+ "    def __init__(self, lang='ar'):\n",
243
+ "        self.lang = lang\n",
244
+ "\n",
245
+ "    def generate_audio(self, text):\n",
246
+ "        \"\"\"Synthesize `text` and return the path of a new, uniquely named MP3 file in the working directory.\"\"\"\n",
247
+ "        import uuid  # function-scope import so the notebook's import lines need no change\n",
248
+ "        audio_file_path = f\"output_{uuid.uuid4().hex}.mp3\"  # unique per call: a fixed 'output.mp3' is overwritten by overlapping requests\n",
249
+ "        gTTS(text=text, lang=self.lang, slow=False).save(audio_file_path)\n",
250
+ "        return audio_file_path\n",
251
+ "\n",
252
+ "# Main Pipeline Integration\n",
253
+ "class ImageToArabicSpeechPipeline:\n",
254
+ "    \"\"\"Chains image captioning, English-to-Arabic translation and speech synthesis.\"\"\"\n",
255
+ "    def __init__(self):\n",
256
+ "        # One instance of each stage, created in pipeline order.\n",
257
+ "        self.caption_model = ImageToText()\n",
258
+ "        self.translation_model = ArabicTranslator()\n",
259
+ "        self.tts_model = TextToSpeech()\n",
260
+ "\n",
261
+ "    def process_image(self, img):\n",
262
+ "        \"\"\"Return (caption, Arabic translation, audio file path) for `img`.\"\"\"\n",
263
+ "        english = self.caption_model.generate_caption(img)\n",
264
+ "        arabic = self.translation_model.translate(english)\n",
265
+ "        return english, arabic, self.tts_model.generate_audio(arabic)\n",
266
+ "\n",
267
+ "# Gradio Interface Setup\n",
268
+ "def demo(image):\n",
269
+ "    \"\"\"Gradio callback: return (caption, Arabic translation, audio file path) for the image file at `image`.\"\"\"\n",
270
+ "    if not hasattr(demo, \"pipeline\"):  # build once and reuse; constructing it loads both pretrained models\n",
271
+ "        demo.pipeline = ImageToArabicSpeechPipeline()\n",
272
+ "    with Image.open(image) as img:  # close the image file once processing is done\n",
273
+ "        return demo.pipeline.process_image(img)\n",
274
+ "\n",
275
+ "# Define Gradio Interface\n",
276
+ "# demo() opens its argument as a file, hence type=\"filepath\".\n",
277
+ "image_input = gr.Image(type=\"filepath\")\n",
278
+ "# Output order mirrors demo()'s return tuple: caption, translation, audio file.\n",
279
+ "output_components = [gr.Textbox(label=\"Caption\"), gr.Textbox(label=\"Translated Text\"), gr.Audio(label=\"Generated Speech\")]\n",
280
+ "iface = gr.Interface(fn=demo, inputs=image_input, outputs=output_components)\n",
281
+ "\n",
282
+ "# Serve the interface\n",
283
+ "iface.launch()\n"
284
+ ]
285
+ },
286
+ {
287
+ "cell_type": "code",
288
+ "source": [],
289
+ "metadata": {
290
+ "id": "BqT55AmHeBCv"
291
+ },
292
+ "execution_count": null,
293
+ "outputs": []
294
+ }
295
+ ]
296
+ }