Spaces:
Running
Running
chabane
committed on
Commit
·
0b9c1c4
1
Parent(s):
1f661eb
update the ui and the main and req
Browse files- main.py +52 -59
- requirements.txt +1 -0
- static/scripts/dataViz.js +2 -2
- static/scripts/image_iterpretation.js +17 -13
- static/scripts/summerize.js +1 -3
- static/styles/notification.css +1 -1
main.py
CHANGED
@@ -20,7 +20,6 @@ import fitz
|
|
20 |
|
21 |
from huggingface_hub import snapshot_download
|
22 |
from transformers import (
|
23 |
-
TFAutoModelForVision2Seq, AutoProcessor,
|
24 |
AutoTokenizer, AutoModelForSeq2SeqLM,
|
25 |
AutoModelForCausalLM,pipeline
|
26 |
)
|
@@ -73,21 +72,6 @@ app.add_middleware(
|
|
73 |
allow_headers=["*"],
|
74 |
)
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
app.mount("/static",StaticFiles(directory='static'),'static')
|
92 |
templates = Jinja2Templates(directory='templates')
|
93 |
|
@@ -108,55 +92,62 @@ def index(req:Request):
|
|
108 |
|
109 |
|
110 |
|
111 |
-
@app.post("/
|
112 |
-
def
|
113 |
-
extension =
|
114 |
Supported_extensions = ["png","jpg","jpeg"]
|
115 |
if extension not in Supported_extensions:
|
116 |
-
return {"error": "Unsupported file type"}
|
117 |
-
image = Image.open(
|
118 |
global interpreter
|
|
|
|
|
|
|
|
|
119 |
|
120 |
-
caption =
|
121 |
-
|
122 |
-
return {"caption": caption[0]['generated_text']}
|
123 |
|
124 |
@app.post("/summerize")
|
125 |
def summerzation(file:UploadFile=File(...)):
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
150 |
|
151 |
|
152 |
@app.post("/plot")
|
153 |
-
def plot(
|
154 |
try:
|
155 |
extension = file.filename.split(".")[-1]
|
156 |
Supported_extensions = ["xlsx","xls"]
|
157 |
if extension not in Supported_extensions:
|
158 |
-
return {"error": "Unsupported file type"}
|
159 |
-
|
|
|
|
|
160 |
message = f"""
|
161 |
You are a helpful assistant that helps users write Python code.
|
162 |
## Requirements:
|
@@ -173,6 +164,7 @@ df.dtypes:{df.dtypes.to_dict()}
|
|
173 |
-don't use any other libraries except pandas, matplotlib, seaborn.
|
174 |
-don't use any other functions except the ones provided in the libraries.
|
175 |
-don't write the code for the dataframe creation.
|
|
|
176 |
-exclude plt.show() from the code.
|
177 |
-you have to write the code in a markdown code block.
|
178 |
-make sure that the type of the chart is compatible with the dtypes of the columns
|
@@ -181,14 +173,14 @@ df.dtypes:{df.dtypes.to_dict()}
|
|
181 |
-if the user task is not clear or there is an error like the column names are not in the dataframe, raise an
|
182 |
error.
|
183 |
|
184 |
-
##Prompt: {
|
185 |
"""
|
186 |
global generator
|
187 |
output = generator(message, max_length=1000)
|
188 |
match = re.search(r'```python(.*?)```', output[0]["generated_text"], re.DOTALL)
|
189 |
code =''
|
190 |
if not match:
|
191 |
-
return {"error": "Can't generate the plot"}
|
192 |
|
193 |
code = match.group(1).replace("plt.show()\n","")
|
194 |
|
@@ -204,19 +196,20 @@ error.
|
|
204 |
plt.savefig(buf, format='png')
|
205 |
buf.seek(0)
|
206 |
base64_image = base64.b64encode(buf.getvalue()).decode('utf-8')
|
207 |
-
return {"plot": f"data:image/png;base64,{base64_image}"}
|
208 |
except Exception as e:
|
209 |
-
|
|
|
210 |
except Exception as exp:
|
211 |
-
return {"error":"Internel Server Error:"+str(exp)}
|
212 |
|
213 |
|
214 |
|
215 |
|
216 |
|
217 |
|
218 |
-
def get_text_from_PDF(
|
219 |
-
doc = fitz.open(
|
220 |
text = ""
|
221 |
for page in doc:
|
222 |
text += page.get_text()
|
@@ -240,6 +233,6 @@ def get_text_from_DOC(file):
|
|
240 |
return text
|
241 |
|
242 |
def get_text_from_EXCEL(file):
|
243 |
-
df = pd.read_excel(file)
|
244 |
text = df.to_string()
|
245 |
return text
|
|
|
20 |
|
21 |
from huggingface_hub import snapshot_download
|
22 |
from transformers import (
|
|
|
23 |
AutoTokenizer, AutoModelForSeq2SeqLM,
|
24 |
AutoModelForCausalLM,pipeline
|
25 |
)
|
|
|
72 |
allow_headers=["*"],
|
73 |
)
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
app.mount("/static",StaticFiles(directory='static'),'static')
|
76 |
templates = Jinja2Templates(directory='templates')
|
77 |
|
|
|
92 |
|
93 |
|
94 |
|
95 |
+
@app.post("/interpret")
def interpret(file_img:UploadFile=File(...)):
    """Caption an uploaded image with the module-level ``interpreter``.

    Args:
        file_img: Uploaded image; only png, jpg and jpeg extensions are
            accepted (compared case-insensitively).

    Returns:
        JSONResponse with ``{"caption": ...}`` and status 200 on success,
        or ``{"error": ...}`` with status 400 when the extension is not
        supported or the image cannot be opened or interpreted.
    """
    # The filename may be missing on some clients; treat that as "no extension".
    extension = (file_img.filename or "").rsplit(".", 1)[-1].lower()
    supported_extensions = ("png", "jpg", "jpeg")
    if extension not in supported_extensions:
        return JSONResponse(content={"error": "Unsupported file type"},status_code=400)

    try:
        # Opening is inside the try so a corrupt or mislabeled upload yields
        # the same 400 response as a failed interpretation instead of an
        # unhandled server error.
        image = Image.open(file_img.file)
        caption = interpreter(image)
    except Exception:
        return JSONResponse(content={"error": "Can't interpret the image "},status_code=400)

    # NOTE(review): assumes the interpreter returns a list whose first item
    # has a 'generated_text' key -- confirm against the pipeline in use.
    return JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
|
|
|
|
|
109 |
|
110 |
@app.post("/summerize")
def summerzation(file:UploadFile=File(...)):
    """Summarize the text of an uploaded pdf, docx, pptx or xlsx document.

    The extracted text is cut into 1024-character slices, each slice is
    passed to the module-level ``summarizer`` and the partial summaries
    are concatenated in order.

    Args:
        file: Uploaded document; the type is chosen from the filename
            extension (compared case-insensitively).

    Returns:
        JSONResponse with ``{"summary": ...}`` and status 200 on success;
        ``{"error": ...}`` with status 400 for an unsupported or empty
        file, 403 when a slice fails to summarize, and 500 for any other
        failure.
    """
    try:
        # The filename may be missing on some clients; treat that as "no extension".
        extension = (file.filename or "").rsplit(".", 1)[-1].lower()
        if extension == "pdf":
            # get_text_from_PDF hands its argument to fitz.open(stream=...),
            # which expects in-memory data (bytes/bytearray/BytesIO), so the
            # upload is read into bytes first.
            text = get_text_from_PDF(file.file.read())
        elif extension == "docx":
            text = get_text_from_DOC(file.file)
        elif extension == "pptx":
            text = get_text_from_PPT(file.file)
        elif extension == "xlsx":
            text = get_text_from_EXCEL(file.file)
        else:
            return JSONResponse(content={"error": "Unsupported file type"},status_code=400)

        if not text.strip():
            return JSONResponse(content={'error':'File is emplty'},status_code=400)

        partial_summaries = []
        for start in range(0, len(text), 1024):
            try:
                summary = summarizer(text[start:start+1024], max_length=150, min_length=30, do_sample=False)
                partial_summaries.append(summary[0]['summary_text'])
            except Exception as e:
                return JSONResponse(content={"error": f"Summarization failed: {str(e)}"},status_code=403)
        return JSONResponse(content={"summary": "".join(partial_summaries)},status_code=200)
    except Exception as exp:
        return JSONResponse(content={"error":"Internel Server Error:"+str(exp)} ,status_code=500)
|
139 |
|
140 |
|
141 |
@app.post("/plot")
|
142 |
+
def plot(user_need:str,file:UploadFile=File(...)):
|
143 |
try:
|
144 |
extension = file.filename.split(".")[-1]
|
145 |
Supported_extensions = ["xlsx","xls"]
|
146 |
if extension not in Supported_extensions:
|
147 |
+
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
148 |
+
|
149 |
+
df = pd.read_excel(io=file.file)
|
150 |
+
|
151 |
message = f"""
|
152 |
You are a helpful assistant that helps users write Python code.
|
153 |
## Requirements:
|
|
|
164 |
-don't use any other libraries except pandas, matplotlib, seaborn.
|
165 |
-don't use any other functions except the ones provided in the libraries.
|
166 |
-don't write the code for the dataframe creation.
|
167 |
+
-check if the columns has a nan values and raise exception if yes .
|
168 |
-exclude plt.show() from the code.
|
169 |
-you have to write the code in a markdown code block.
|
170 |
-make sure that the type of the chart is compatible with the dtypes of the columns
|
|
|
173 |
-if the user task is not clear or there is an error like the column names are not in the dataframe, raise an
|
174 |
error.
|
175 |
|
176 |
+
##Prompt: {user_need}.
|
177 |
"""
|
178 |
global generator
|
179 |
output = generator(message, max_length=1000)
|
180 |
match = re.search(r'```python(.*?)```', output[0]["generated_text"], re.DOTALL)
|
181 |
code =''
|
182 |
if not match:
|
183 |
+
return JSONResponse(content={"error": "Can't generate the plot"},status_code=403)
|
184 |
|
185 |
code = match.group(1).replace("plt.show()\n","")
|
186 |
|
|
|
196 |
plt.savefig(buf, format='png')
|
197 |
buf.seek(0)
|
198 |
base64_image = base64.b64encode(buf.getvalue()).decode('utf-8')
|
199 |
+
return JSONResponse(content={"plot": f"data:image/png;base64,{base64_image}"},status_code=200)
|
200 |
except Exception as e:
|
201 |
+
print(e)
|
202 |
+
return JSONResponse(content={"error": str(e) },status_code=500)
|
203 |
except Exception as exp:
|
204 |
+
return JSONResponse(content={"error":"Internel Server Error:"+str(exp)} ,status_code=500)
|
205 |
|
206 |
|
207 |
|
208 |
|
209 |
|
210 |
|
211 |
+
def get_text_from_PDF(file_content):
|
212 |
+
doc = fitz.open(stream=file_content, filetype="pdf")
|
213 |
text = ""
|
214 |
for page in doc:
|
215 |
text += page.get_text()
|
|
|
233 |
return text
|
234 |
|
235 |
def get_text_from_EXCEL(file):
    """Return the spreadsheet in ``file`` rendered as plain text.

    ``file`` is passed straight to ``pandas.read_excel`` and the resulting
    DataFrame is formatted with ``DataFrame.to_string``.
    """
    return pd.read_excel(io=file).to_string()
|
requirements.txt
CHANGED
@@ -4,6 +4,7 @@ python-docx
|
|
4 |
python-pptx
|
5 |
pandas
|
6 |
python-multipart
|
|
|
7 |
PyMuPDF
|
8 |
matplotlib
|
9 |
transformers
|
|
|
4 |
python-pptx
|
5 |
pandas
|
6 |
python-multipart
|
7 |
+
openpyxl
|
8 |
PyMuPDF
|
9 |
matplotlib
|
10 |
transformers
|
static/scripts/dataViz.js
CHANGED
@@ -72,12 +72,12 @@ label.addEventListener("drop", (e) => {
|
|
72 |
});
|
73 |
form.addEventListener("submit", (e) => {
|
74 |
e.preventDefault();
|
75 |
-
const prompt = prompt_txt.value.trim();
|
76 |
if (prompt && file) {
|
77 |
showchart("");
|
78 |
} else {
|
79 |
console.log("no file selected or no prompted");
|
80 |
-
}
|
81 |
});
|
82 |
const showchart = (url) => {
|
83 |
chart_wrapper.innerHTML = `
|
|
|
72 |
});
|
73 |
form.addEventListener("submit", (e) => {
|
74 |
e.preventDefault();
|
75 |
+
/*const prompt = prompt_txt.value.trim();
|
76 |
if (prompt && file) {
|
77 |
showchart("");
|
78 |
} else {
|
79 |
console.log("no file selected or no prompted");
|
80 |
+
}*/
|
81 |
});
|
82 |
const showchart = (url) => {
|
83 |
chart_wrapper.innerHTML = `
|
static/scripts/image_iterpretation.js
CHANGED
@@ -75,17 +75,21 @@ label.addEventListener("drop", (e) => {
|
|
75 |
});
|
76 |
form.addEventListener("submit", async (e) => {
|
77 |
e.preventDefault();
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
91 |
});
|
|
|
75 |
});
|
76 |
// Submit handler: uploads the selected image to /interpret and shows the
// returned caption, or a toast when something goes wrong.
form.addEventListener("submit", async (e) => {
  e.preventDefault();
  if (!file) {
    toast("No Image selected ...", "warning");
    return;
  }

  const data = new FormData();
  data.append("file_img", file);

  let result;
  try {
    const response = await fetch("/interpret", {
      method: "post",
      body: data,
    });
    result = await response.json();
  } catch (err) {
    // A network failure or a non-JSON response would otherwise surface as
    // an unhandled promise rejection with no feedback for the user.
    toast("Can't reach the server", "error");
    console.log(err);
    return;
  }

  if (result.error) {
    toast(result.error, "error");
    console.log(result.error);
    return;
  }
  // The caption is model output; write it as text so it is never parsed
  // as markup.
  text_result.textContent = result.caption;
});
|
static/scripts/summerize.js
CHANGED
@@ -63,11 +63,10 @@ form.addEventListener("submit", async (e) => {
|
|
63 |
/*if (file === null) {
|
64 |
console.error("No file choosed");
|
65 |
} else {
|
66 |
-
const formData = new FormData();
|
67 |
formData.append("file", file);
|
68 |
const res = await fetch("/summarize", {
|
69 |
method: "post",
|
70 |
-
|
71 |
body: formData,
|
72 |
});
|
73 |
const data = await res.json();
|
@@ -99,7 +98,6 @@ const typing = (text, element, parent) => {
|
|
99 |
if (textsplit.length > 0) {
|
100 |
element.innerHTML += `${textsplit.shift()} `;
|
101 |
parent.scrollIntoView({ behavior: "smooth" });
|
102 |
-
|
103 |
element.scrollIntoView({ behavior: "smooth" });
|
104 |
} else {
|
105 |
clearInterval(interval);
|
|
|
63 |
/*if (file === null) {
|
64 |
console.error("No file choosed");
|
65 |
} else {
|
66 |
+
/*const formData = new FormData();
|
67 |
formData.append("file", file);
|
68 |
const res = await fetch("/summarize", {
|
69 |
method: "post",
|
|
|
70 |
body: formData,
|
71 |
});
|
72 |
const data = await res.json();
|
|
|
98 |
if (textsplit.length > 0) {
|
99 |
element.innerHTML += `${textsplit.shift()} `;
|
100 |
parent.scrollIntoView({ behavior: "smooth" });
|
|
|
101 |
element.scrollIntoView({ behavior: "smooth" });
|
102 |
} else {
|
103 |
clearInterval(interval);
|
static/styles/notification.css
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
|
13 |
.notification {
|
14 |
position: relative;
|
15 |
-
width:
|
16 |
display: flex;
|
17 |
align-items: center;
|
18 |
gap: 20px;
|
|
|
12 |
|
13 |
.notification {
|
14 |
position: relative;
|
15 |
+
width: 300px;
|
16 |
display: flex;
|
17 |
align-items: center;
|
18 |
gap: 20px;
|