Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,8 @@ import categories
|
|
18 |
from categories import Category
|
19 |
from main import process_image, process_pdf
|
20 |
|
|
|
|
|
21 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
22 |
PDF_IFRAME = """
|
23 |
<div style="border-radius: 10px; width: 100%; overflow: hidden;">
|
@@ -109,6 +111,52 @@ def extract_text(input_file):
|
|
109 |
return text
|
110 |
|
111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
def categorize_text(text):
|
113 |
"""Takes the extracted text and updates the category"""
|
114 |
category = categories.categorize_text(text)
|
@@ -241,6 +289,7 @@ def process_and_output_files(input_files):
|
|
241 |
for file in input_files:
|
242 |
# Extract and categorize text for each file
|
243 |
text = extract_text(file)
|
|
|
244 |
category = categorize_text(text)
|
245 |
|
246 |
chatbot_response = query(category, text) # Convert the generator to a list
|
@@ -256,6 +305,9 @@ def process_and_output_files(input_files):
|
|
256 |
# chats[1]["value"][0][1] ,
|
257 |
# )
|
258 |
|
|
|
|
|
|
|
259 |
response_dict = json.loads(chats[1]["value"][0][1])
|
260 |
if category.name == "TRAVEL_CAB" :
|
261 |
# Extract the relevant data
|
@@ -265,7 +317,7 @@ def process_and_output_files(input_files):
|
|
265 |
"Nature of Expenditure": response_dict.get("summary"),
|
266 |
"Billing Date": response_dict.get("issue_date"),
|
267 |
"Bill/Invoice No.": "NA",
|
268 |
-
"Amount(Rs.)": response_dict.get("total"),
|
269 |
|
270 |
}
|
271 |
else:
|
@@ -274,12 +326,12 @@ def process_and_output_files(input_files):
|
|
274 |
"Nature of Expenditure": response_dict.get("summary"),
|
275 |
"Billing Date": response_dict.get("issue_date"),
|
276 |
"Bill/Invoice No.": response_dict.get("uids"),
|
277 |
-
"Amount(Rs.)": response_dict.get("total")
|
278 |
|
279 |
}
|
280 |
|
281 |
|
282 |
-
total_amount+=response_dict.get("total")
|
283 |
|
284 |
# Append the relevant data for this file to the data list
|
285 |
data.append(extracted_data)
|
|
|
18 |
from categories import Category
|
19 |
from main import process_image, process_pdf
|
20 |
|
21 |
+
from forex_python.converter import CurrencyRates
|
22 |
+
|
23 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
24 |
PDF_IFRAME = """
|
25 |
<div style="border-radius: 10px; width: 100%; overflow: hidden;">
|
|
|
111 |
return text
|
112 |
|
113 |
|
114 |
+
def find_currency_symbol(text):
    """Detect which currency a piece of text refers to.

    The text is searched (case-sensitively, by plain substring match) for
    the known symbols and names listed in the table below.

    Longer markers are tried before shorter ones, so a specific marker such
    as ``'A$'``, ``'HK$'`` or ``'CN¥'`` is not shadowed by the generic
    ``'$'`` or ``'¥'`` it contains. Markers of equal length keep the order
    in which they appear in the table.

    Args:
        text: The text to scan. ``None`` or an empty string yields ``None``.

    Returns:
        The three-letter currency code of the first matching marker, or
        ``None`` when no marker is present.
    """
    currency_symbols = {
        'USD': ['$', 'US$', 'US Dollar', 'United States Dollar'],
        'EUR': ['€', 'Euro'],
        'GBP': ['£', 'British Pound', 'Pound Sterling'],
        'JPY': ['¥', 'Japanese Yen'],
        'AUD': ['A$', 'AU$', 'Australian Dollar'],
        'CAD': ['C$', 'CA$', 'Canadian Dollar'],
        'CHF': ['Swiss Franc'],
        'CNY': ['CN¥', 'Chinese Yuan', 'Renminbi'],
        'HKD': ['HK$', 'Hong Kong Dollar'],
        'NZD': ['NZ$', 'New Zealand Dollar'],
        'SEK': ['Swedish Krona'],
        'KRW': ['₩', 'South Korean Won'],
        'SGD': ['S$', 'Singapore Dollar'],
        'NOK': ['Norwegian Krone'],
        'MXN': ['Mexican Peso'],
        'INR': ['₹', 'Indian Rupee'],
        'RUB': ['₽', 'Russian Ruble'],
        'ZAR': ['South African Rand'],
        'BRL': ['R$', 'Brazilian Real'],
    }

    # Nothing to scan; also avoids a TypeError on `symbol in None`.
    if not text:
        return None

    # Flatten to (marker, code) pairs and check the longest markers first.
    # Matching in table order would let the bare '$' claim every "A$",
    # "HK$", "R$", ... as USD, and the bare '¥' claim "CN¥" as JPY.
    # sorted() is stable even with reverse=True, so equal-length markers
    # retain the table order.
    candidates = sorted(
        (
            (symbol, currency)
            for currency, symbols in currency_symbols.items()
            for symbol in symbols
        ),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )

    for symbol, currency in candidates:
        if symbol in text:
            return currency

    return None
|
148 |
+
|
149 |
+
def get_exchange_rate_to_inr(currency):
    """Return the exchange rate from *currency* to Indian rupees.

    Args:
        currency: A currency code such as ``'USD'``, or ``None`` when no
            currency is known.

    Returns:
        ``1`` when *currency* is ``'INR'`` or ``None`` (the amount is taken
        as already being in rupees); otherwise the value returned by
        ``CurrencyRates.get_rate(currency, 'INR')``; or ``None`` when that
        lookup fails. Callers must handle the ``None`` case before doing
        arithmetic or formatting with the result.
    """
    # No conversion needed, so don't build a rates client at all.
    if currency is None or currency == 'INR':
        return 1

    c = CurrencyRates()
    try:
        return c.get_rate(currency, 'INR')
    except Exception:
        # Best-effort lookup: an unknown code or an unreachable rates
        # service is reported as None. `Exception` (rather than a bare
        # `except:`) lets KeyboardInterrupt and SystemExit propagate.
        return None
|
159 |
+
|
160 |
def categorize_text(text):
|
161 |
"""Takes the extracted text and updates the category"""
|
162 |
category = categories.categorize_text(text)
|
|
|
289 |
for file in input_files:
|
290 |
# Extract and categorize text for each file
|
291 |
text = extract_text(file)
|
292 |
+
currency = find_currency_symbol(text)
|
293 |
category = categorize_text(text)
|
294 |
|
295 |
chatbot_response = query(category, text) # Convert the generator to a list
|
|
|
305 |
# chats[1]["value"][0][1] ,
|
306 |
# )
|
307 |
|
308 |
+
exchange_rate = get_exchange_rate_to_inr(currency)
|
309 |
+
exchange_rate = float("{:.2f}".format(exchange_rate))
|
310 |
+
|
311 |
response_dict = json.loads(chats[1]["value"][0][1])
|
312 |
if category.name == "TRAVEL_CAB" :
|
313 |
# Extract the relevant data
|
|
|
317 |
"Nature of Expenditure": response_dict.get("summary"),
|
318 |
"Billing Date": response_dict.get("issue_date"),
|
319 |
"Bill/Invoice No.": "NA",
|
320 |
+
"Amount(Rs.)": response_dict.get("total") * exchange_rate,
|
321 |
|
322 |
}
|
323 |
else:
|
|
|
326 |
"Nature of Expenditure": response_dict.get("summary"),
|
327 |
"Billing Date": response_dict.get("issue_date"),
|
328 |
"Bill/Invoice No.": response_dict.get("uids"),
|
329 |
+
"Amount(Rs.)": response_dict.get("total") * exchange_rate
|
330 |
|
331 |
}
|
332 |
|
333 |
|
334 |
+
total_amount+=response_dict.get("total") * exchange_rate
|
335 |
|
336 |
# Append the relevant data for this file to the data list
|
337 |
data.append(extracted_data)
|