Spaces:
Sleeping
Sleeping
pred_score fix, logging
Browse files- app.py +28 -11
- logs/app.log +99 -0
- modules/logging_config.py +23 -0
- modules/utils.py +6 -6
app.py
CHANGED
@@ -12,16 +12,22 @@ except Exception as e:
|
|
12 |
print(f"Error checking CUDA availability: {str(e)}")
|
13 |
print("Continuing with CPU...")
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
import streamlit as st
|
16 |
import os
|
17 |
from huggingface_hub import login
|
18 |
from datetime import datetime
|
19 |
-
from modules.auth import validate_login
|
20 |
from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data
|
21 |
|
22 |
# Local
|
23 |
-
|
24 |
-
|
25 |
|
26 |
|
27 |
# Main app logic
|
@@ -104,26 +110,37 @@ def main():
|
|
104 |
|
105 |
if uploaded_file is not None:
|
106 |
try:
|
|
|
|
|
107 |
if not st.session_state['data_processed']:
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
df = st.session_state['df']
|
112 |
-
|
113 |
# Get the current date
|
114 |
current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
|
115 |
-
output_filename = 'processed_applications_
|
116 |
|
117 |
-
|
118 |
-
df.to_csv(
|
|
|
|
|
119 |
st.download_button(
|
120 |
label="Download data as CSV",
|
121 |
-
data=
|
122 |
file_name=output_filename,
|
123 |
mime='text/csv',
|
124 |
)
|
125 |
|
126 |
-
except:
|
|
|
127 |
st.error("Failed to process the file. Please ensure your column names match the template file.")
|
128 |
|
129 |
|
|
|
12 |
print(f"Error checking CUDA availability: {str(e)}")
|
13 |
print("Continuing with CPU...")
|
14 |
|
15 |
+
|
16 |
+
from modules.logging_config import setup_logging
|
17 |
+
setup_logging()
|
18 |
+
import logging
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
import streamlit as st
|
22 |
import os
|
23 |
from huggingface_hub import login
|
24 |
from datetime import datetime
|
25 |
+
from modules.auth import validate_login
|
26 |
from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data
|
27 |
|
28 |
# Local
|
29 |
+
from dotenv import load_dotenv
|
30 |
+
load_dotenv()
|
31 |
|
32 |
|
33 |
# Main app logic
|
|
|
110 |
|
111 |
if uploaded_file is not None:
|
112 |
try:
|
113 |
+
logger.info(f"File uploaded: {uploaded_file.name}")
|
114 |
+
|
115 |
if not st.session_state['data_processed']:
|
116 |
+
logger.info("Starting data processing")
|
117 |
+
try:
|
118 |
+
st.session_state['df'] = process_data(uploaded_file, sens_level)
|
119 |
+
logger.info("Data processing completed successfully")
|
120 |
+
st.session_state['data_processed'] = True
|
121 |
+
except Exception as e:
|
122 |
+
logger.error(f"Error in process_data: {str(e)}")
|
123 |
+
raise # Re-raise the exception to be caught by outer try-except
|
124 |
|
125 |
df = st.session_state['df']
|
126 |
+
|
127 |
# Get the current date
|
128 |
current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
|
129 |
+
output_filename = f'processed_applications_{current_datetime}.csv'
|
130 |
|
131 |
+
# Convert DataFrame to CSV string buffer instead of file
|
132 |
+
csv_buffer = df.to_csv(index=False).encode()
|
133 |
+
logger.info("CSV buffer created successfully")
|
134 |
+
|
135 |
st.download_button(
|
136 |
label="Download data as CSV",
|
137 |
+
data=csv_buffer,
|
138 |
file_name=output_filename,
|
139 |
mime='text/csv',
|
140 |
)
|
141 |
|
142 |
+
except Exception as e:
|
143 |
+
logger.error(f"Error processing file: {str(e)}")
|
144 |
st.error("Failed to process the file. Please ensure your column names match the template file.")
|
145 |
|
146 |
|
logs/app.log
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-02-04 20:16:29,467 - datasets - INFO - PyTorch version 2.5.1 available.
|
2 |
+
2025-02-04 20:16:31,199 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
3 |
+
2025-02-04 20:16:37,895 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
4 |
+
2025-02-04 20:16:38,062 - modules.utils - INFO - T1 df import
|
5 |
+
2025-02-04 20:16:38,062 - modules.utils - INFO - T2 columns renamed
|
6 |
+
2025-02-04 20:16:38,249 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
7 |
+
2025-02-04 20:16:38,249 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
|
8 |
+
2025-02-04 20:16:44,645 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
9 |
+
2025-02-04 20:16:44,645 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
|
10 |
+
2025-02-04 20:16:49,350 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
11 |
+
2025-02-04 20:16:49,350 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
|
12 |
+
2025-02-04 20:16:55,639 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
13 |
+
2025-02-04 20:16:55,639 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
|
14 |
+
2025-02-04 20:17:00,538 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
15 |
+
2025-02-04 20:17:00,539 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
|
16 |
+
2025-02-04 20:21:02,708 - datasets - INFO - PyTorch version 2.5.1 available.
|
17 |
+
2025-02-04 20:21:05,106 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
18 |
+
2025-02-04 20:21:10,682 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
19 |
+
2025-02-04 20:21:10,849 - modules.utils - INFO - T1 df import
|
20 |
+
2025-02-04 20:21:10,850 - modules.utils - INFO - T2 columns renamed
|
21 |
+
2025-02-04 20:21:10,994 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
22 |
+
2025-02-04 20:21:10,994 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
|
23 |
+
2025-02-04 20:21:17,001 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
24 |
+
2025-02-04 20:21:17,001 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
|
25 |
+
2025-02-04 20:21:21,035 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
26 |
+
2025-02-04 20:21:21,035 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
|
27 |
+
2025-02-04 20:21:27,247 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
28 |
+
2025-02-04 20:21:27,247 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
|
29 |
+
2025-02-04 20:21:31,121 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
30 |
+
2025-02-04 20:21:31,121 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
|
31 |
+
2025-02-04 20:23:19,122 - datasets - INFO - PyTorch version 2.5.1 available.
|
32 |
+
2025-02-04 20:23:21,471 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
33 |
+
2025-02-04 20:23:26,565 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
34 |
+
2025-02-04 20:23:26,732 - modules.utils - INFO - T1 df import
|
35 |
+
2025-02-04 20:23:26,732 - modules.utils - INFO - T2 columns renamed
|
36 |
+
2025-02-04 20:23:26,875 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
37 |
+
2025-02-04 20:23:26,875 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
|
38 |
+
2025-02-04 20:23:30,614 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
39 |
+
2025-02-04 20:23:30,614 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
|
40 |
+
2025-02-04 20:23:33,378 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
41 |
+
2025-02-04 20:23:33,378 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
|
42 |
+
2025-02-04 20:23:37,329 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
43 |
+
2025-02-04 20:23:37,329 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
|
44 |
+
2025-02-04 20:23:41,129 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
45 |
+
2025-02-04 20:23:41,130 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
|
46 |
+
2025-02-04 20:26:27,528 - datasets - INFO - PyTorch version 2.5.1 available.
|
47 |
+
2025-02-04 20:26:29,749 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
48 |
+
2025-02-04 20:26:35,354 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
49 |
+
2025-02-04 20:26:35,512 - __main__ - INFO - File uploaded: MAF_2024_TEST2.xlsx
|
50 |
+
2025-02-04 20:26:35,513 - __main__ - INFO - Starting data processing...
|
51 |
+
2025-02-04 20:26:35,519 - modules.utils - INFO - T1 df import
|
52 |
+
2025-02-04 20:26:35,520 - modules.utils - INFO - T2 columns renamed
|
53 |
+
2025-02-04 20:26:35,658 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
54 |
+
2025-02-04 20:26:35,658 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
|
55 |
+
2025-02-04 20:26:39,590 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
56 |
+
2025-02-04 20:26:39,591 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
|
57 |
+
2025-02-04 20:26:43,283 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
58 |
+
2025-02-04 20:26:43,283 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
|
59 |
+
2025-02-04 20:26:47,454 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
60 |
+
2025-02-04 20:26:47,455 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
|
61 |
+
2025-02-04 20:26:50,569 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
62 |
+
2025-02-04 20:26:50,569 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
|
63 |
+
2025-02-04 20:27:00,137 - __main__ - ERROR - Error in process_data: Cannot set a DataFrame with multiple columns to the single column pred_score
|
64 |
+
2025-02-04 20:27:00,138 - __main__ - ERROR - Error processing file: Cannot set a DataFrame with multiple columns to the single column pred_score
|
65 |
+
2025-02-04 20:31:19,620 - datasets - INFO - PyTorch version 2.5.1 available.
|
66 |
+
2025-02-04 20:31:21,779 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
67 |
+
2025-02-04 20:31:26,273 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
68 |
+
2025-02-04 20:31:26,432 - __main__ - INFO - File uploaded: MAF_2024_TEST2.xlsx
|
69 |
+
2025-02-04 20:31:26,433 - __main__ - INFO - Starting data processing...
|
70 |
+
2025-02-04 20:31:26,439 - modules.utils - INFO - T1 df import
|
71 |
+
2025-02-04 20:31:26,440 - modules.utils - INFO - T2 columns renamed
|
72 |
+
2025-02-04 20:31:26,563 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
73 |
+
2025-02-04 20:31:26,563 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
|
74 |
+
2025-02-04 20:31:30,934 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
75 |
+
2025-02-04 20:31:30,935 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
|
76 |
+
2025-02-04 20:31:33,839 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
77 |
+
2025-02-04 20:31:33,839 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
|
78 |
+
2025-02-04 20:31:37,942 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
79 |
+
2025-02-04 20:31:37,942 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
|
80 |
+
2025-02-04 20:31:41,885 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
81 |
+
2025-02-04 20:31:41,886 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
|
82 |
+
2025-02-04 20:31:51,715 - __main__ - INFO - Data processing completed successfully
|
83 |
+
2025-02-04 20:31:51,715 - __main__ - INFO - DataFrame columns: ['id', 'scope_txt', 'tech_txt', 'fin_txt', 'maf_funding', 'cont_public', 'cont_private', 'cont_other', 'scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2', 'ADAPMIT', 'SECTOR1', 'SECTOR2', 'LANG', 'lev_total', 'lev_gt_0', 'lev_maf_%', 'lev_maf_scale', 'pred_score', 'pred_action']
|
84 |
+
2025-02-04 20:31:51,715 - __main__ - INFO - DataFrame shape: (14, 23)
|
85 |
+
2025-02-04 20:31:51,718 - __main__ - INFO - CSV buffer created successfully
|
86 |
+
2025-02-04 20:31:56,984 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
87 |
+
2025-02-04 20:31:57,147 - __main__ - INFO - File uploaded: MAF_2024_TEST2.xlsx
|
88 |
+
2025-02-04 20:31:57,148 - __main__ - INFO - DataFrame columns: ['id', 'scope_txt', 'tech_txt', 'fin_txt', 'maf_funding', 'cont_public', 'cont_private', 'cont_other', 'scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2', 'ADAPMIT', 'SECTOR1', 'SECTOR2', 'LANG', 'lev_total', 'lev_gt_0', 'lev_maf_%', 'lev_maf_scale', 'pred_score', 'pred_action']
|
89 |
+
2025-02-04 20:31:57,148 - __main__ - INFO - DataFrame shape: (14, 23)
|
90 |
+
2025-02-04 20:31:57,150 - __main__ - INFO - CSV buffer created successfully
|
91 |
+
2025-02-04 20:33:58,763 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
92 |
+
2025-02-04 20:34:06,910 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
|
93 |
+
2025-02-04 20:34:07,067 - __main__ - INFO - File uploaded: MAF_2024_FULL.xlsx
|
94 |
+
2025-02-04 20:34:07,068 - __main__ - INFO - Starting data processing
|
95 |
+
2025-02-04 20:34:07,121 - modules.utils - INFO - data import successful
|
96 |
+
2025-02-04 20:34:07,673 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
97 |
+
2025-02-04 20:34:07,675 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
|
98 |
+
2025-02-04 20:34:59,028 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
|
99 |
+
2025-02-04 20:34:59,028 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
|
modules/logging_config.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from logging.handlers import RotatingFileHandler
|
3 |
+
import os
|
4 |
+
|
5 |
+
def setup_logging():
    """Configure the root logger to write to ``logs/app.log`` and the console.

    Creates the ``logs`` directory if it does not exist, then installs two
    handlers on the root logger at INFO level: a size-rotated file handler
    (1 MiB per file, 5 backups kept) and a stream handler. Both use the
    ``timestamp - logger name - level - message`` format.

    The function is safe to call more than once. ``logging.basicConfig``
    ignores any call made after the root logger already has handlers, so in
    that case we return before opening another handle on the log file that
    would never be attached to a logger or explicitly closed.
    """
    log_dir = 'logs'
    os.makedirs(log_dir, exist_ok=True)

    # basicConfig() would be a no-op from here on; skip creating a throwaway
    # RotatingFileHandler (which opens the log file as soon as it is built).
    if logging.getLogger().handlers:
        return

    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    log_file = os.path.join(log_dir, 'app.log')

    # Rotate at 1 MiB and keep five old files so the log cannot grow unbounded.
    file_handler = RotatingFileHandler(log_file, maxBytes=1024 * 1024, backupCount=5)
    file_handler.setFormatter(logging.Formatter(log_format))

    # Configure the root logger so every module-level logger inherits both handlers.
    logging.basicConfig(level=logging.INFO,
                        format=log_format,
                        handlers=[file_handler, logging.StreamHandler()])
|
19 |
+
|
20 |
+
if __name__ == "__main__":
|
21 |
+
setup_logging()
|
22 |
+
logger = logging.getLogger(__name__)
|
23 |
+
logger.info("Logging setup completed")
|
modules/utils.py
CHANGED
@@ -9,7 +9,9 @@ from transformers import pipeline
|
|
9 |
from openpyxl import Workbook
|
10 |
from openpyxl.styles import Font, NamedStyle, PatternFill
|
11 |
from openpyxl.styles.differential import DifferentialStyle
|
|
|
12 |
|
|
|
13 |
|
14 |
# Function for creating Upload template file
|
15 |
def create_excel():
|
@@ -111,20 +113,18 @@ def predict_category(df, model_name, progress_bar, repo, profile, multilabel=Fal
|
|
111 |
# Main function to process data
|
112 |
def process_data(uploaded_file, sens_level):
|
113 |
df = pd.read_excel(uploaded_file)
|
114 |
-
|
115 |
-
|
116 |
df.rename(columns={
|
117 |
'id': 'id',
|
118 |
'scope': 'scope_txt',
|
119 |
'technology': 'tech_txt',
|
120 |
'financial': 'fin_txt',
|
121 |
-
'barrier': 'bar_txt',
|
122 |
'maf_funding_requested':'maf_funding',
|
123 |
'contributions_public_sector':'cont_public',
|
124 |
'contributions_private_sector':'cont_private',
|
125 |
'contributions_other':'cont_other'}, inplace=True)
|
126 |
-
|
127 |
-
|
128 |
df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
|
129 |
df.fillna('', inplace=True)
|
130 |
df[['scope_txt', 'tech_txt', 'fin_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt']].applymap(clean_text)
|
@@ -209,7 +209,7 @@ def process_data(uploaded_file, sens_level):
|
|
209 |
|
210 |
# Further data processing and actions
|
211 |
sector_classes = ['Energy','Transport','Industries']
|
212 |
-
df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3']+ x['lev_gt_0']+
|
213 |
df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
|
214 |
|
215 |
return df
|
|
|
9 |
from openpyxl import Workbook
|
10 |
from openpyxl.styles import Font, NamedStyle, PatternFill
|
11 |
from openpyxl.styles.differential import DifferentialStyle
|
12 |
+
import logging
|
13 |
|
14 |
+
logger = logging.getLogger(__name__)
|
15 |
|
16 |
# Function for creating Upload template file
|
17 |
def create_excel():
|
|
|
113 |
# Main function to process data
|
114 |
def process_data(uploaded_file, sens_level):
|
115 |
df = pd.read_excel(uploaded_file)
|
116 |
+
logger.info(f"data import successful")
|
117 |
+
# Rename columns
|
118 |
df.rename(columns={
|
119 |
'id': 'id',
|
120 |
'scope': 'scope_txt',
|
121 |
'technology': 'tech_txt',
|
122 |
'financial': 'fin_txt',
|
|
|
123 |
'maf_funding_requested':'maf_funding',
|
124 |
'contributions_public_sector':'cont_public',
|
125 |
'contributions_private_sector':'cont_private',
|
126 |
'contributions_other':'cont_other'}, inplace=True)
|
127 |
+
# clean the text fields
|
|
|
128 |
df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
|
129 |
df.fillna('', inplace=True)
|
130 |
df[['scope_txt', 'tech_txt', 'fin_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt']].applymap(clean_text)
|
|
|
209 |
|
210 |
# Further data processing and actions
|
211 |
sector_classes = ['Energy','Transport','Industries']
|
212 |
+
df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3']+ x['lev_gt_0']+x['lev_maf_scale'])/10*10,0), axis=1)
|
213 |
df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
|
214 |
|
215 |
return df
|