Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -68,8 +68,89 @@ def data_pre_processing(file_responses):
|
|
68 |
except Exception as e:
|
69 |
return str(e)
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
def nlp_pipeline(original_df):
    """Run the NLP pipeline on *original_df*.

    At this revision the pipeline consists solely of the pre-processing
    step, so whatever ``data_pre_processing`` returns is handed straight
    back to the caller.
    """
    return data_pre_processing(original_df)
|
74 |
|
75 |
def process_excel(file):
|
@@ -138,10 +219,10 @@ interface = gr.Interface(
|
|
138 |
"<p style='font-size: 12px; color: gray; text-align: center'>This tool allows for the systematic evaluation and proposal of solutions tailored to specific location-problem pairs, ensuring efficient resource allocation and project planning. For more information, visit <a href='https://santanban.github.io/TaxDirection/' target='_blank'>#TaxDirection weblink</a>.</p>"
|
139 |
|
140 |
"<p style='font-weight: bold; font-size: 16px; color: blue;'>Upload an Excel file to process and download the result or use the Example files:</p>"
|
141 |
-
"<p style='font-weight: bold; font-size:
|
142 |
|
143 |
"<p style='font-weight: bold; font-size: 14px; color: green; text-align: right;'>Processed output contains a Project Proposal for each Location~Problem paired combination (i.e. each cell).</p>"
|
144 |
-
"<p style='font-weight: bold; font-size:
|
145 |
|
146 |
|
147 |
"<p style='font-size: 12px; color: gray; text-align: center'>Note: The example files provided above are for demonstration purposes. Feel free to upload your own Excel files to see the results. If you have any questions, refer to the documentation-links or contact <a href='https://www.change.org/p/democracy-evolution-ensuring-humanity-s-eternal-existence-through-taxdirection' target='_blank'>support</a>.</p>"
|
|
|
68 |
except Exception as e:
|
69 |
return str(e)
|
70 |
|
71 |
+
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
|
78 |
+
import spacy
|
79 |
+
from transformers import AutoTokenizer, AutoModel
|
80 |
+
import torch
|
81 |
+
|
82 |
+
# Load the small English spaCy pipeline; it is used to lemmatize the cleaned tokens
|
83 |
+
nlp = spacy.load('en_core_web_sm')
|
84 |
+
|
85 |
+
# Load the Hugging Face tokenizer and encoder (all-mpnet-base-v2) used to embed the cleaned text
|
86 |
+
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
87 |
+
model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
88 |
+
|
89 |
+
# def combined_text_processing(text):
|
90 |
+
# # Basic NLP processing using SpaCy
|
91 |
+
# doc = nlp(text)
|
92 |
+
# lemmatized_text = ' '.join([token.lemma_ for token in doc])
|
93 |
+
|
94 |
+
# # Advanced text representation using Hugging Face Transformers
|
95 |
+
# inputs = tokenizer(lemmatized_text, return_tensors="pt", truncation=False, padding=True)
|
96 |
+
# with torch.no_grad():
|
97 |
+
# outputs = model(**inputs)
|
98 |
+
|
99 |
+
# return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
|
100 |
+
|
101 |
+
|
102 |
+
import re
|
103 |
+
from nltk.corpus import stopwords
|
104 |
+
from nltk.tokenize import word_tokenize
|
105 |
+
|
106 |
+
_STOP_WORDS = None  # lazily-built English stop-word set, shared across calls


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it on first use only."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def combined_text_processing(text):
    """Clean *text* and return a mean-pooled transformer embedding of it.

    Steps, in order:
      1. strip URLs, punctuation/special characters and digits;
      2. lowercase, tokenize with NLTK and drop English stop words;
      3. lemmatize the remaining tokens with the module-level spaCy ``nlp``;
      4. encode the lemmatized string with the module-level ``tokenizer`` /
         ``model`` and average ``last_hidden_state`` over the token axis.

    Args:
        text: The raw string to process.

    Returns:
        A NumPy array obtained from ``last_hidden_state.mean(dim=1)`` after
        ``squeeze()`` (presumably a 1-D vector of the encoder's hidden
        size for a single input string -- TODO confirm).
    """
    # Strip URLs first: once punctuation is removed, "://" and "." are gone
    # and the pattern can only match leftover residue such as "httpsexamplecom".
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation and special characters
    text = re.sub(r'\d+', '', text)  # Remove numbers

    # Tokenize the lowercased text and remove stop words. The stop-word set
    # is cached because this function is applied once per DataFrame row.
    stop_words = _english_stop_words()
    tokens = [word for word in word_tokenize(text.lower()) if word not in stop_words]

    # Lemmatize tokens using SpaCy
    doc = nlp(' '.join(tokens))
    lemmatized_text = ' '.join(token.lemma_ for token in doc)

    # truncation=True caps the input at the tokenizer's maximum length;
    # with truncation disabled, longer descriptions are passed to the encoder
    # as-is and are expected to fail there.
    inputs = tokenizer(lemmatized_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)

    # Average the token embeddings into a single vector and hand back NumPy.
    return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
|
127 |
+
|
128 |
+
|
129 |
+
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
|
139 |
+
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
def nlp_pipeline(original_df):
    """Pre-process *original_df* and attach an embedding per problem description.

    Args:
        original_df: Input passed unchanged to ``data_pre_processing``.

    Returns:
        The pre-processed DataFrame with an added
        ``'Processed_ProblemDescription'`` column holding the result of
        ``combined_text_processing`` for each ``'Problem_Description'``
        value. If pre-processing yields a string instead of a DataFrame,
        that string is returned unchanged.
    """
    # Data Preprocessing
    processed_df = data_pre_processing(original_df)

    # data_pre_processing appears to return str(e) when it hits an exception
    # (see its except branch). Indexing that string with a column name would
    # raise TypeError and hide the original message, so pass it through as
    # the pipeline did before the embedding step was added.
    if isinstance(processed_df, str):
        return processed_df

    # Apply the combined function to every problem description
    descriptions = processed_df['Problem_Description']
    processed_df['Processed_ProblemDescription'] = descriptions.apply(combined_text_processing)

    return processed_df
|
155 |
|
156 |
def process_excel(file):
|
|
|
219 |
"<p style='font-size: 12px; color: gray; text-align: center'>This tool allows for the systematic evaluation and proposal of solutions tailored to specific location-problem pairs, ensuring efficient resource allocation and project planning. For more information, visit <a href='https://santanban.github.io/TaxDirection/' target='_blank'>#TaxDirection weblink</a>.</p>"
|
220 |
|
221 |
"<p style='font-weight: bold; font-size: 16px; color: blue;'>Upload an Excel file to process and download the result or use the Example files:</p>"
|
222 |
+
"<p style='font-weight: bold; font-size: 15px; color: blue;'>(click on any of them to directly process the file and Download the result)</p>"
|
223 |
|
224 |
"<p style='font-weight: bold; font-size: 14px; color: green; text-align: right;'>Processed output contains a Project Proposal for each Location~Problem paired combination (i.e. each cell).</p>"
|
225 |
+
"<p style='font-weight: bold; font-size: 13px; color: green; text-align: right;'>Corresponding Budget Allocation and estimated Project Completion Time are provided in different sheets.</p>"
|
226 |
|
227 |
|
228 |
"<p style='font-size: 12px; color: gray; text-align: center'>Note: The example files provided above are for demonstration purposes. Feel free to upload your own Excel files to see the results. If you have any questions, refer to the documentation-links or contact <a href='https://www.change.org/p/democracy-evolution-ensuring-humanity-s-eternal-existence-through-taxdirection' target='_blank'>support</a>.</p>"
|