SantanuBanerjee committed on
Commit
8c3b0f0
·
verified ·
1 Parent(s): e9c524f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -2
app.py CHANGED
@@ -68,8 +68,89 @@ def data_pre_processing(file_responses):
68
  except Exception as e:
69
  return str(e)
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def nlp_pipeline(original_df):
 
72
  processed_df = data_pre_processing(original_df)
 
 
 
 
 
 
 
73
  return processed_df
74
 
75
  def process_excel(file):
@@ -138,10 +219,10 @@ interface = gr.Interface(
138
  "<p style='font-size: 12px; color: gray; text-align: center'>This tool allows for the systematic evaluation and proposal of solutions tailored to specific location-problem pairs, ensuring efficient resource allocation and project planning. For more information, visit <a href='https://santanban.github.io/TaxDirection/' target='_blank'>#TaxDirection weblink</a>.</p>"
139
 
140
  "<p style='font-weight: bold; font-size: 16px; color: blue;'>Upload an Excel file to process and download the result or use the Example files:</p>"
141
- "<p style='font-weight: bold; font-size: 16px; color: blue;'>(click on any of them to directly process the file and Download the result)</p>"
142
 
143
  "<p style='font-weight: bold; font-size: 14px; color: green; text-align: right;'>Processed output contains a Project Proposal for each Location~Problem paired combination (i.e. each cell).</p>"
144
- "<p style='font-weight: bold; font-size: 14px; color: green; text-align: right;'>Corresponding Budget Allocation and estimated Project Completion Time are provided in different sheets.</p>"
145
 
146
 
147
  "<p style='font-size: 12px; color: gray; text-align: center'>Note: The example files provided above are for demonstration purposes. Feel free to upload your own Excel files to see the results. If you have any questions, refer to the documentation-links or contact <a href='https://www.change.org/p/democracy-evolution-ensuring-humanity-s-eternal-existence-through-taxdirection' target='_blank'>support</a>.</p>"
 
68
  except Exception as e:
69
  return str(e)
70
 
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+ import spacy
79
+ from transformers import AutoTokenizer, AutoModel
80
+ import torch
81
+
82
+ # Load SpaCy model
83
+ nlp = spacy.load('en_core_web_sm')
84
+
85
+ # Load Hugging Face Transformers model
86
+ tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
87
+ model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
88
+
89
+ # def combined_text_processing(text):
90
+ # # Basic NLP processing using SpaCy
91
+ # doc = nlp(text)
92
+ # lemmatized_text = ' '.join([token.lemma_ for token in doc])
93
+
94
+ # # Advanced text representation using Hugging Face Transformers
95
+ # inputs = tokenizer(lemmatized_text, return_tensors="pt", truncation=False, padding=True)
96
+ # with torch.no_grad():
97
+ # outputs = model(**inputs)
98
+
99
+ # return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
100
+
101
+
102
+ import re
103
+ from nltk.corpus import stopwords
104
+ from nltk.tokenize import word_tokenize
105
+
106
# Lazily-built cache of NLTK's English stop words. The function below is
# applied once per DataFrame row, so the corpus should only be read once.
_ENGLISH_STOP_WORDS = None


def _english_stop_words():
    """Return NLTK's English stop-word list as a frozenset, loading it once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def combined_text_processing(text):
    """Clean *text* and return a mean-pooled transformer embedding for it.

    Processing steps, in order:

    1. Strip URLs, then punctuation/special characters, then digits.
    2. Lowercase, tokenize with NLTK and drop English stop words.
    3. Lemmatize the remaining tokens with the module-level SpaCy
       pipeline ``nlp``.
    4. Encode the lemmatized text with the module-level Hugging Face
       ``tokenizer`` / ``model`` and average the last hidden state over
       the token axis.

    Args:
        text: The raw description. ``None`` and float NaN (e.g. an empty
            spreadsheet cell) are treated as an empty string; any other
            non-string value is converted with ``str()``.

    Returns:
        A NumPy array holding the token-wise mean of the model's last
        hidden state (presumably 1-D of the model's hidden size, since a
        single string is encoded -- confirm against the model config).
    """
    # Guard against missing / non-text cells: ``re.sub`` raises TypeError
    # on anything that is not a string. ``x != x`` is true only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        text = ''
    elif not isinstance(text, str):
        text = str(text)

    # URLs are removed first, while their punctuation is still intact, so
    # the pattern does not depend on what the later substitutions leave.
    text = re.sub(r'http\S+', '', text)   # Remove URLs
    text = re.sub(r'[^\w\s]', '', text)   # Remove punctuation and special characters
    text = re.sub(r'\d+', '', text)       # Remove numbers

    # Tokenize the lowercased text and remove stop words.
    stop_words = _english_stop_words()
    tokens = [word for word in word_tokenize(text.lower()) if word not in stop_words]

    # Lemmatize tokens using SpaCy.
    doc = nlp(' '.join(tokens))
    lemmatized_text = ' '.join(token.lemma_ for token in doc)

    # truncation=True caps the input at the tokenizer's configured maximum
    # length; without it a long description is passed to the model in full
    # and the forward pass fails once the sequence exceeds what the model
    # supports.
    inputs = tokenizer(lemmatized_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)

    # Mean over the token axis, drop the batch axis, hand back NumPy.
    return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
  def nlp_pipeline(original_df):
145
+ # Data Preprocessing
146
  processed_df = data_pre_processing(original_df)
147
+
148
+
149
+ # Apply the combined function to your DataFrame
150
+ processed_df['Processed_ProblemDescription'] = processed_df['Problem_Description'].apply(combined_text_processing)
151
+
152
+
153
+
154
  return processed_df
155
 
156
  def process_excel(file):
 
219
  "<p style='font-size: 12px; color: gray; text-align: center'>This tool allows for the systematic evaluation and proposal of solutions tailored to specific location-problem pairs, ensuring efficient resource allocation and project planning. For more information, visit <a href='https://santanban.github.io/TaxDirection/' target='_blank'>#TaxDirection weblink</a>.</p>"
220
 
221
  "<p style='font-weight: bold; font-size: 16px; color: blue;'>Upload an Excel file to process and download the result or use the Example files:</p>"
222
+ "<p style='font-weight: bold; font-size: 15px; color: blue;'>(click on any of them to directly process the file and Download the result)</p>"
223
 
224
  "<p style='font-weight: bold; font-size: 14px; color: green; text-align: right;'>Processed output contains a Project Proposal for each Location~Problem paired combination (i.e. each cell).</p>"
225
+ "<p style='font-weight: bold; font-size: 13px; color: green; text-align: right;'>Corresponding Budget Allocation and estimated Project Completion Time are provided in different sheets.</p>"
226
 
227
 
228
  "<p style='font-size: 12px; color: gray; text-align: center'>Note: The example files provided above are for demonstration purposes. Feel free to upload your own Excel files to see the results. If you have any questions, refer to the documentation-links or contact <a href='https://www.change.org/p/democracy-evolution-ensuring-humanity-s-eternal-existence-through-taxdirection' target='_blank'>support</a>.</p>"