Vineedhar committed on
Commit
88164ab
·
verified ·
1 Parent(s): bbc90c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py CHANGED
@@ -184,10 +184,57 @@ if uploaded_file is not None:
184
 
185
  df5 = pd.concat([boss, direct, colleague, other_colleague], axis = 0)
186
  df5 = df5.dropna()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  st.write("## Output:")
 
189
  st.write("### 1. Extracted dataset: Dimensions, Compentency Cluster, Raters and Scores by Raters")
190
  st.dataframe(df_combined)
191
 
192
  st.write("### 2. Extracted list of Strengths and Weaknesses rated by each Rater")
193
  st.write(df5)
 
 
 
 
184
 
185
  df5 = pd.concat([boss, direct, colleague, other_colleague], axis = 0)
186
  df5 = df5.dropna()
187
+
188
+ sections = [
189
+ "Continue doing the following",
190
+ "Start doing the following",
191
+ "Reasons why I think that your behavior has worsened concerning the dimensions marked in the \"Perception & Change Section\" of the questionnaire",
192
+ "Further tips for your work in our organisation"
193
+ ]
194
+
195
+ patterns = {
196
+ "Boss": r"VG\n(.*?)(?=\(Boss\))",
197
+ "Colleagues": r"Ke\n(.*?)(?=\(Colleagues\))",
198
+ "Customers": r"KU\n(.*?)(?=\(Internal/external customers\))"
199
+ }
200
+
201
+ # Function to extract comments for each section
202
def extract_comments(data, section, rater_patterns=None):
    """Collect the rater comments found under one open-comment section.

    The section is located by its ``Kom <section>:`` header. Everything from
    the line after that header up to the next ``IX. Open Comments`` marker
    (or the end of ``data``) is then scanned once per rater pattern.

    Args:
        data: Full text to search. ``None`` or an empty string yields ``[]``.
        section: Literal section title; it is regex-escaped before matching.
        rater_patterns: Optional mapping of rater label -> regex. Each regex
            should contain exactly one capturing group holding the comment
            text. Defaults to the module-level ``patterns`` mapping.

    Returns:
        A list of ``{"Section", "Rater", "Comment"}`` dicts, grouped by rater
        in the mapping's iteration order; ``[]`` when the section header is
        not present in ``data``.
    """
    # re.search raises TypeError on None, so treat missing text as "no comments".
    if not data:
        return []

    if rater_patterns is None:
        rater_patterns = patterns

    # NOTE(review): the capture only stops at "IX. Open Comments" or end of
    # text. If two "Kom ..." headers can occur before the next marker, the
    # later section's comments would also be reported under this one --
    # confirm against the real PDF layout.
    section_pattern = rf"Kom\s+{re.escape(section)}:\n(.*?)(?=(?:IX\. Open Comments|$))"
    section_match = re.search(section_pattern, data, re.DOTALL)
    if section_match is None:
        return []

    section_text = section_match.group(1)
    comments = []
    for rater, pattern in rater_patterns.items():
        comments.extend(
            {"Section": section, "Rater": rater, "Comment": found.strip()}
            for found in re.findall(pattern, section_text, re.DOTALL)
        )
    return comments
222
+
223
+ # Create dataframes for each section
224
+ all_comments = []
225
+ for section in sections:
226
+ all_comments.extend(extract_comments(pdf_text, section))
227
+
228
+ df6 = pd.DataFrame(all_comments)
229
+
230
 
231
  st.write("## Output:")
232
+
233
  st.write("### 1. Extracted dataset: Dimensions, Compentency Cluster, Raters and Scores by Raters")
234
  st.dataframe(df_combined)
235
 
236
  st.write("### 2. Extracted list of Strengths and Weaknesses rated by each Rater")
237
  st.write(df5)
238
+
239
+ st.write("### 3. Extracted list of Open Comments by each Rater")
240
+ st.write(df6)