SantanuBanerjee committed on
Commit
3a814a1
·
verified ·
1 Parent(s): 4259c64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -10
app.py CHANGED
@@ -341,6 +341,81 @@ def extract_location_clusters(df,
341
 
342
 
343
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  def nlp_pipeline(original_df):
345
  console_messages.append("Starting NLP pipeline...")
346
 
@@ -380,12 +455,25 @@ def nlp_pipeline(original_df):
380
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
381
  console_messages.append("NLP pipeline for location extraction completed.")
382
 
 
 
383
 
384
- console_messages.append("NLP pipeline completed.")
385
- return processed_df
386
 
387
-
 
 
 
 
 
 
 
 
 
388
 
 
 
 
 
389
 
390
 
391
 
@@ -400,14 +488,17 @@ def process_excel(file):
400
  try:
401
  # Process the DataFrame
402
  console_messages.append("Processing the DataFrame...")
403
- result_df = nlp_pipeline(df)
404
 
405
- # output_file = "Output_ProjectProposals.xlsx"
406
- output_file = "Output_Proposals.xlsx"
407
- result_df.to_excel(output_file, index=False)
 
 
 
408
 
409
  console_messages.append("Processing completed. Ready for download.")
410
- return output_file, "\n".join(console_messages) # Return the processed DataFrame as Excel file
411
 
412
  except Exception as e:
413
  # return str(e) # Return the error message
@@ -422,8 +513,8 @@ def process_excel(file):
422
 
423
 
424
  example_files = []
425
- # example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
426
- example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
427
  # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
428
 
429
 
 
341
 
342
 
343
 
344
+
345
+
346
+
347
def create_cluster_dataframes(processed_df):
    """Build the two location-by-problem cluster tables.

    Reads the columns ``Location_Cluster``, ``Problem_Cluster``,
    ``Financial_Weight`` and ``Problem_Description`` from *processed_df*.

    Returns:
        A ``(budget_cluster_df, problem_cluster_df)`` tuple. Both frames are
        indexed by ``Location_Cluster`` with one column per
        ``Problem_Cluster``:

        * ``budget_cluster_df`` holds the summed ``Financial_Weight`` of each
          pair, with 0 where a pair has no rows.
        * ``problem_cluster_df`` holds the list of ``Problem_Description``
          values of each pair, with an empty list where a pair has no rows.
    """
    # Total financial weight per (location cluster, problem cluster) pair.
    budget_cluster_df = processed_df.pivot_table(
        values='Financial_Weight',
        index='Location_Cluster',
        columns='Problem_Cluster',
        aggfunc='sum',
        fill_value=0,
    )

    # Problem descriptions collected into one list per pair.
    descriptions_by_pair = processed_df.groupby(
        ['Location_Cluster', 'Problem_Cluster']
    )['Problem_Description'].apply(list)
    problem_cluster_df = descriptions_by_pair.unstack()

    # unstack() leaves NaN for pairs that never occur together. NaN is truthy,
    # so a plain ``if cell:`` check would wrongly treat it as "has
    # descriptions"; normalise those cells to empty lists instead.
    problem_cluster_df = problem_cluster_df.apply(
        lambda column: column.map(
            lambda cell: cell if isinstance(cell, list) else []
        )
    )

    return budget_cluster_df, problem_cluster_df
360
+
361
+ from transformers import GPTNeoForCausalLM, GPT2Tokenizer
362
_PROPOSAL_MODEL_NAME = "EleutherAI/gpt-neo-1.3B"
_PROPOSAL_MODEL_CACHE = {}


def _load_proposal_model():
    """Return the ``(model, tokenizer)`` pair, loading it on first use only."""
    if "model" not in _PROPOSAL_MODEL_CACHE:
        _PROPOSAL_MODEL_CACHE["tokenizer"] = GPT2Tokenizer.from_pretrained(
            _PROPOSAL_MODEL_NAME
        )
        _PROPOSAL_MODEL_CACHE["model"] = GPTNeoForCausalLM.from_pretrained(
            _PROPOSAL_MODEL_NAME
        )
    return _PROPOSAL_MODEL_CACHE["model"], _PROPOSAL_MODEL_CACHE["tokenizer"]


def generate_project_proposal(problem_descriptions, location, problem_domain):
    """Generate a project proposal text for one location/problem cluster.

    Args:
        problem_descriptions: Sequence of problem descriptions; only the
            first three are placed in the prompt to keep it short.
        location: Text describing the location, inserted into the prompt.
        problem_domain: Text describing the problem domain, inserted into
            the prompt.

    Returns:
        The decoded first generated sequence. For a causal language model
        this normally begins with the prompt text itself.
    """
    # Loading a 1.3B-parameter checkpoint is expensive; reuse it across the
    # many per-cluster calls instead of reloading it every time.
    model, tokenizer = _load_proposal_model()

    # Limit to the first 3 descriptions for brevity.
    problems_summary = "; ".join(
        str(description) for description in problem_descriptions[:3]
    )
    prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"

    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    output = model.generate(
        input_ids,
        max_length=300,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # temperature only takes effect when sampling is enabled; without
        # do_sample=True decoding is greedy and the value is ignored.
        do_sample=True,
        temperature=0.75,
        # The GPT-2 tokenizer defines no pad token; state the fallback
        # explicitly rather than relying on generate() to pick one.
        pad_token_id=tokenizer.eos_token_id,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)
383
+
384
+ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters):
385
+ proposals = {}
386
+ for loc in budget_cluster_df.index:
387
+ for prob in budget_cluster_df.columns:
388
+ location = ", ".join(location_clusters[loc])
389
+ problem_domain = ", ".join(problem_clusters[prob])
390
+ problem_descriptions = problem_cluster_df.loc[loc, prob]
391
+
392
+ if problem_descriptions:
393
+ proposal = generate_project_proposal(
394
+ problem_descriptions,
395
+ location,
396
+ problem_domain)
397
+ proposals[(loc, prob)] = proposal
398
+
399
+ return proposals
400
+
401
+
402
+
403
+
404
+
405
+
406
+
407
+
408
+
409
+
410
+
411
+
412
+
413
+
414
+
415
+
416
+
417
+
418
+
419
  def nlp_pipeline(original_df):
420
  console_messages.append("Starting NLP pipeline...")
421
 
 
455
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
456
  console_messages.append("NLP pipeline for location extraction completed.")
457
 
458
+
459
+
460
 
 
 
461
 
462
+ # Create cluster dataframes
463
+ budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
464
+
465
+ # Generate project proposals
466
+ location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
467
+ problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
468
+ project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
469
+
470
+ console_messages.append("NLP pipeline completed.")
471
+ return processed_df, budget_cluster_df, problem_cluster_df, project_proposals
472
 
473
+
474
+
475
+
476
+
477
 
478
 
479
 
 
488
  try:
489
  # Process the DataFrame
490
  console_messages.append("Processing the DataFrame...")
491
+ processed_df, budget_cluster_df, problem_cluster_df, project_proposals = nlp_pipeline(df)
492
 
493
+ output_filename = "OutPut_PPs.xlsx"
494
+ with pd.ExcelWriter(output_filename) as writer:
495
+ project_proposals.to_excel(writer, sheet_name='Project_Proposals', index=False)
496
+ budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
497
+ problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
498
+ processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
499
 
500
  console_messages.append("Processing completed. Ready for download.")
501
+ return output_filename, "\n".join(console_messages) # Return the processed DataFrame as Excel file
502
 
503
  except Exception as e:
504
  # return str(e) # Return the error message
 
513
 
514
 
515
  example_files = []
516
+ example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
517
+ # example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
518
  # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
519
 
520