SantanuBanerjee committed on
Commit
b59ee01
·
verified ·
1 Parent(s): ac713f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -29
app.py CHANGED
@@ -359,22 +359,18 @@ def create_cluster_dataframes(processed_df):
359
  return budget_cluster_df, problem_cluster_df
360
 
361
  from transformers import GPTNeoForCausalLM, GPT2Tokenizer
362
- def generate_project_proposal(problem_descriptions, location, problem_domain):
363
  print("Trying to access gpt-neo-1.3B")
364
- print("problem_descriptions: ", problem_descriptions)
365
- print("location: ", location)
366
- print("problem_domain: ", problem_domain)
367
-
368
- model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
369
- tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
370
-
371
- # Prepare the prompt
372
- problems_summary = "; ".join(problem_descriptions[:3]) # Limit to first 3 for brevity
373
- # problems_summary = "; ".join(problem_descriptions)
374
- # prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
375
- prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
376
-
377
- # Generate the proposal
378
  try:
379
  input_ids = tokenizer.encode(prompt, return_tensors="pt")
380
  print("Input IDs shape:", input_ids.shape)
@@ -417,14 +413,21 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
417
  print("problem_descriptions: ",problem_descriptions)
418
 
419
  if problem_descriptions and not pd.isna(problem_descriptions):
 
420
  print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
 
421
  # console_messages.append(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
422
- proposal = generate_project_proposal(
423
- problem_descriptions,
424
- location,
425
- problem_domain)
 
 
 
 
426
  proposals[(loc, prob)] = proposal
427
  print("Generated Proposal: ", proposal)
 
428
  else:
429
  print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
430
 
@@ -509,9 +512,6 @@ def nlp_pipeline(original_df):
509
  except Exception as e:
510
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
511
  console_messages.append("NLP pipeline for location extraction completed.")
512
-
513
-
514
-
515
 
516
 
517
  # Create cluster dataframes
@@ -525,8 +525,8 @@ def nlp_pipeline(original_df):
525
  # # Generate project proposals
526
  # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
527
  # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
528
- print("\n location_clusters_2: ", location_clusters)
529
- print("\n problem_clusters_2: ", problem_clusters)
530
  project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
531
 
532
  console_messages.append("NLP pipeline completed.")
@@ -556,11 +556,11 @@ def process_excel(file):
556
 
557
  output_filename = "OutPut_PPs.xlsx"
558
  with pd.ExcelWriter(output_filename) as writer:
559
- # ### Convert project_proposals dictionary to DataFrame
560
- # project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
561
- # project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
562
- # project_proposals_df.reset_index(inplace=True)
563
- # project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
564
 
565
  budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
566
  problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
 
359
  return budget_cluster_df, problem_cluster_df
360
 
361
  from transformers import GPTNeoForCausalLM, GPT2Tokenizer
362
+ def generate_project_proposal(prompt):
363
  print("Trying to access gpt-neo-1.3B")
364
+ print("prompt: \t", prompt)
365
+ try:
366
+ # Generate the proposal
367
+ model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
368
+ tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
369
+ except Exception as e:
370
+ print("Error loading models:", str(e))
371
+ console_messages.append("\n Error Loading Models")
372
+ return prompt
373
+
 
 
 
 
374
  try:
375
  input_ids = tokenizer.encode(prompt, return_tensors="pt")
376
  print("Input IDs shape:", input_ids.shape)
 
413
  print("problem_descriptions: ",problem_descriptions)
414
 
415
  if problem_descriptions and not pd.isna(problem_descriptions):
416
+
417
  print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
418
+
419
  # console_messages.append(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
420
+
421
+ # Prepare the prompt
422
+ problems_summary = "; ".join(problem_descriptions[:3]) # Limit to first 3 for brevity
423
+ # problems_summary = "; ".join(problem_descriptions)
424
+ # prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
425
+ prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
426
+
427
+ proposal = generate_project_proposal(prompt)
428
  proposals[(loc, prob)] = proposal
429
  print("Generated Proposal: ", proposal)
430
+
431
  else:
432
  print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
433
 
 
512
  except Exception as e:
513
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
514
  console_messages.append("NLP pipeline for location extraction completed.")
 
 
 
515
 
516
 
517
  # Create cluster dataframes
 
525
  # # Generate project proposals
526
  # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
527
  # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
528
+ # print("\n location_clusters_2: ", location_clusters)
529
+ # print("\n problem_clusters_2: ", problem_clusters)
530
  project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
531
 
532
  console_messages.append("NLP pipeline completed.")
 
556
 
557
  output_filename = "OutPut_PPs.xlsx"
558
  with pd.ExcelWriter(output_filename) as writer:
559
+ ### Convert project_proposals dictionary to DataFrame
560
+ project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
561
+ project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
562
+ project_proposals_df.reset_index(inplace=True)
563
+ project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
564
 
565
  budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
566
  problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')