SantanuBanerjee committed on
Commit
42a5ed8
·
verified ·
1 Parent(s): 5ad8a00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -194,7 +194,7 @@ from collections import Counter
194
 
195
  def extract_problem_domains(df,
196
  text_column='Processed_ProblemDescription_forDomainExtraction',
197
- cluster_range=(3, 17),
198
  top_words=10):
199
  consoleMessage_and_Print("Extracting Problem Domains...")
200
 
@@ -276,7 +276,7 @@ def text_processing_for_location(text):
276
  def extract_location_clusters(df,
277
  text_column1='Processed_LocationText_forClustering', # Extracted through NLP
278
  text_column2='Geographical_Location', # User Input
279
- cluster_range=(3, 17),
280
  top_words=10):
281
  # Combine the two text columns
282
  text_column = "Combined_Location_Text"
@@ -371,7 +371,7 @@ def generate_project_proposal(prompt): # Generate the proposal
371
 
372
  model = GPTNeoForCausalLM.from_pretrained(model_Name)
373
  tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
374
- model_max_token_limit = 2048 #1500
375
 
376
  try:
377
  # input_ids = tokenizer.encode(prompt, return_tensors="pt")
@@ -457,13 +457,17 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
457
 
458
  random.shuffle(shuffled_descriptions)
459
  # Prepare the prompt
460
- # problems_summary = "; \n".join(shuffled_descriptions[:7]) # Limit to first 3 for brevity
461
- problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
 
 
462
 
463
 
464
  # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
465
  # prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
466
- prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
 
 
467
 
468
  proposal = generate_project_proposal(prompt)
469
  # Check if proposal is valid
@@ -766,9 +770,9 @@ def process_excel(file):
766
 
767
 
768
  example_files = []
769
- # example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
770
- # example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
771
- example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
772
 
773
 
774
  import random
@@ -785,7 +789,7 @@ interface = gr.Interface(
785
 
786
  outputs=[
787
  gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
788
- gr.Textbox(label="Console Messages", lines=5, interactive=False) # Console messages output
789
  ],
790
 
791
 
 
194
 
195
  def extract_problem_domains(df,
196
  text_column='Processed_ProblemDescription_forDomainExtraction',
197
+ cluster_range=(2, 10),
198
  top_words=10):
199
  consoleMessage_and_Print("Extracting Problem Domains...")
200
 
 
276
  def extract_location_clusters(df,
277
  text_column1='Processed_LocationText_forClustering', # Extracted through NLP
278
  text_column2='Geographical_Location', # User Input
279
+ cluster_range=(2, 10),
280
  top_words=10):
281
  # Combine the two text columns
282
  text_column = "Combined_Location_Text"
 
371
 
372
  model = GPTNeoForCausalLM.from_pretrained(model_Name)
373
  tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
374
+ model_max_token_limit = 1750 #2048 #1500
375
 
376
  try:
377
  # input_ids = tokenizer.encode(prompt, return_tensors="pt")
 
457
 
458
  random.shuffle(shuffled_descriptions)
459
  # Prepare the prompt
460
+ # problems_summary = "; \n".join(shuffled_descriptions[:3]) # Limit to first 3 for brevity
461
+ # problems_summary = "; \n".join([f"Problem: {desc}" for desc in shuffled_descriptions[:5]])
462
+ problems_summary = "; \n".join([f"Problem {i+1}: {desc}" for i, desc in enumerate(shuffled_descriptions[:7])])
463
+ # problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
464
 
465
 
466
  # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
467
  # prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
468
+ # prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
469
+ prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\n\n {problems_summary}\n\nSingle Combined Project Proposal: \t"
470
+
471
 
472
  proposal = generate_project_proposal(prompt)
473
  # Check if proposal is valid
 
770
 
771
 
772
  example_files = []
773
+ example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
774
+ example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
775
+ # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
776
 
777
 
778
  import random
 
789
 
790
  outputs=[
791
  gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
792
+ gr.Textbox(label="Console Messages", lines=7, interactive=False) # Console messages output
793
  ],
794
 
795