SantanuBanerjee committed on
Commit
5ad8a00
·
verified ·
1 Parent(s): b6f8dc2

Update app.py

Browse files

This is the best version that can be run. If it doesn't work, you can always fall back to the previous settings, which are the MVP.

Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -194,8 +194,8 @@ from collections import Counter
194
 
195
  def extract_problem_domains(df,
196
  text_column='Processed_ProblemDescription_forDomainExtraction',
197
- cluster_range=(6, 8),
198
- top_words=7):
199
  consoleMessage_and_Print("Extracting Problem Domains...")
200
 
201
  # Sentence Transformers approach
@@ -276,8 +276,8 @@ def text_processing_for_location(text):
276
  def extract_location_clusters(df,
277
  text_column1='Processed_LocationText_forClustering', # Extracted through NLP
278
  text_column2='Geographical_Location', # User Input
279
- cluster_range=(3, 5),
280
- top_words=3):
281
  # Combine the two text columns
282
  text_column = "Combined_Location_Text"
283
  df[text_column] = df[text_column1] + ' ' + df[text_column2]
@@ -361,17 +361,17 @@ from transformers import GPTNeoForCausalLM, GPT2Tokenizer
361
  def generate_project_proposal(prompt): # Generate the proposal
362
  default_proposal = "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
363
 
364
- # model_Name = "EleutherAI/gpt-neo-2.7B"
365
- # tempareCHUR = uniform(0.3,0.6)
366
 
367
- model_Name = "EleutherAI/gpt-neo-1.3B"
368
- tempareCHUR = uniform(0.5,0.8)
369
 
370
  consoleMessage_and_Print(f"Trying to access {model_Name} model. The Prompt is: \n{prompt}")
371
 
372
  model = GPTNeoForCausalLM.from_pretrained(model_Name)
373
  tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
374
- model_max_token_limit = 1500 #2048
375
 
376
  try:
377
  # input_ids = tokenizer.encode(prompt, return_tensors="pt")
@@ -457,9 +457,8 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
457
 
458
  random.shuffle(shuffled_descriptions)
459
  # Prepare the prompt
460
- # problems_summary = "; \n".join(problem_descriptions) # Join all problem descriptions
461
- # problems_summary = "; \n".join(problem_descriptions[:3]) # Limit to first 3 for brevity
462
- problems_summary = "; \n".join(shuffled_descriptions[:7])
463
 
464
 
465
  # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
@@ -767,9 +766,9 @@ def process_excel(file):
767
 
768
 
769
  example_files = []
770
- example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
771
  # example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
772
- # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
773
 
774
 
775
  import random
 
194
 
195
  def extract_problem_domains(df,
196
  text_column='Processed_ProblemDescription_forDomainExtraction',
197
+ cluster_range=(3, 17),
198
+ top_words=10):
199
  consoleMessage_and_Print("Extracting Problem Domains...")
200
 
201
  # Sentence Transformers approach
 
276
  def extract_location_clusters(df,
277
  text_column1='Processed_LocationText_forClustering', # Extracted through NLP
278
  text_column2='Geographical_Location', # User Input
279
+ cluster_range=(3, 17),
280
+ top_words=10):
281
  # Combine the two text columns
282
  text_column = "Combined_Location_Text"
283
  df[text_column] = df[text_column1] + ' ' + df[text_column2]
 
361
  def generate_project_proposal(prompt): # Generate the proposal
362
  default_proposal = "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
363
 
364
+ model_Name = "EleutherAI/gpt-neo-2.7B"
365
+ tempareCHUR = uniform(0.3,0.6)
366
 
367
+ # model_Name = "EleutherAI/gpt-neo-1.3B"
368
+ # tempareCHUR = uniform(0.5,0.8)
369
 
370
  consoleMessage_and_Print(f"Trying to access {model_Name} model. The Prompt is: \n{prompt}")
371
 
372
  model = GPTNeoForCausalLM.from_pretrained(model_Name)
373
  tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
374
+ model_max_token_limit = 2048 #1500
375
 
376
  try:
377
  # input_ids = tokenizer.encode(prompt, return_tensors="pt")
 
457
 
458
  random.shuffle(shuffled_descriptions)
459
  # Prepare the prompt
460
+ # problems_summary = "; \n".join(shuffled_descriptions[:7]) # Limit to first 7 for brevity
461
+ problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
 
462
 
463
 
464
  # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
 
766
 
767
 
768
  example_files = []
769
+ # example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
770
  # example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
771
+ example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
772
 
773
 
774
  import random