SantanuBanerjee committed on
Commit 9f9f9bd · verified · 1 Parent(s): a0f12e7

Update app.py

Files changed (1)
  1. app.py +39 -25
app.py CHANGED
@@ -199,8 +199,8 @@ from collections import Counter
 
 def extract_problem_domains(df,
                             text_column='Processed_ProblemDescription_forDomainExtraction',
-                            cluster_range=(5, 25),
-                            top_words=10):
+                            cluster_range=(5, 15),
+                            top_words=7):
     console_messages.append("Extracting Problem Domains...")
 
     # Sentence Transformers approach
@@ -282,8 +282,8 @@ def text_processing_for_location(text):
 def extract_location_clusters(df,
                               text_column1='Processed_LocationText_forClustering',  # Extracted through NLP
                               text_column2='Geographical_Location',  # User Input
-                              cluster_range=(5, 25),
-                              top_words=5):
+                              cluster_range=(5, 15),
+                              top_words=3):
     # Combine the two text columns
     text_column = "Combined_Location_Text"
     df[text_column] = df[text_column1] + ' ' + df[text_column2]
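
Both hunks above only tighten the signature defaults: the cluster search range drops from (5, 25) to (5, 15), and fewer top words are kept per cluster (7 for problem domains, 3 for locations). The function bodies are not part of this commit, so what follows is only a minimal sketch, assuming a Sentence Transformers embedding plus KMeans with silhouette-based selection, of how such cluster_range and top_words parameters are typically consumed; the helper name, model name, and selection criterion are illustrative assumptions, not code from app.py.

# Illustrative sketch only -- not the body of extract_problem_domains in app.py.
from collections import Counter

from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

def sketch_cluster_texts(texts, cluster_range=(5, 15), top_words=7):
    # Embed the free-text descriptions (model choice is an assumption).
    embeddings = SentenceTransformer("all-MiniLM-L6-v2").encode(texts)

    # Try every cluster count in the (inclusive) range and keep the best silhouette.
    best = None  # (score, k, labels)
    for k in range(cluster_range[0], cluster_range[1] + 1):
        labels = KMeans(n_clusters=k, n_init=10, random_state=42).fit_predict(embeddings)
        score = silhouette_score(embeddings, labels)
        if best is None or score > best[0]:
            best = (score, k, labels)
    _, k, labels = best

    # top_words caps how many frequent tokens are kept as each cluster's label.
    cluster_terms = {
        c: [w for w, _ in Counter(
            " ".join(t for t, lbl in zip(texts, labels) if lbl == c).split()
        ).most_common(top_words)]
        for c in range(k)
    }
    return k, cluster_terms

If the body iterates over the range as sketched, lowering the upper end from 25 to 15 roughly halves the number of KMeans fits per call.
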
@@ -362,31 +362,43 @@ from transformers import GPTNeoForCausalLM, GPT2Tokenizer
 def generate_project_proposal(prompt):
     print("Trying to access gpt-neo-1.3B")
     print("prompt: \t", prompt)
-    try:
-        # Generate the proposal
-        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
-        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
-    except Exception as e:
-        print("Error loading models:", str(e))
-        console_messages.append("\n Error Loading Models")
-        return prompt
+
+    # Generate the proposal
+    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+
 
     try:
-        input_ids = tokenizer.encode(prompt, return_tensors="pt")
+        # input_ids = tokenizer.encode(prompt, return_tensors="pt")
+        # Truncate the prompt to fit within the model's input limits
+        max_input_length = 2048  # Adjust as per your model's limit
+        input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
+
+
         print("Input IDs shape:", input_ids.shape)
         output = model.generate(
-            input_ids,
-            max_length=300,
-            num_return_sequences=1,
+            input_ids,
+            # max_length=300,
+            max_new_tokens=500,
+            num_return_sequences=1,
             no_repeat_ngram_size=2,
-            temperature=0.75)
+            temperature=0.5,
+            pad_token_id=tokenizer.eos_token_id  # Ensure padding with EOS token
+        )
         print("Output shape:", output.shape)
+
        proposal = tokenizer.decode(output[0], skip_special_tokens=True)
-        print("Successfully accessed gpt-neo-1.3B and returning")
+        if "Project Proposal:" in proposal:
+            proposal = proposal.split("Project Proposal:", 1)[1].strip()
+        else:
+            proposal = proposal.strip()
+
+        # print("Successfully accessed gpt-neo-1.3B and returning")
+        print("Generated Proposal: ", proposal)
         return proposal
     except Exception as e:
         print("Error generating proposal:", str(e))
-        return prompt
+        return "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
 
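
The rewritten generate_project_proposal above moves model loading out of its own try/except, truncates the prompt to a 2048-token input limit, budgets the output with max_new_tokens instead of max_length, pads with the EOS token, strips everything up to "Project Proposal:" from the decoded text, and falls back to a fixed message if generation fails. Below is a self-contained sketch of that truncate-then-generate pattern; it substitutes the small "gpt2" checkpoint and tiny token budgets purely to keep the example cheap (the commit loads EleutherAI/gpt-neo-1.3B), and it adds do_sample=True because transformers ignores temperature under its default greedy decoding.

# Self-contained sketch of the truncate-then-generate pattern used above.
# "gpt2" and the small token budgets are illustrative choices, not values from app.py.
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

prompt = "Problems: irregular water supply\n\nProject Proposal:"

# Truncate the prompt so it can never exceed the model's input limit.
input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=1024)

output = model.generate(
    input_ids,
    max_new_tokens=60,                    # budget new tokens rather than total length
    num_return_sequences=1,
    no_repeat_ngram_size=2,
    temperature=0.5,
    do_sample=True,                       # temperature only applies when sampling
    pad_token_id=tokenizer.eos_token_id,  # GPT-2/GPT-Neo define no pad token by default
)

text = tokenizer.decode(output[0], skip_special_tokens=True)
# Keep only the generated proposal, as the commit does for the decoded output.
proposal = text.split("Project Proposal:", 1)[1].strip() if "Project Proposal:" in text else text.strip()
print(proposal)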
 
@@ -411,6 +423,7 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
             location = ", ".join([item.strip() for item in location_clusters[loc] if item])  # Clean and join
             problem_domain = ", ".join([item.strip() for item in problem_clusters[prob] if item])  # Clean and join
             problem_descriptions = problem_cluster_df.loc[loc, prob]
+
 
             print("location: ", location)
             print("problem_domain: ", problem_domain)
@@ -418,16 +431,17 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
 
             # Check if problem_descriptions is valid (not NaN and not an empty list)
             if isinstance(problem_descriptions, list) and problem_descriptions:
-                print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
+                # print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
+                print(f"Generating PP")
 
                 # Prepare the prompt
-                problems_summary = "; \n".join(problem_descriptions)  # Join all problem descriptions
-                prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+                # problems_summary = "; \n".join(problem_descriptions)  # Join all problem descriptions
+                problems_summary = "; \n".join(problem_descriptions[:3])  # Limit to first 3 for brevity
 
-                proposal = generate_project_proposal(prompt)
-                proposals[(loc, prob)] = proposal
-                print("Generated Proposal: ", proposal)
+                # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+                prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
 
+                proposals[(loc, prob)] = generate_project_proposal(prompt)
             else:
                 print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
 
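
The proposal loop now logs a shorter message, keeps only the first three problem descriptions, and uses a more explicit prompt template. With placeholder values (none of these strings come from the app's data), the assembled prompt looks like this:

# Placeholder values only -- not data from the application.
location = "Ward 12, Pune"
problem_domain = "water supply, sanitation"
problem_descriptions = [
    "Irregular municipal water supply",
    "Broken public taps near the market",
    "No rainwater harvesting in new buildings",
    "Open drains during monsoon",
]

# The commit keeps only the first three descriptions to shorten the prompt.
problems_summary = "; \n".join(problem_descriptions[:3])

prompt = (
    f"Generate a solution-oriented project proposal for the following public problem "
    f"(only output the proposal):\n\n Geographical/Digital Location: {location}\n"
    f"Problem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
)
print(prompt)

Trimming to three descriptions, together with the input truncation added to generate_project_proposal, helps keep the prompt well inside GPT-Neo's 2048-token context window.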
 
 
447