SantanuBanerjee committed on
Commit
db2900b
·
verified ·
1 Parent(s): 1534c20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -30
app.py CHANGED
@@ -352,43 +352,56 @@ def create_cluster_dataframes(processed_df):
352
 
353
  return budget_cluster_df, problem_cluster_df
354
 
 
 
 
 
 
355
  from transformers import GPTNeoForCausalLM, GPT2Tokenizer
356
- def generate_project_proposal(prompt):
357
- print("Trying to access gpt-neo-1.3B")
358
- print("prompt: \t", prompt)
359
-
360
- # Generate the proposal
361
- model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
362
- tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
363
 
 
 
 
364
 
365
  try:
366
  # input_ids = tokenizer.encode(prompt, return_tensors="pt")
367
  # Truncate the prompt to fit within the model's input limits
368
- max_input_length = 1024 # Adjust as per your model's limit
369
- input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
370
 
371
 
372
  print("Input IDs shape:", input_ids.shape)
 
373
  output = model.generate(
374
  input_ids,
375
- # max_length=300,
376
- max_new_tokens=500,
377
  num_return_sequences=1,
378
  no_repeat_ngram_size=2,
379
  temperature=0.5,
380
  pad_token_id=tokenizer.eos_token_id # Ensure padding with EOS token
381
  )
382
  print("Output shape:", output.shape)
 
 
 
 
 
 
 
 
 
 
383
 
384
- proposal = tokenizer.decode(output[0], skip_special_tokens=True)
385
- if "Project Proposal:" in proposal:
386
- proposal = proposal.split("Project Proposal:", 1)[1].strip()
387
- else:
388
- proposal = proposal.strip()
389
 
390
  # print("Successfully accessed gpt-neo-1.3B and returning")
391
- print("Generated Proposal: ", proposal)
392
  return proposal
393
  except Exception as e:
394
  print("Error generating proposal:", str(e))
@@ -404,8 +417,6 @@ import copy
404
  def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters):
405
  consoleMessage_and_Print("\n Starting function: create_project_proposals")
406
  proposals = {}
407
-
408
- sanban_debug = False
409
 
410
  for loc in budget_cluster_df.index:
411
  consoleMessage_and_Print(f"\n loc: {loc}")
@@ -432,26 +443,20 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
432
  # Prepare the prompt
433
  # problems_summary = "; \n".join(problem_descriptions) # Join all problem descriptions
434
  # problems_summary = "; \n".join(problem_descriptions[:3]) # Limit to first 3 for brevity
435
- problems_summary = "; \n".join(shuffled_descriptions[:3])
436
 
437
 
438
  # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
439
  # prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
440
- prompt = f"Generate a single solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: <only output this proposal>"
441
 
442
  proposal = generate_project_proposal(prompt)
443
  # Check if proposal is valid
444
  if isinstance(proposal, str) and proposal.strip(): # Valid string that's not empty
445
  proposals[(loc, prob)] = proposal
446
 
447
- sanban_debug = True
448
- break
449
-
450
  else:
451
  print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
452
-
453
- if sanban_debug:
454
- break
455
 
456
  return proposals
457
 
@@ -746,8 +751,8 @@ def process_excel(file):
746
 
747
 
748
  example_files = []
749
- example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
750
- # example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
751
  # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
752
 
753
 
@@ -765,7 +770,7 @@ interface = gr.Interface(
765
 
766
  outputs=[
767
  gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
768
- gr.Textbox(label="Console Messages", lines=10, interactive=False) # Console messages output
769
  ],
770
 
771
 
 
352
 
353
  return budget_cluster_df, problem_cluster_df
354
 
355
+
356
+
357
+
358
+
359
+
360
  from transformers import GPTNeoForCausalLM, GPT2Tokenizer
361
+ def generate_project_proposal(prompt): # Generate the proposal
362
+ # model_Name = "EleutherAI/gpt-neo-2.7B"
363
+ model_Name = "EleutherAI/gpt-neo-1.3B"
364
+
365
+ consoleMessage_and_Print(f"Trying to access {model_Name} model. The Prompt is: \n{prompt}")
 
 
366
 
367
+ model = GPTNeoForCausalLM.from_pretrained(model_Name)
368
+ tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
369
+ model_max_token_limit = 2048
370
 
371
  try:
372
  # input_ids = tokenizer.encode(prompt, return_tensors="pt")
373
  # Truncate the prompt to fit within the model's input limits
374
+ # Adjust as per your model's limit
375
+ input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length = model_max_token_limit/2)
376
 
377
 
378
  print("Input IDs shape:", input_ids.shape)
379
+ # Generate the output
380
  output = model.generate(
381
  input_ids,
382
+ max_new_tokens = model_max_token_limit,
 
383
  num_return_sequences=1,
384
  no_repeat_ngram_size=2,
385
  temperature=0.5,
386
  pad_token_id=tokenizer.eos_token_id # Ensure padding with EOS token
387
  )
388
  print("Output shape:", output.shape)
389
+
390
+
391
+ # Decode the output to text
392
+ full_returned_segment = tokenizer.decode(output[0], skip_special_tokens=True)
393
+
394
+ # Slice off the input part if the input length is known
395
+ input_length = input_ids.shape[1]
396
+ generated_part = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
397
+
398
+ proposal = generated_part.strip()
399
 
400
+ # if "Project Proposal:" in proposal:
401
+ # proposal = proposal.split("Project Proposal:", 1)[1].strip()
 
 
 
402
 
403
  # print("Successfully accessed gpt-neo-1.3B and returning")
404
+ print("Generated Proposal: \n", proposal,"\n\n")
405
  return proposal
406
  except Exception as e:
407
  print("Error generating proposal:", str(e))
 
417
  def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters):
418
  consoleMessage_and_Print("\n Starting function: create_project_proposals")
419
  proposals = {}
 
 
420
 
421
  for loc in budget_cluster_df.index:
422
  consoleMessage_and_Print(f"\n loc: {loc}")
 
443
  # Prepare the prompt
444
  # problems_summary = "; \n".join(problem_descriptions) # Join all problem descriptions
445
  # problems_summary = "; \n".join(problem_descriptions[:3]) # Limit to first 3 for brevity
446
+ problems_summary = "; \n".join(shuffled_descriptions[:7])
447
 
448
 
449
  # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
450
  # prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
451
+ prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
452
 
453
  proposal = generate_project_proposal(prompt)
454
  # Check if proposal is valid
455
  if isinstance(proposal, str) and proposal.strip(): # Valid string that's not empty
456
  proposals[(loc, prob)] = proposal
457
 
 
 
 
458
  else:
459
  print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
 
 
 
460
 
461
  return proposals
462
 
 
751
 
752
 
753
  example_files = []
754
+ # example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
755
+ example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
756
  # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
757
 
758
 
 
770
 
771
  outputs=[
772
  gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
773
+ gr.Textbox(label="Console Messages", lines=5, interactive=False) # Console messages output
774
  ],
775
 
776