SantanuBanerjee committed on
Commit b6f8dc2 · verified · 1 Parent(s): 480770e

Update app.py

Files changed (1)
  1. app.py +24 -16
app.py CHANGED
@@ -359,6 +359,8 @@ def create_cluster_dataframes(processed_df):
 from random import uniform
 from transformers import GPTNeoForCausalLM, GPT2Tokenizer
 def generate_project_proposal(prompt): # Generate the proposal
+    default_proposal = "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
+
     # model_Name = "EleutherAI/gpt-neo-2.7B"
     # tempareCHUR = uniform(0.3,0.6)
 
@@ -369,16 +371,15 @@ def generate_project_proposal(prompt): # Generate the proposal
 
     model = GPTNeoForCausalLM.from_pretrained(model_Name)
     tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
-    model_max_token_limit = 2047
+    model_max_token_limit = 1500 #2048
 
     try:
         # input_ids = tokenizer.encode(prompt, return_tensors="pt")
         # Truncate the prompt to fit within the model's input limits
         # Adjust as per your model's limit
-        input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length = int(model_max_token_limit/2) )
-
-
+        input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length = int(2*model_max_token_limit/3) )
         print("Input IDs shape:", input_ids.shape)
+        input_length = input_ids.shape[1] # Slice off the input part if the input length is known
 
         pad_tokenId = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id # Padding with EOS token may always be great
         attentionMask = input_ids.ne(pad_tokenId).long()
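
Worked out, the new budget arithmetic (assuming the "#2048" comment refers to GPT-Neo's 2048-token context window): the prompt is now truncated to int(2*1500/3) = 1000 tokens, and the next hunk caps generation at int(1500/3) = 500 new tokens, so a full sequence tops out at 1500 tokens. Under the old settings the truncated prompt could reach int(2047/2) = 1023 tokens while max_new_tokens allowed 2047 more, a possible total of 3070 tokens, past the context window; that overflow is presumably what this commit fixes.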
@@ -386,8 +387,8 @@ def generate_project_proposal(prompt): # Generate the proposal
         # Generate the output
         output = model.generate(
             input_ids,
-            min_length = int(model_max_token_limit/3), # minimum length of the generated output
-            max_new_tokens = model_max_token_limit,
+            min_length = int(model_max_token_limit/7), # minimum length of the generated output
+            max_new_tokens = int(model_max_token_limit/3),
             num_return_sequences=1,
             no_repeat_ngram_size=2,
             temperature=tempareCHUR,
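
One hedge on min_length: in the transformers generate API, min_length bounds the whole sequence, prompt included, so with prompts near the 1000-token cap the new floor of int(1500/7) = 214 is already satisfied by the input alone; min_new_tokens is the parameter that would force a minimum amount of newly generated text, if that was the intent here.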
@@ -396,25 +397,32 @@ def generate_project_proposal(prompt): # Generate the proposal
         )
         print("Output shape:", output.shape)
 
-
         # Decode the output to text
-        # full_returned_segment = tokenizer.decode(output[0], skip_special_tokens=True)
+        full_returned_segment = tokenizer.decode(output[0], skip_special_tokens=True)
+        PP_in_fullReturn = "Project Proposal:" in full_returned_segment
 
-        # Slice off the input part if the input length is known
-        input_length = input_ids.shape[1]
-        generated_part = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
+        if output is not None and output.shape[1] > 0:
+            # Decode the output
+            if output.shape[1] > input_length and PP_in_fullReturn:
+                generated_part = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
+            else:
+                generated_part = tokenizer.decode(output[0], skip_special_tokens=True)
+        else:
+            # Handle the error case, e.g., return an empty string or a default value
+            raise Exception("Error generating proposal: output is empty or None")
 
         proposal = generated_part.strip()
 
+
         # if "Project Proposal:" in proposal:
         # proposal = proposal.split("Project Proposal:", 1)[1].strip()
-
-        # print("Successfully accessed gpt-neo-1.3B and returning")
+
+
         print("Generated Proposal: \n", proposal,"\n\n")
         return proposal
     except Exception as e:
         print("Error generating proposal:", str(e))
-        return "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
+        return default_proposal
 
 
 
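On the decode logic above: a decoder-only model like GPT-Neo returns the prompt followed by its continuation in a single tensor, so decoding output[0][input_length:] keeps only the newly generated text. The new branching falls back to decoding the full sequence when the output is no longer than the input or lacks the "Project Proposal:" marker, and raises on an empty output so that the except branch returns default_proposal instead of crashing the caller.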
@@ -759,8 +767,8 @@ def process_excel(file):
 
 
 example_files = []
-# example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
-example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
+example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
+# example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
 # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
 
 
 
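For reference, the revised generation path in this commit can be exercised end to end. The sketch below is a minimal, self-contained reconstruction, not the author's exact code: the model_Name value, the tempareCHUR draw, do_sample=True, passing attention_mask to generate, and the sample prompt are all assumptions, since their real definitions sit outside the hunks shown above (the commented-out lines only suggest a GPT-Neo checkpoint and a temperature from uniform(0.3, 0.6)).

# Minimal sketch of the revised path; model_Name, tempareCHUR, do_sample,
# attention_mask, and the prompt are assumptions -- their real values are
# outside this diff.
from random import uniform
from transformers import GPTNeoForCausalLM, GPT2Tokenizer

model_Name = "EleutherAI/gpt-neo-1.3B"  # assumed checkpoint
tempareCHUR = uniform(0.3, 0.6)         # assumed, per the commented-out line

model = GPTNeoForCausalLM.from_pretrained(model_Name)
tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
model_max_token_limit = 1500  # budget below GPT-Neo's 2048-token context

prompt = "Project Proposal: a hyper-local sustainability initiative"  # hypothetical

# The prompt gets at most 2/3 of the budget (1000 tokens)...
input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True,
                             max_length=int(2 * model_max_token_limit / 3))
input_length = input_ids.shape[1]

pad_tokenId = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
attentionMask = input_ids.ne(pad_tokenId).long()

# ...and generation gets at most 1/3 (500 new tokens), keeping totals under 2048.
output = model.generate(
    input_ids,
    attention_mask=attentionMask,
    min_length=int(model_max_token_limit / 7),
    max_new_tokens=int(model_max_token_limit / 3),
    num_return_sequences=1,
    no_repeat_ngram_size=2,
    temperature=tempareCHUR,
    do_sample=True,        # assumed: temperature has no effect under greedy decoding
    pad_token_id=pad_tokenId,
)

# Decoder-only models echo the prompt; slice it off to keep only the continuation.
generated_part = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
print(generated_part.strip())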