Update app.py
app.py
CHANGED
@@ -359,6 +359,8 @@ def create_cluster_dataframes(processed_df):
 from random import uniform
 from transformers import GPTNeoForCausalLM, GPT2Tokenizer
 def generate_project_proposal(prompt): # Generate the proposal
+    default_proposal = "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
+
     # model_Name = "EleutherAI/gpt-neo-2.7B"
     # tempareCHUR = uniform(0.3,0.6)
 
@@ -369,16 +371,15 @@ def generate_project_proposal(prompt): # Generate the proposal
 
     model = GPTNeoForCausalLM.from_pretrained(model_Name)
     tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
-    model_max_token_limit =
+    model_max_token_limit = 1500 #2048
 
     try:
         # input_ids = tokenizer.encode(prompt, return_tensors="pt")
         # Truncate the prompt to fit within the model's input limits
         # Adjust as per your model's limit
-        input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length = int(model_max_token_limit/
-
-
+        input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length = int(2*model_max_token_limit/3) )
         print("Input IDs shape:", input_ids.shape)
+        input_length = input_ids.shape[1] # Slice off the input part if the input length is known
 
         pad_tokenId = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id # Padding with EOS token may always be great
         attentionMask = input_ids.ne(pad_tokenId).long()
@@ -386,8 +387,8 @@ def generate_project_proposal(prompt): # Generate the proposal
         # Generate the output
         output = model.generate(
             input_ids,
-            min_length = int(model_max_token_limit/
-            max_new_tokens = model_max_token_limit,
+            min_length = int(model_max_token_limit/7), # minimum length of the generated output
+            max_new_tokens = int(model_max_token_limit/3),
             num_return_sequences=1,
             no_repeat_ngram_size=2,
             temperature=tempareCHUR,
@@ -396,25 +397,32 @@ def generate_project_proposal(prompt): # Generate the proposal
         )
         print("Output shape:", output.shape)
 
-
         # Decode the output to text
-
+        full_returned_segment = tokenizer.decode(output[0], skip_special_tokens=True)
+        PP_in_fullReturn = "Project Proposal:" in full_returned_segment
 
-
-
-
+        if output is not None and output.shape[1] > 0:
+            # Decode the output
+            if output.shape[1] > input_length and PP_in_fullReturn:
+                generated_part = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
+            else:
+                generated_part = tokenizer.decode(output[0], skip_special_tokens=True)
+        else:
+            # Handle the error case, e.g., return an empty string or a default value
+            raise Exception("Error generating proposal: output is empty or None")
 
         proposal = generated_part.strip()
 
+
         # if "Project Proposal:" in proposal:
         #     proposal = proposal.split("Project Proposal:", 1)[1].strip()
-
-
+
+
         print("Generated Proposal: \n", proposal,"\n\n")
         return proposal
     except Exception as e:
         print("Error generating proposal:", str(e))
-        return
+        return default_proposal
 
 
 
@@ -759,8 +767,8 @@ def process_excel(file):
 
 
     example_files = []
-
-    example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
+    example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
+    # example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
     # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
 
 
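The new token budget works out as follows: with model_max_token_limit = 1500, the prompt is truncated to int(2*1500/3) = 1000 tokens and generation is capped at int(1500/3) = 500 new tokens, so prompt plus continuation stays within 1500 tokens, inside the 2048-token context window noted in the #2048 comment. One caveat: in transformers, generate's min_length counts the prompt tokens as well, so int(1500/7) = 214 is already satisfied by any prompt longer than 214 tokens; min_new_tokens is the parameter that enforces a floor on newly generated tokens only.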
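For reference, here is a minimal, self-contained sketch of the truncate-generate-slice pattern this commit lands. The checkpoint name, budget, and prompt are illustrative assumptions, not the Space's exact configuration (app.py references EleutherAI/gpt-neo-2.7B; the smaller 125M checkpoint keeps the sketch cheap to run):

    # Sketch of the commit's pattern: cap the prompt at 2/3 of a fixed token
    # budget, generate at most 1/3 of the budget in new tokens, then decode
    # only the continuation by slicing the prompt tokens off the output.
    from transformers import GPTNeoForCausalLM, GPT2Tokenizer

    model_name = "EleutherAI/gpt-neo-125M"  # assumption: smaller stand-in for the 2.7B model
    model = GPTNeoForCausalLM.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)

    budget = 1500  # total token budget, as in the commit
    prompt = "Project Proposal: solar-powered community water purification"

    # Truncate the prompt to 2/3 of the budget.
    input_ids = tokenizer.encode(prompt, return_tensors="pt",
                                 truncation=True, max_length=2 * budget // 3)
    input_length = input_ids.shape[1]

    # GPT-Neo has no pad token, so reuse EOS (as the app does).
    pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    attention_mask = input_ids.ne(pad_token_id).long()

    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=budget // 3,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,       # assumption: sampling enabled so temperature takes effect
        temperature=0.5,
        pad_token_id=pad_token_id,
    )

    # Decode only the tokens generated after the prompt.
    generated = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
    print(generated.strip())

Slicing output[0][input_length:] before decoding is what keeps the echoed prompt out of the returned proposal; the commit additionally falls back to decoding the full sequence when "Project Proposal:" is missing from it, and to default_proposal on any exception.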