Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Community

SantanuBanerjee committed on Aug 8, 2024

Commit

42a5ed8

verified ·

1 Parent(s): 5ad8a00

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -10

app.py CHANGED Viewed

@@ -194,7 +194,7 @@ from collections import Counter
 def extract_problem_domains(df,
                             text_column='Processed_ProblemDescription_forDomainExtraction',
-                            cluster_range=(3, 17),
                             top_words=10):
     consoleMessage_and_Print("Extracting Problem Domains...")
@@ -276,7 +276,7 @@ def text_processing_for_location(text):
 def extract_location_clusters(df,
                               text_column1='Processed_LocationText_forClustering', # Extracted through NLP
                               text_column2='Geographical_Location', # User Input
-                              cluster_range=(3, 17),
                               top_words=10):
     # Combine the two text columns
     text_column = "Combined_Location_Text"
@@ -371,7 +371,7 @@ def generate_project_proposal(prompt): # Generate the proposal
     model = GPTNeoForCausalLM.from_pretrained(model_Name)
     tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
-    model_max_token_limit = 2048 #1500
     try:
         # input_ids = tokenizer.encode(prompt, return_tensors="pt")
@@ -457,13 +457,17 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
                 random.shuffle(shuffled_descriptions)
                 # Prepare the prompt
-                # problems_summary = "; \n".join(shuffled_descriptions[:7]) # Limit to first 3 for brevity
-                problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
                 # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
                 # prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
-                prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
                 proposal = generate_project_proposal(prompt)
                 # Check if proposal is valid
@@ -766,9 +770,9 @@ def process_excel(file):
 example_files = []
-# example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
-# example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
-example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
 import random
@@ -785,7 +789,7 @@ interface = gr.Interface(
     outputs=[
         gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"),  # File download output
-        gr.Textbox(label="Console Messages", lines=5, interactive=False)  # Console messages output
         ],

 def extract_problem_domains(df,
                             text_column='Processed_ProblemDescription_forDomainExtraction',
+                            cluster_range=(2, 10),
                             top_words=10):
     consoleMessage_and_Print("Extracting Problem Domains...")
 def extract_location_clusters(df,
                               text_column1='Processed_LocationText_forClustering', # Extracted through NLP
                               text_column2='Geographical_Location', # User Input
+                              cluster_range=(2, 10),
                               top_words=10):
     # Combine the two text columns
     text_column = "Combined_Location_Text"
     model = GPTNeoForCausalLM.from_pretrained(model_Name)
     tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
+    model_max_token_limit = 1750 #2048 #1500
     try:
         # input_ids = tokenizer.encode(prompt, return_tensors="pt")
                 random.shuffle(shuffled_descriptions)
                 # Prepare the prompt
+                # problems_summary = "; \n".join(shuffled_descriptions[:3]) # Limit to first 3 for brevity
+                # problems_summary = "; \n".join([f"Problem: {desc}" for desc in shuffled_descriptions[:5]])
+                problems_summary = "; \n".join([f"Problem {i+1}: {desc}" for i, desc in enumerate(shuffled_descriptions[:7])])
+                # problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
                 # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
                 # prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+                # prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
+                prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\n\n {problems_summary}\n\nSingle Combined Project Proposal: \t"
                 proposal = generate_project_proposal(prompt)
                 # Check if proposal is valid
 example_files = []
+example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
+example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
+# example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
 import random
     outputs=[
         gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"),  # File download output
+        gr.Textbox(label="Console Messages", lines=7, interactive=False)  # Console messages output
         ],