Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Files Community

SantanuBanerjee committed on Aug 7, 2024

Commit

e46b418

verified ·

1 Parent(s): 3e19acf

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -8

app.py CHANGED Viewed

@@ -194,7 +194,7 @@ from collections import Counter
 def extract_problem_domains(df,
                             text_column='Processed_ProblemDescription_forDomainExtraction',
-                            cluster_range=(6, 10),
                             top_words=7):
     consoleMessage_and_Print("Extracting Problem Domains...")
@@ -276,7 +276,7 @@ def text_processing_for_location(text):
 def extract_location_clusters(df,
                               text_column1='Processed_LocationText_forClustering', # Extracted through NLP
                               text_column2='Geographical_Location', # User Input
-                              cluster_range=(2, 5),
                               top_words=3):
     # Combine the two text columns
     text_column = "Combined_Location_Text"
@@ -397,7 +397,7 @@ def generate_project_proposal(prompt):
@@ -415,24 +415,29 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
             location = ", ".join([item.strip() for item in location_clusters[loc] if item])  # Clean and join
             problem_domain = ", ".join([item.strip() for item in problem_clusters[prob] if item])  # Clean and join
-            problem_descriptions = problem_cluster_df.loc[loc, prob]
             print("location: ", location)
             print("problem_domain: ", problem_domain)
-            print("problem_descriptions: ", problem_descriptions)
             # Check if problem_descriptions is valid (not NaN and not an empty list)
-            if isinstance(problem_descriptions, list) and problem_descriptions:
                 # print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
                 consoleMessage_and_Print(f"Generating PP")
                 # Prepare the prompt
                 # problems_summary = "; \n".join(problem_descriptions)  # Join all problem descriptions
-                problems_summary = "; \n".join(problem_descriptions[:3])  # Limit to first 3 for brevity
                 # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
-                prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
                 proposal = generate_project_proposal(prompt)
                 # Check if proposal is valid

 def extract_problem_domains(df,
                             text_column='Processed_ProblemDescription_forDomainExtraction',
+                            cluster_range=(6, 8),
                             top_words=7):
     consoleMessage_and_Print("Extracting Problem Domains...")
 def extract_location_clusters(df,
                               text_column1='Processed_LocationText_forClustering', # Extracted through NLP
                               text_column2='Geographical_Location', # User Input
+                              cluster_range=(3, 5),
                               top_words=3):
     # Combine the two text columns
     text_column = "Combined_Location_Text"
+import copy
             location = ", ".join([item.strip() for item in location_clusters[loc] if item])  # Clean and join
             problem_domain = ", ".join([item.strip() for item in problem_clusters[prob] if item])  # Clean and join
+            shuffled_descriptions = copy.deepcopy(problem_cluster_df.loc[loc, prob])
+            # Deep-copy the problem descriptions so the in-place shuffle below does not reorder the list stored in problem_cluster_df; the first 3 shuffled entries are joined into the prompt
             print("location: ", location)
             print("problem_domain: ", problem_domain)
+            print("problem_descriptions: ", shuffled_descriptions)
             # Check if problem_descriptions is valid (not NaN and not an empty list)
+            if isinstance(shuffled_descriptions, list) and shuffled_descriptions:
                 # print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
                 consoleMessage_and_Print(f"Generating PP")
+                random.shuffle(shuffled_descriptions)
                 # Prepare the prompt
                 # problems_summary = "; \n".join(problem_descriptions)  # Join all problem descriptions
+                # problems_summary = "; \n".join(problem_descriptions[:3])  # Limit to first 3 for brevity
+                problems_summary = "; \n".join(shuffled_descriptions[:3])
                 # prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+                # prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+                prompt = f"Generate a single solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: <only output this proposal>"
                 proposal = generate_project_proposal(prompt)
                 # Check if proposal is valid