Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -194,7 +194,7 @@ from collections import Counter
|
|
194 |
|
195 |
def extract_problem_domains(df,
|
196 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
197 |
-
cluster_range=(
|
198 |
top_words=10):
|
199 |
consoleMessage_and_Print("Extracting Problem Domains...")
|
200 |
|
@@ -276,7 +276,7 @@ def text_processing_for_location(text):
|
|
276 |
def extract_location_clusters(df,
|
277 |
text_column1='Processed_LocationText_forClustering', # Extracted through NLP
|
278 |
text_column2='Geographical_Location', # User Input
|
279 |
-
cluster_range=(
|
280 |
top_words=10):
|
281 |
# Combine the two text columns
|
282 |
text_column = "Combined_Location_Text"
|
@@ -371,7 +371,7 @@ def generate_project_proposal(prompt): # Generate the proposal
|
|
371 |
|
372 |
model = GPTNeoForCausalLM.from_pretrained(model_Name)
|
373 |
tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
|
374 |
-
model_max_token_limit = 2048 #1500
|
375 |
|
376 |
try:
|
377 |
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
@@ -457,13 +457,17 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
|
|
457 |
|
458 |
random.shuffle(shuffled_descriptions)
|
459 |
# Prepare the prompt
|
460 |
-
# problems_summary = "; \n".join(shuffled_descriptions[:
|
461 |
-
problems_summary = "; \n".join(
|
|
|
|
|
462 |
|
463 |
|
464 |
# prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
465 |
# prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
466 |
-
prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
|
|
|
|
|
467 |
|
468 |
proposal = generate_project_proposal(prompt)
|
469 |
# Check if proposal is valid
|
@@ -766,9 +770,9 @@ def process_excel(file):
|
|
766 |
|
767 |
|
768 |
example_files = []
|
769 |
-
|
770 |
-
|
771 |
-
example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
|
772 |
|
773 |
|
774 |
import random
|
@@ -785,7 +789,7 @@ interface = gr.Interface(
|
|
785 |
|
786 |
outputs=[
|
787 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
788 |
-
gr.Textbox(label="Console Messages", lines=
|
789 |
],
|
790 |
|
791 |
|
|
|
194 |
|
195 |
def extract_problem_domains(df,
|
196 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
197 |
+
cluster_range=(2, 10),
|
198 |
top_words=10):
|
199 |
consoleMessage_and_Print("Extracting Problem Domains...")
|
200 |
|
|
|
276 |
def extract_location_clusters(df,
|
277 |
text_column1='Processed_LocationText_forClustering', # Extracted through NLP
|
278 |
text_column2='Geographical_Location', # User Input
|
279 |
+
cluster_range=(2, 10),
|
280 |
top_words=10):
|
281 |
# Combine the two text columns
|
282 |
text_column = "Combined_Location_Text"
|
|
|
371 |
|
372 |
model = GPTNeoForCausalLM.from_pretrained(model_Name)
|
373 |
tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
|
374 |
+
model_max_token_limit = 1750 #2048 #1500
|
375 |
|
376 |
try:
|
377 |
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
|
|
457 |
|
458 |
random.shuffle(shuffled_descriptions)
|
459 |
# Prepare the prompt
|
460 |
+
# problems_summary = "; \n".join(shuffled_descriptions[:3]) # Limit to first 3 for brevity
|
461 |
+
# problems_summary = "; \n".join([f"Problem: {desc}" for desc in shuffled_descriptions[:5]])
|
462 |
+
problems_summary = "; \n".join([f"Problem {i+1}: {desc}" for i, desc in enumerate(shuffled_descriptions[:7])])
|
463 |
+
# problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
|
464 |
|
465 |
|
466 |
# prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
467 |
# prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
468 |
+
# prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
|
469 |
+
prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\n\n {problems_summary}\n\nSingle Combined Project Proposal: \t"
|
470 |
+
|
471 |
|
472 |
proposal = generate_project_proposal(prompt)
|
473 |
# Check if proposal is valid
|
|
|
770 |
|
771 |
|
772 |
example_files = []
|
773 |
+
example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
|
774 |
+
example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
|
775 |
+
# example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
|
776 |
|
777 |
|
778 |
import random
|
|
|
789 |
|
790 |
outputs=[
|
791 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
792 |
+
gr.Textbox(label="Console Messages", lines=7, interactive=False) # Console messages output
|
793 |
],
|
794 |
|
795 |
|