Update app.py
app.py
CHANGED
@@ -359,22 +359,18 @@ def create_cluster_dataframes(processed_df):
     return budget_cluster_df, problem_cluster_df
 
 from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-def generate_project_proposal(
+def generate_project_proposal(prompt):
     print("Trying to access gpt-neo-1.3B")
-    print("
-
-
-
-
-
-
-
-
-    # prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
-    prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
-
-    # Generate the proposal
+    print("prompt: \t", prompt)
+    try:
+        # Generate the proposal
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    except Exception as e:
+        print("Error loading models:", str(e))
+        console_messages.append("\n Error Loading Models")
+        return prompt
+
     try:
         input_ids = tokenizer.encode(prompt, return_tensors="pt")
         print("Input IDs shape:", input_ids.shape)
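For context, a minimal standalone sketch of the load-then-generate flow this hunk sets up, assuming the rest of generate_project_proposal decodes the model output with model.generate(); the function name and the generation settings (max_new_tokens, temperature) below are illustrative and not taken from app.py:

# Sketch only: mirrors the diff's pattern of loading GPT-Neo inside the function
# and falling back to returning the prompt if loading fails.
from transformers import GPTNeoForCausalLM, GPT2Tokenizer

def generate_project_proposal_sketch(prompt: str) -> str:
    try:
        # Loading inside the function keeps a failed download from crashing the app.
        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
    except Exception as e:
        print("Error loading models:", str(e))
        return prompt  # same fallback as the committed code

    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # Generation parameters are assumptions, not values from app.py.
    output = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7)
    return tokenizer.decode(output[0], skip_special_tokens=True)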
@@ -417,14 +413,21 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
             print("problem_descriptions: ",problem_descriptions)
 
             if problem_descriptions and not pd.isna(problem_descriptions):
+
                 print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
+
                 # console_messages.append(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
-
-
-
-
+
+                # Prepare the prompt
+                problems_summary = "; ".join(problem_descriptions[:3])  # Limit to first 3 for brevity
+                # problems_summary = "; ".join(problem_descriptions)
+                # prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
+                prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+
+                proposal = generate_project_proposal(prompt)
                 proposals[(loc, prob)] = proposal
                 print("Generated Proposal: ", proposal)
+
             else:
                 print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
 
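For reference, a hedged sketch of the prompt assembly this hunk moves into the caller, shown for a single (location, problem domain) pair; the sample descriptions and labels are made up, while in app.py the real values come from problem_cluster_df:

# Illustrative inputs only.
problem_descriptions = [
    "Potholes on main roads",
    "Broken streetlights near the market",
    "No drainage during monsoon",
    "Irregular garbage collection",
]
location = "Ward 12"
problem_domain = "Urban infrastructure"

# Keep the prompt short: only the first three descriptions are used.
problems_summary = "; ".join(problem_descriptions[:3])
prompt = (
    "Generate a solution oriented project proposal for the following:\n\n"
    f"Location: {location}\nProblem Domain: {problem_domain}\n"
    f"Problems: {problems_summary}\n\nProject Proposal:"
)
print(prompt)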
@@ -509,9 +512,6 @@ def nlp_pipeline(original_df):
     except Exception as e:
         console_messages.append(f"Error in extract_location_clusters: {str(e)}")
     console_messages.append("NLP pipeline for location extraction completed.")
-
-
-
 
 
     # Create cluster dataframes
@@ -525,8 +525,8 @@ def nlp_pipeline(original_df):
     # # Generate project proposals
     # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
    # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
-    print("\n location_clusters_2: ", location_clusters)
-    print("\n problem_clusters_2: ", problem_clusters)
+    # print("\n location_clusters_2: ", location_clusters)
+    # print("\n problem_clusters_2: ", problem_clusters)
     project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
 
     console_messages.append("NLP pipeline completed.")
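As a side note, a small illustrative sketch of the cluster lookups referenced by the commented-out lines above: dict(enumerate(...unique())) maps a cluster id to its representative words. The sample dataframe and labels here are hypothetical, not taken from app.py:

import pandas as pd

# Hypothetical processed_df with the two columns the commented lines refer to.
processed_df = pd.DataFrame({
    "Location_Category_Words": ["ward 12, market", "riverside, ghat", "ward 12, market"],
    "Problem_Category_Words": ["roads, potholes", "drainage, flooding", "garbage, collection"],
})

location_clusters = dict(enumerate(processed_df["Location_Category_Words"].unique()))
problem_clusters = dict(enumerate(processed_df["Problem_Category_Words"].unique()))
print(location_clusters)  # {0: 'ward 12, market', 1: 'riverside, ghat'}
print(problem_clusters)   # {0: 'roads, potholes', 1: 'drainage, flooding', 2: 'garbage, collection'}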
@@ -556,11 +556,11 @@ def process_excel(file):
 
     output_filename = "OutPut_PPs.xlsx"
     with pd.ExcelWriter(output_filename) as writer:
-
-
-
-
-
+        ### Convert project_proposals dictionary to DataFrame
+        project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
+        project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
+        project_proposals_df.reset_index(inplace=True)
+        project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
 
         budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
         problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
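For reference, a self-contained sketch of the dict-to-Excel step added in this hunk; the proposals below are made up, but the keys are (location cluster, problem cluster) tuples, which matches the shape produced by proposals[(loc, prob)] = proposal in create_project_proposals:

import pandas as pd

# Hypothetical proposals keyed by (location_cluster_id, problem_cluster_id).
project_proposals = {
    (0, 0): "Resurface the arterial roads around the market...",
    (0, 1): "Install covered drains along the low-lying lanes...",
}

project_proposals_df = pd.DataFrame.from_dict(
    project_proposals, orient="index", columns=["Solutions Proposed"]
)
# The tuple keys become a two-level index; name the levels, then flatten them into columns.
project_proposals_df.index.names = ["Location_Cluster", "Problem_Cluster"]
project_proposals_df.reset_index(inplace=True)

with pd.ExcelWriter("OutPut_PPs.xlsx") as writer:
    project_proposals_df.to_excel(writer, sheet_name="Project_Proposals", index=False)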
|