Spaces:
Sleeping
Sleeping
Update app.py
Browse files
This is the best version that can be run. If this doesn't work, you can always fall back to the previous setting, which is the MVP.
app.py
CHANGED
@@ -194,8 +194,8 @@ from collections import Counter
|
|
194 |
|
195 |
def extract_problem_domains(df,
|
196 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
197 |
-
cluster_range=(
|
198 |
-
top_words=
|
199 |
consoleMessage_and_Print("Extracting Problem Domains...")
|
200 |
|
201 |
# Sentence Transformers approach
|
@@ -276,8 +276,8 @@ def text_processing_for_location(text):
|
|
276 |
def extract_location_clusters(df,
|
277 |
text_column1='Processed_LocationText_forClustering', # Extracted through NLP
|
278 |
text_column2='Geographical_Location', # User Input
|
279 |
-
cluster_range=(3,
|
280 |
-
top_words=
|
281 |
# Combine the two text columns
|
282 |
text_column = "Combined_Location_Text"
|
283 |
df[text_column] = df[text_column1] + ' ' + df[text_column2]
|
@@ -361,17 +361,17 @@ from transformers import GPTNeoForCausalLM, GPT2Tokenizer
|
|
361 |
def generate_project_proposal(prompt): # Generate the proposal
|
362 |
default_proposal = "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
|
363 |
|
364 |
-
|
365 |
-
|
366 |
|
367 |
-
model_Name = "EleutherAI/gpt-neo-1.3B"
|
368 |
-
tempareCHUR = uniform(0.5,0.8)
|
369 |
|
370 |
consoleMessage_and_Print(f"Trying to access {model_Name} model. The Prompt is: \n{prompt}")
|
371 |
|
372 |
model = GPTNeoForCausalLM.from_pretrained(model_Name)
|
373 |
tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
|
374 |
-
model_max_token_limit =
|
375 |
|
376 |
try:
|
377 |
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
@@ -457,9 +457,8 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
|
|
457 |
|
458 |
random.shuffle(shuffled_descriptions)
|
459 |
# Prepare the prompt
|
460 |
-
# problems_summary = "; \n".join(
|
461 |
-
|
462 |
-
problems_summary = "; \n".join(shuffled_descriptions[:7])
|
463 |
|
464 |
|
465 |
# prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
@@ -767,9 +766,9 @@ def process_excel(file):
|
|
767 |
|
768 |
|
769 |
example_files = []
|
770 |
-
example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
|
771 |
# example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
|
772 |
-
|
773 |
|
774 |
|
775 |
import random
|
|
|
194 |
|
195 |
def extract_problem_domains(df,
|
196 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
197 |
+
cluster_range=(3, 17),
|
198 |
+
top_words=10):
|
199 |
consoleMessage_and_Print("Extracting Problem Domains...")
|
200 |
|
201 |
# Sentence Transformers approach
|
|
|
276 |
def extract_location_clusters(df,
|
277 |
text_column1='Processed_LocationText_forClustering', # Extracted through NLP
|
278 |
text_column2='Geographical_Location', # User Input
|
279 |
+
cluster_range=(3, 17),
|
280 |
+
top_words=10):
|
281 |
# Combine the two text columns
|
282 |
text_column = "Combined_Location_Text"
|
283 |
df[text_column] = df[text_column1] + ' ' + df[text_column2]
|
|
|
361 |
def generate_project_proposal(prompt): # Generate the proposal
|
362 |
default_proposal = "Hyper-local Sustainability Projects would lead to Longevity of the self and Prosperity of the community. Therefore UNSDGs coupled with Longevity initiatives should be focused upon."
|
363 |
|
364 |
+
model_Name = "EleutherAI/gpt-neo-2.7B"
|
365 |
+
tempareCHUR = uniform(0.3,0.6)
|
366 |
|
367 |
+
# model_Name = "EleutherAI/gpt-neo-1.3B"
|
368 |
+
# tempareCHUR = uniform(0.5,0.8)
|
369 |
|
370 |
consoleMessage_and_Print(f"Trying to access {model_Name} model. The Prompt is: \n{prompt}")
|
371 |
|
372 |
model = GPTNeoForCausalLM.from_pretrained(model_Name)
|
373 |
tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
|
374 |
+
model_max_token_limit = 2048 #1500
|
375 |
|
376 |
try:
|
377 |
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
|
|
457 |
|
458 |
random.shuffle(shuffled_descriptions)
|
459 |
# Prepare the prompt
|
460 |
+
# problems_summary = "; \n".join(shuffled_descriptions[:7]) # Limit to first 7 for brevity
|
461 |
+
problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
|
|
|
462 |
|
463 |
|
464 |
# prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
|
|
766 |
|
767 |
|
768 |
example_files = []
|
769 |
+
# example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
|
770 |
# example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
|
771 |
+
example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
|
772 |
|
773 |
|
774 |
import random
|