Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -1,165 +1,166 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
-
from pydantic import BaseModel
|
3 |
-
import openai
|
4 |
-
import os
|
5 |
-
|
6 |
-
# Initialize OpenAI API Key
|
7 |
-
openai.api_key = os.getenv("API_KEY")
|
8 |
-
|
9 |
-
app = FastAPI()
|
10 |
-
|
11 |
-
# Pydantic Model for Student Data Input
|
12 |
-
class SOCAInput(BaseModel):
|
13 |
-
student_data: list # List of student SOCA analysis strings
|
14 |
-
|
15 |
-
|
16 |
-
# Function to generate keywords
|
17 |
-
def generate_keywords(student_data):
|
18 |
-
prompt_template = """
|
19 |
-
You are a knowledgeable assistant who extracts key strengths, opportunities, challenges, and action plan items from a student's SOCA analysis.
|
20 |
-
|
21 |
-
Please identify and list unique keywords or phrases for each category (Strengths, Opportunities, Challenges, and Action Plan) based on the analysis provided. Each keyword should consist of fewer than 7 words to ensure they are short, precise, and concise.
|
22 |
-
**Also DONT include scientific principles or similar keyword in the output. If any such keyword appears, just discard it from output**
|
23 |
-
|
24 |
-
Return the extracted keywords in the following format:
|
25 |
-
|
26 |
-
Strengths: [list of unique keywords]
|
27 |
-
Opportunities: [list of unique keywords]
|
28 |
-
Challenges: [list of unique keywords]
|
29 |
-
Action Plan: [list of unique keywords]
|
30 |
-
|
31 |
-
Now, please analyze the following data for the student data :{student_data}
|
32 |
-
"""
|
33 |
-
formatted_prompt = prompt_template.format(student_data=student_data)
|
34 |
-
|
35 |
-
response = openai.ChatCompletion.create(
|
36 |
-
model="gpt-4o-mini",
|
37 |
-
messages=[{"role": "system", "content": formatted_prompt}]
|
38 |
-
)
|
39 |
-
return response['choices'][0]['message']['content'].strip()
|
40 |
-
|
41 |
-
|
42 |
-
# Function to combine similar keywords
|
43 |
-
def combine_similar_keywords_with_llm(keywords, category):
|
44 |
-
prompt_template = """
|
45 |
-
You are tasked with processing a dictionary containing extracted keywords organized into different categories: "Strengths," "Opportunities," "Challenges," and "Action Plan." Each category contains a list of keywords, some of which may have similar meanings or be redundant. Your objective is to combine these similar keywords into a single keyword and increase the frequency count of that keyword based on the repetitions of the similar keywords in the original lists.
|
46 |
-
|
47 |
-
Important Note: The SOCA analysis is based on JEE preparation, so keep subjects like Mathematics, Chemistry, and Physics separate; do not merge them into general categories like "Sciences." Instead, focus on these subjects individually while combining keywords.
|
48 |
-
|
49 |
-
Here’s how to approach the task:
|
50 |
-
|
51 |
-
Review Each Category: Begin by examining the keywords in each category of the provided dictionary.
|
52 |
-
|
53 |
-
Identify Similar Keywords: For each category, identify keywords that have similar meanings or can be considered redundant or that can be of same subject keeping in mind JEE syllabus such as algebra,calculus can be merged with maths foundation to increase its frequency and mechanics can be merged into physics foundation to increase its frequency.
|
54 |
-
|
55 |
-
##IMPORTANT EXAMPLES FOR Redundant OR similar meaining keywords examples:(KEEP THESE EXAMPLES IN MIND WHILE MERGING SIMILAR KEYWORDS)
|
56 |
-
## BELOW EXAMPLES CAN BE CONSIDERED ACROSS ALL THE FOUR DICTIONARIES
|
57 |
-
1)Systematic Problem Solving and Problem Solving Skills and Comlpex Problem Solving and Understanding Long Questions, all of these keywords have similar meaning.
|
58 |
-
2)Imbalance in Study Time and Time Management Issues, have same meaning so we can increase frequency of imbalance in study time and then discard time management issues.
|
59 |
-
3)Logical reasoning and Pattern Recognition and high score in reasoning ,all have similar meaning so we can increase frequency of logical reasoning if we encounter patterns recognition or high socre in reasoning and then discrd them.
|
60 |
-
4)Maths Foundation and High Confidence In Mathematics, have same meaning so we can increase the frequency of maths foundation and then discard high confidence in mathematics.
|
61 |
-
5)Physcis Foundation and High Confidence in Physics, have the same meaning.
|
62 |
-
6)Long-term Memory Techniques and Memory Strategies ,have similar meaining.
|
63 |
-
7)Effective Time Allocation and Time managament and Efficient Study Time Allocation ,all have similar meaning so we can increase frequency of effective time allocation and then discard time management and Efficient Study Time Allocation.
|
64 |
-
8)self_awareness and accountability ,have similar meaning so we increase frequency of self_awareness in output and then discard adaptability.
|
65 |
-
9)Daily Mental Calculation Practice and Targeted Practice, these both also have same meaining.
|
66 |
-
10)Practice Quick Mental Calculation and Enhance Mental Calculation Skills and Quick Mental Calculation and Practice Quick Mental Calculation and Weak in Quick in Mental Calculation ,all of these have similar meaning.
|
67 |
-
|
68 |
-
Merge Keywords: When you identify similar keywords:
|
69 |
-
Choose one representative keyword that best encapsulates the meaning of the group.
|
70 |
-
Count the number of times the similar keywords appear in the original list and add this to the frequency of the representative keyword
|
71 |
-
##IMMPORTANT:Discard the similar keywords that have been merged.
|
72 |
-
|
73 |
-
Construct New Dictionary: Create a new dictionary that contains the merged keywords in the category '{category}', ensuring that each keyword only appears once and reflects its updated frequency counts.
|
74 |
-
|
75 |
-
Format the Output: Ensure the output is clear and structured, showing the updated keywords alongside their frequency counts.
|
76 |
-
Now, please combine the following keywords for the category '{category}': {keywords}, while keeping subject-specific keywords like Mathematics, Chemistry, and Physics separate.
|
77 |
-
### EXAMPLE OUTPUT FORMAT:
|
78 |
-
Strength =
|
79 |
-
'Maths Foundation': 3,
|
80 |
-
'Problem-Solving Skills': 4,
|
81 |
-
'Self-Awareness': 4,
|
82 |
-
'Adaptability': 3,
|
83 |
-
'Proactive Approach': 2,
|
84 |
-
'Time Management': 2,
|
85 |
-
'Logical Reasoning': 2,
|
86 |
-
'Scientific Principles': 3,
|
87 |
-
'Physics Foundation': 3,
|
88 |
-
'Critical Thinking': 1,
|
89 |
-
'Analytical Skills': 1,
|
90 |
-
'Long-term Memory': 1,
|
91 |
-
'Effective time allocation': 1,
|
92 |
-
'Confidence in Physics': 1
|
93 |
-
|
94 |
-
1)ALL THE FOUR DICTIONARIES ARE IN THE OUTPUT ONLY ONCE AND IN THE FORMAT SIMILAR TO EXAMPLE, DONT CONSIDER THIS EXAMPLE TO BE ABSOLUTE JUST TAKE IT AS A REFERENCE
|
95 |
-
2KEEP THE MAX NUMBER OF KEYWORDS IN EACH DICTIONARY TO BE 10 IF THERE ARE MORE UNIQUE KEYWORDS THEN TRY TO MAERGE IT WITH THE UNIQUES KEYWORDS HAVING THE CLOSEST MEANING TO IT. AND KEEP ONLY ONE KEYWORD FOR EACH CHEMISTRY,PHYSICS AND MATHS IF THERE WAS ANY KEYWORD SIMILAR TO THEM
|
96 |
-
3)IN THE OUTPUT TEXT SUCH AS python or (Here’s the merged dictionary for the category) OR (This output captures the merged keywords with their updated frequency counts, maintaining the focus on subject-specific terms and avoiding generalizations.) SHOULD NOT BE RETURNED IN THE OUTPUT
|
97 |
-
"""
|
98 |
-
formatted_prompt = prompt_template.format(category=category, keywords=str(keywords))
|
99 |
-
|
100 |
-
response = openai.ChatCompletion.create(
|
101 |
-
model="gpt-4o-mini",
|
102 |
-
messages=[{"role": "system", "content": formatted_prompt}]
|
103 |
-
)
|
104 |
-
return response['choices'][0]['message']['content'].strip()
|
105 |
-
|
106 |
-
def extract_keywords_from_summary(summary):
|
107 |
-
keywords = {}
|
108 |
-
for section in ["Strengths", "Opportunities", "Challenges", "Action Plan"]:
|
109 |
-
try:
|
110 |
-
start = summary.index(section) + len(section) + 2 # Move past the section header
|
111 |
-
end = summary.index("**", start) if "**" in summary[start:] else len(summary) # Find the next section or end
|
112 |
-
keywords[section] = [kw.strip() for kw in summary[start:end].replace('*', '').strip().split("\n") if kw.strip()]
|
113 |
-
except ValueError:
|
114 |
-
keywords[section] = [] # In case the section is not found
|
115 |
-
return keywords
|
116 |
-
|
117 |
-
@app.post("/process-soca/")
|
118 |
-
async def process_soca(data: SOCAInput):
|
119 |
-
combined_keywords = {
|
120 |
-
"Strengths": [],
|
121 |
-
"Opportunities": [],
|
122 |
-
"Challenges": [],
|
123 |
-
"Action Plan": []
|
124 |
-
}
|
125 |
-
|
126 |
-
|
127 |
-
for
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
"
|
143 |
-
"
|
144 |
-
"
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
keyword =
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException
|
2 |
+
from pydantic import BaseModel
|
3 |
+
import openai
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Read the OpenAI API key from the API_KEY environment variable.
# The key is None when the variable is unset.
openai.api_key = os.environ.get("API_KEY")

# FastAPI application that the route decorators in this module attach to.
app = FastAPI()
|
10 |
+
|
11 |
+
# Request schema for the SOCA processing endpoint
class SOCAInput(BaseModel):
    """Request body holding the SOCA analyses to be processed."""

    # List of student SOCA analysis strings, one entry per student.
    student_data: list
|
14 |
+
|
15 |
+
|
16 |
+
# Prompt used to extract per-category keywords from one student's analysis.
# `{student_data}` is the only placeholder filled in by str.format().
_KEYWORD_EXTRACTION_PROMPT = """
You are a knowledgeable assistant who extracts key strengths, opportunities, challenges, and action plan items from a student's SOCA analysis.

Please identify and list unique keywords or phrases for each category (Strengths, Opportunities, Challenges, and Action Plan) based on the analysis provided. Each keyword should consist of fewer than 7 words to ensure they are short, precise, and concise.
**Also DONT include scientific principles or similar keyword in the output. If any such keyword appears, just discard it from output**

Return the extracted keywords in the following format:

Strengths: [list of unique keywords]
Opportunities: [list of unique keywords]
Challenges: [list of unique keywords]
Action Plan: [list of unique keywords]

Now, please analyze the following data for the student data :{student_data}
"""


# Function to generate keywords
def generate_keywords(student_data):
    """Ask the chat model to extract SOCA keywords for one student.

    Args:
        student_data: One student's SOCA analysis; it is interpolated into
            the prompt with ``str.format``.

    Returns:
        The model's reply text with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the response contains no choices or no message
            content, instead of failing later with an opaque
            ``AttributeError``/``IndexError``.
    """
    formatted_prompt = _KEYWORD_EXTRACTION_PROMPT.format(student_data=student_data)

    # NOTE(review): the caller-supplied analysis is embedded in the *system*
    # message; consider moving it to a separate "user" message to reduce the
    # prompt-injection surface.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": formatted_prompt}],
    )

    choices = response["choices"]
    content = choices[0]["message"].get("content") if choices else None
    if content is None:
        raise RuntimeError("OpenAI returned no message content for keyword extraction")
    return content.strip()
|
40 |
+
|
41 |
+
|
42 |
+
# Prompt used to merge near-duplicate keywords of ONE category into a
# keyword -> frequency listing. Placeholders: `{category}` and `{keywords}`.
# NOTE(review): the example output below still lists entries that the rules
# say should be merged or dropped (e.g. 'Scientific Principles',
# 'Confidence in Physics', more than 10 keywords); it is kept unchanged here
# because the prompt itself says the example is only a reference.
_KEYWORD_MERGE_PROMPT = """
You are tasked with processing a dictionary containing extracted keywords organized into different categories: "Strengths," "Opportunities," "Challenges," and "Action Plan." Each category contains a list of keywords, some of which may have similar meanings or be redundant. Your objective is to combine these similar keywords into a single keyword and increase the frequency count of that keyword based on the repetitions of the similar keywords in the original lists.

Important Note: The SOCA analysis is based on JEE preparation, so keep subjects like Mathematics, Chemistry, and Physics separate; do not merge them into general categories like "Sciences." Instead, focus on these subjects individually while combining keywords.

Here’s how to approach the task:

Review Each Category: Begin by examining the keywords in each category of the provided dictionary.

Identify Similar Keywords: For each category, identify keywords that have similar meanings or can be considered redundant or that can be of same subject keeping in mind JEE syllabus such as algebra,calculus can be merged with maths foundation to increase its frequency and mechanics can be merged into physics foundation to increase its frequency.

##IMPORTANT EXAMPLES FOR Redundant OR similar meaining keywords examples:(KEEP THESE EXAMPLES IN MIND WHILE MERGING SIMILAR KEYWORDS)
## BELOW EXAMPLES CAN BE CONSIDERED ACROSS ALL THE FOUR DICTIONARIES
1)Systematic Problem Solving and Problem Solving Skills and Comlpex Problem Solving and Understanding Long Questions, all of these keywords have similar meaning.
2)Imbalance in Study Time and Time Management Issues, have same meaning so we can increase frequency of imbalance in study time and then discard time management issues.
3)Logical reasoning and Pattern Recognition and high score in reasoning ,all have similar meaning so we can increase frequency of logical reasoning if we encounter patterns recognition or high socre in reasoning and then discrd them.
4)Maths Foundation and High Confidence In Mathematics, have same meaning so we can increase the frequency of maths foundation and then discard high confidence in mathematics.
5)Physcis Foundation and High Confidence in Physics, have the same meaning.
6)Long-term Memory Techniques and Memory Strategies ,have similar meaining.
7)Effective Time Allocation and Time managament and Efficient Study Time Allocation ,all have similar meaning so we can increase frequency of effective time allocation and then discard time management and Efficient Study Time Allocation.
8)self_awareness and accountability ,have similar meaning so we increase frequency of self_awareness in output and then discard accountability.
9)Daily Mental Calculation Practice and Targeted Practice, these both also have same meaining.
10)Practice Quick Mental Calculation and Enhance Mental Calculation Skills and Quick Mental Calculation and Practice Quick Mental Calculation and Weak in Quick in Mental Calculation ,all of these have similar meaning.

Merge Keywords: When you identify similar keywords:
Choose one representative keyword that best encapsulates the meaning of the group.
Count the number of times the similar keywords appear in the original list and add this to the frequency of the representative keyword
##IMMPORTANT:Discard the similar keywords that have been merged.

Construct New Dictionary: Create a new dictionary that contains the merged keywords in the category '{category}', ensuring that each keyword only appears once and reflects its updated frequency counts.

Format the Output: Ensure the output is clear and structured, showing the updated keywords alongside their frequency counts.
Now, please combine the following keywords for the category '{category}': {keywords}, while keeping subject-specific keywords like Mathematics, Chemistry, and Physics separate.
### EXAMPLE OUTPUT FORMAT:
Strength =
'Maths Foundation': 3,
'Problem-Solving Skills': 4,
'Self-Awareness': 4,
'Adaptability': 3,
'Proactive Approach': 2,
'Time Management': 2,
'Logical Reasoning': 2,
'Scientific Principles': 3,
'Physics Foundation': 3,
'Critical Thinking': 1,
'Analytical Skills': 1,
'Long-term Memory': 1,
'Effective time allocation': 1,
'Confidence in Physics': 1

1)ONLY THE DICTIONARY FOR THE CATEGORY '{category}' IS IN THE OUTPUT, ONLY ONCE AND IN THE FORMAT SIMILAR TO EXAMPLE, DONT CONSIDER THIS EXAMPLE TO BE ABSOLUTE JUST TAKE IT AS A REFERENCE
2)KEEP THE MAX NUMBER OF KEYWORDS IN EACH DICTIONARY TO BE 10 IF THERE ARE MORE UNIQUE KEYWORDS THEN TRY TO MAERGE IT WITH THE UNIQUES KEYWORDS HAVING THE CLOSEST MEANING TO IT. AND KEEP ONLY ONE KEYWORD FOR EACH CHEMISTRY,PHYSICS AND MATHS IF THERE WAS ANY KEYWORD SIMILAR TO THEM
3)IN THE OUTPUT TEXT SUCH AS python or (Here’s the merged dictionary for the category) OR (This output captures the merged keywords with their updated frequency counts, maintaining the focus on subject-specific terms and avoiding generalizations.) SHOULD NOT BE RETURNED IN THE OUTPUT
"""


# Function to combine similar keywords
def combine_similar_keywords_with_llm(keywords, category):
    """Ask the chat model to merge similar keywords of a single category.

    Args:
        keywords: Keywords collected for ``category``; passed through
            ``str()`` before being placed in the prompt.
        category: Category name substituted into the prompt.

    Returns:
        The model's reply text with surrounding whitespace stripped. The
        prompt requests one ``'keyword': count`` entry per line.

    Raises:
        RuntimeError: If the response contains no choices or no message
            content.
    """
    formatted_prompt = _KEYWORD_MERGE_PROMPT.format(
        category=category,
        keywords=str(keywords),
    )

    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": formatted_prompt}],
    )

    choices = response["choices"]
    content = choices[0]["message"].get("content") if choices else None
    if content is None:
        raise RuntimeError(
            f"OpenAI returned no message content while merging '{category}' keywords"
        )
    return content.strip()
|
105 |
+
|
106 |
+
def extract_keywords_from_summary(summary):
    """Split a keyword summary into per-category keyword lists.

    The summary is read line by line. A line is treated as a section header
    only when, after removing markdown decoration (``*``, ``#``, leading
    bullets), it consists of a section name optionally followed by ``:`` and
    inline items. This avoids matching a section name that merely occurs
    inside a keyword, and keeps one section from swallowing the following
    ones when the text has no ``**`` markers.

    Both layouts are understood::

        **Strengths:**            Strengths: [Maths Foundation, Logic]
        - Maths Foundation
        - Logic

    Args:
        summary: Text containing the four SOCA sections; ``None`` or an
            empty string yields empty lists.

    Returns:
        A dict with the keys "Strengths", "Opportunities", "Challenges" and
        "Action Plan", each mapping to a list of keyword strings (empty when
        the section is absent).
    """
    sections = ("Strengths", "Opportunities", "Challenges", "Action Plan")
    keywords = {section: [] for section in sections}
    current = None  # section that subsequent item lines belong to

    for raw_line in (summary or "").splitlines():
        # Drop markdown emphasis, heading markers and list bullets.
        line = raw_line.replace("*", "").strip().lstrip("#-• \t").strip()
        if not line:
            continue

        is_header = False
        lowered = line.lower()
        for section in sections:
            if lowered.startswith(section.lower()):
                rest = line[len(section):].strip()
                # "Challenges in physics" is a keyword, not a header.
                if not rest or rest.startswith(":"):
                    current = section
                    is_header = True
                    line = rest.lstrip(":").strip()
                break

        # Skip preamble before the first header and headers without inline items.
        if current is None or not line:
            continue

        bracketed = line.startswith("[") and line.endswith("]")
        if bracketed:
            line = line[1:-1]

        if is_header or bracketed:
            # Inline form: "Strengths: a, b" or "[a, b]".
            parts = line.split(",")
        else:
            # Bullet form: remove "1." / "2)" style numbering if present.
            head, sep, tail = line.partition(" ")
            if sep and len(head) > 1 and head[-1] in ".)" and head[:-1].isdigit():
                line = tail
            parts = [line]

        for part in parts:
            item = part.strip().strip("'\"").strip()
            if item:
                keywords[current].append(item)

    return keywords
|
116 |
+
|
117 |
+
@app.post("/process-soca/")
async def process_soca(data: SOCAInput):
    """Extract, pool and merge SOCA keywords for a batch of students.

    For every entry in ``data.student_data`` the keywords are generated with
    ``generate_keywords`` and parsed with ``extract_keywords_from_summary``;
    the per-category keywords of all students are pooled, and each non-empty
    category is then merged with ``combine_similar_keywords_with_llm`` and
    parsed into ``keyword -> frequency`` pairs.

    Args:
        data: Request body with the list of student analyses.

    Returns:
        ``{"keywords": {category: {keyword: frequency}}}`` for the four
        categories "Strengths", "Opportunities", "Challenges" and
        "Action Plan".

    Raises:
        HTTPException: Status 500 with the error text when keyword
            extraction or merging fails.
    """
    # Local import so this block needs no change to the module's import list.
    from fastapi.concurrency import run_in_threadpool

    categories = ("Strengths", "Opportunities", "Challenges", "Action Plan")
    combined_keywords = {category: [] for category in categories}

    # Extract keywords for each student's SOCA data
    for student_data in data.student_data:
        try:
            # The OpenAI call is blocking; run it in a worker thread so the
            # event loop stays free to serve other requests.
            summary = await run_in_threadpool(generate_keywords, student_data)
            extracted_keywords = extract_keywords_from_summary(summary)
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

        for category in categories:
            combined_keywords[category].extend(extracted_keywords.get(category, []))

    # Final output dictionary
    final_keywords_frequency = {category: {} for category in categories}

    for category in categories:
        keywords_in_category = combined_keywords[category]
        if not keywords_in_category:
            continue

        try:
            combined_results = await run_in_threadpool(
                combine_similar_keywords_with_llm, keywords_in_category, category
            )
        except Exception as e:
            # Same error contract as the extraction phase above.
            raise HTTPException(status_code=500, detail=str(e)) from e

        final_keywords_frequency[category].update(
            _parse_keyword_frequencies(combined_results)
        )

    return {"keywords": final_keywords_frequency}


def _parse_keyword_frequencies(combined_results):
    """Parse ``keyword: count`` lines from the merge reply into a dict."""
    frequencies = {}
    for keyword_line in combined_results.splitlines():
        # Split on the LAST colon so a keyword that itself contains ':'
        # cannot break the two-way unpacking.
        keyword, separator, freq = keyword_line.rpartition(":")
        if not separator:
            continue  # e.g. the "Strength =" heading line

        # The reply uses dict-like syntax; drop quotes, braces and bullets.
        keyword = keyword.strip(" \t'\"{*-")
        freq = freq.strip().rstrip(",}").strip()
        if not keyword:
            continue

        try:
            freq = int(freq)  # Convert to integer
        except ValueError:
            print(f"Skipping invalid frequency value: {freq}")
            continue  # Skip lines with invalid frequency values

        frequencies[keyword] = freq
    return frequencies
|