samu commited on
Commit
77e56ff
·
1 Parent(s): 8e70df9

optimized backend

Browse files
backend/__pycache__/cache.cpython-310.pyc ADDED
Binary file (1.15 kB). View file
 
backend/__pycache__/cache.cpython-312.pyc ADDED
Binary file (2.24 kB). View file
 
backend/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/backend/__pycache__/config.cpython-310.pyc and b/backend/__pycache__/config.cpython-310.pyc differ
 
backend/__pycache__/config.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/config.cpython-312.pyc and b/backend/__pycache__/config.cpython-312.pyc differ
 
backend/__pycache__/main.cpython-310.pyc CHANGED
Binary files a/backend/__pycache__/main.cpython-310.pyc and b/backend/__pycache__/main.cpython-310.pyc differ
 
backend/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/main.cpython-312.pyc and b/backend/__pycache__/main.cpython-312.pyc differ
 
backend/cache.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from typing import Any, Callable, Dict, Tuple
3
+
4
+ class AsyncLRUCache:
5
+ def __init__(self, maxsize=100_000): # Optimized for 16GB RAM
6
+ self.cache: Dict[Tuple, Any] = {}
7
+ self.order = []
8
+ self.maxsize = maxsize
9
+ self.lock = asyncio.Lock()
10
+
11
+ async def get_or_set(self, key: Tuple, coro: Callable, *args, **kwargs):
12
+ async with self.lock:
13
+ if key in self.cache:
14
+ # Move key to end to show it was recently used
15
+ self.order.remove(key)
16
+ self.order.append(key)
17
+ return self.cache[key]
18
+ # Not cached, compute result
19
+ result = await coro(*args, **kwargs)
20
+ async with self.lock:
21
+ self.cache[key] = result
22
+ self.order.append(key)
23
+ if len(self.order) > self.maxsize:
24
+ oldest = self.order.pop(0)
25
+ del self.cache[oldest]
26
+ return result
27
+
28
+ # Initialize cache with optimized size for 16GB RAM
29
+ cache = AsyncLRUCache() # Uses default maxsize=100_000
backend/config.py CHANGED
@@ -142,15 +142,12 @@ flashcard_mode_instructions = """
142
  # Native language: {native_language}
143
  # Target language: {target_language}
144
  # Proficiency level: {proficiency}
145
-
146
  You are a highly adaptive vocabulary tutor capable of teaching any language. Your goal is to help users learn rapidly by generating personalized flashcards from lesson-based content.
147
-
148
  ### Input Format
149
  You will receive a structured lesson as input (text, dialogue, or vocabulary list). Use this input to:
150
  - Identify new or useful vocabulary terms.
151
  - Extract contextually relevant and domain-specific language.
152
  - Ensure that flashcards reflect the lesson's language, style, and purpose.
153
-
154
  ### Generation Guidelines
155
  When generating flashcards:
156
  1. **Use the provided metadata**:
@@ -160,15 +157,12 @@ When generating flashcards:
160
  - *Beginner*: High-frequency, essential words.
161
  - *Intermediate*: Broader, topic-specific terms and common collocations.
162
  - *Advanced*: Nuanced, idiomatic, or technical vocabulary.
163
-
164
  2. **Contextual relevance**:
165
  - Flashcards should reflect the themes, activities, or domain of the lesson input (e.g., cooking, business, travel).
166
  - Ensure that example sentences are directly related to the input content and sound natural in use.
167
-
168
  3. **Avoid redundancy**:
169
  - Select terms that are novel, useful, or not overly repetitive within the lesson.
170
  - Prioritize terms that learners are likely to encounter again in real-world usage.
171
-
172
  ### Flashcard Format
173
  Generate exactly **10 flashcards** as a **valid JSON array**, with each flashcard containing:
174
  - `"word"`: A key word or phrase in {target_language} drawn from the lesson.
 
142
  # Native language: {native_language}
143
  # Target language: {target_language}
144
  # Proficiency level: {proficiency}
 
145
  You are a highly adaptive vocabulary tutor capable of teaching any language. Your goal is to help users learn rapidly by generating personalized flashcards from lesson-based content.
 
146
  ### Input Format
147
  You will receive a structured lesson as input (text, dialogue, or vocabulary list). Use this input to:
148
  - Identify new or useful vocabulary terms.
149
  - Extract contextually relevant and domain-specific language.
150
  - Ensure that flashcards reflect the lesson's language, style, and purpose.
 
151
  ### Generation Guidelines
152
  When generating flashcards:
153
  1. **Use the provided metadata**:
 
157
  - *Beginner*: High-frequency, essential words.
158
  - *Intermediate*: Broader, topic-specific terms and common collocations.
159
  - *Advanced*: Nuanced, idiomatic, or technical vocabulary.
 
160
  2. **Contextual relevance**:
161
  - Flashcards should reflect the themes, activities, or domain of the lesson input (e.g., cooking, business, travel).
162
  - Ensure that example sentences are directly related to the input content and sound natural in use.
 
163
  3. **Avoid redundancy**:
164
  - Select terms that are novel, useful, or not overly repetitive within the lesson.
165
  - Prioritize terms that learners are likely to encounter again in real-world usage.
 
166
  ### Flashcard Format
167
  Generate exactly **10 flashcards** as a **valid JSON array**, with each flashcard containing:
168
  - `"word"`: A key word or phrase in {target_language} drawn from the lesson.
backend/main.py CHANGED
@@ -3,13 +3,12 @@ from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from backend.utils import generate_completions
 
6
  from backend import config
7
- from backend.database import get_db_connection
8
- import psycopg2
9
- from psycopg2.extras import RealDictCursor
10
  from typing import Union, List, Literal, Optional
11
  import logging
12
  import json
 
13
 
14
  logging.basicConfig(level=logging.INFO)
15
 
@@ -24,14 +23,6 @@ app.add_middleware(
24
  allow_headers=["*"], # Allows all headers
25
  )
26
 
27
- # Dependency to get database connection
28
- async def get_db():
29
- conn = await get_db_connection()
30
- try:
31
- yield conn
32
- finally:
33
- conn.close()
34
-
35
  class Message(BaseModel):
36
  role: Literal["user", "assistant"]
37
  content: str
@@ -46,11 +37,6 @@ class GenerationRequest(BaseModel):
46
  class MetadataRequest(BaseModel):
47
  query: str
48
 
49
- # Global metadata variables
50
- native_language: Optional[str] = None
51
- target_language: Optional[str] = None
52
- proficiency: Optional[str] = None
53
-
54
  @app.get("/")
55
  async def root():
56
  return {"message": "Welcome to the AI Learning Assistant API!"}
@@ -59,15 +45,13 @@ async def root():
59
  async def extract_metadata(data: MetadataRequest):
60
  logging.info(f"Query: {data.query}")
61
  try:
62
- response_str = await generate_completions.get_completions(
 
 
63
  data.query,
64
  config.language_metadata_extraction_prompt
65
  )
66
  metadata_dict = json.loads(response_str)
67
- # Update globals for other endpoints
68
- globals()['native_language'] = metadata_dict.get('native_language', 'unknown')
69
- globals()['target_language'] = metadata_dict.get('target_language', 'unknown')
70
- globals()['proficiency'] = metadata_dict.get('proficiency', 'unknown')
71
  return JSONResponse(
72
  content={
73
  "data": metadata_dict,
@@ -81,109 +65,32 @@ async def extract_metadata(data: MetadataRequest):
81
 
82
  @app.post("/generate/curriculum")
83
  async def generate_curriculum(data: GenerationRequest):
84
- try:
85
- # Use metadata from request or fallback to globals
86
- nl = data.native_language or native_language or "unknown"
87
- tl = data.target_language or target_language or "unknown"
88
- prof = data.proficiency or proficiency or "unknown"
89
- instructions = (
90
- config.curriculum_instructions
91
- .replace("{native_language}", nl)
92
- .replace("{target_language}", tl)
93
- .replace("{proficiency}", prof)
94
- )
95
- response = await generate_completions.get_completions(
96
- data.query,
97
- instructions
98
- )
99
- return JSONResponse(
100
- content={
101
- "data": response,
102
- "type": "curriculum",
103
- "status": "success"
104
- },
105
- status_code=200
106
- )
107
- except Exception as e:
108
- raise HTTPException(status_code=500, detail=str(e))
109
 
110
  @app.post("/generate/flashcards")
111
  async def generate_flashcards(data: GenerationRequest):
112
- try:
113
- nl = data.native_language or native_language or "unknown"
114
- tl = data.target_language or target_language or "unknown"
115
- prof = data.proficiency or proficiency or "unknown"
116
- instructions = (
117
- config.flashcard_mode_instructions
118
- .replace("{native_language}", nl)
119
- .replace("{target_language}", tl)
120
- .replace("{proficiency}", prof)
121
- )
122
- response = await generate_completions.get_completions(
123
- data.query,
124
- instructions
125
- )
126
- return JSONResponse(
127
- content={
128
- "data": response,
129
- "type": "flashcards",
130
- "status": "success"
131
- },
132
- status_code=200
133
- )
134
- except Exception as e:
135
- raise HTTPException(status_code=500, detail=str(e))
136
 
137
  @app.post("/generate/exercises")
138
  async def generate_exercises(data: GenerationRequest):
139
- try:
140
- nl = data.native_language or native_language or "unknown"
141
- tl = data.target_language or target_language or "unknown"
142
- prof = data.proficiency or proficiency or "unknown"
143
- instructions = (
144
- config.exercise_mode_instructions
145
- .replace("{native_language}", nl)
146
- .replace("{target_language}", tl)
147
- .replace("{proficiency}", prof)
148
- )
149
- response = await generate_completions.get_completions(
150
- data.query,
151
- instructions
152
- )
153
- return JSONResponse(
154
- content={
155
- "data": response,
156
- "type": "exercises",
157
- "status": "success"
158
- },
159
- status_code=200
160
- )
161
- except Exception as e:
162
- raise HTTPException(status_code=500, detail=str(e))
163
 
164
  @app.post("/generate/simulation")
165
  async def generate_simulation(data: GenerationRequest):
166
- try:
167
- nl = data.native_language or native_language or "unknown"
168
- tl = data.target_language or target_language or "unknown"
169
- prof = data.proficiency or proficiency or "unknown"
170
- instructions = (
171
- config.simulation_mode_instructions
172
- .replace("{native_language}", nl)
173
- .replace("{target_language}", tl)
174
- .replace("{proficiency}", prof)
175
- )
176
- response = await generate_completions.get_completions(
177
- data.query,
178
- instructions
179
- )
180
- return JSONResponse(
181
- content={
182
- "data": response,
183
- "type": "simulation",
184
- "status": "success"
185
- },
186
- status_code=200
187
- )
188
- except Exception as e:
189
- raise HTTPException(status_code=500, detail=str(e))
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from backend.utils import generate_completions
6
+ from backend.utils.handlers import handle_generation_request, INSTRUCTION_TEMPLATES
7
  from backend import config
 
 
 
8
  from typing import Union, List, Literal, Optional
9
  import logging
10
  import json
11
+ from backend.cache import cache
12
 
13
  logging.basicConfig(level=logging.INFO)
14
 
 
23
  allow_headers=["*"], # Allows all headers
24
  )
25
 
 
 
 
 
 
 
 
 
26
  class Message(BaseModel):
27
  role: Literal["user", "assistant"]
28
  content: str
 
37
  class MetadataRequest(BaseModel):
38
  query: str
39
 
 
 
 
 
 
40
  @app.get("/")
41
  async def root():
42
  return {"message": "Welcome to the AI Learning Assistant API!"}
 
45
  async def extract_metadata(data: MetadataRequest):
46
  logging.info(f"Query: {data.query}")
47
  try:
48
+ response_str = await cache.get_or_set(
49
+ (str(data.query), config.language_metadata_extraction_prompt),
50
+ generate_completions.get_completions,
51
  data.query,
52
  config.language_metadata_extraction_prompt
53
  )
54
  metadata_dict = json.loads(response_str)
 
 
 
 
55
  return JSONResponse(
56
  content={
57
  "data": metadata_dict,
 
65
 
66
  @app.post("/generate/curriculum")
67
  async def generate_curriculum(data: GenerationRequest):
68
+ return await handle_generation_request(
69
+ data=data,
70
+ mode="curriculum",
71
+ instructions_template=INSTRUCTION_TEMPLATES["curriculum"]
72
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  @app.post("/generate/flashcards")
75
  async def generate_flashcards(data: GenerationRequest):
76
+ return await handle_generation_request(
77
+ data=data,
78
+ mode="flashcards",
79
+ instructions_template=INSTRUCTION_TEMPLATES["flashcards"]
80
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  @app.post("/generate/exercises")
83
  async def generate_exercises(data: GenerationRequest):
84
+ return await handle_generation_request(
85
+ data=data,
86
+ mode="exercises",
87
+ instructions_template=INSTRUCTION_TEMPLATES["exercises"]
88
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  @app.post("/generate/simulation")
91
  async def generate_simulation(data: GenerationRequest):
92
+ return await handle_generation_request(
93
+ data=data,
94
+ mode="simulation",
95
+ instructions_template=INSTRUCTION_TEMPLATES["simulation"]
96
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/utils/__pycache__/generate_completions.cpython-310.pyc CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-310.pyc and b/backend/utils/__pycache__/generate_completions.cpython-310.pyc differ
 
backend/utils/__pycache__/generate_completions.cpython-312.pyc CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-312.pyc and b/backend/utils/__pycache__/generate_completions.cpython-312.pyc differ
 
backend/utils/__pycache__/handlers.cpython-310.pyc ADDED
Binary file (1.94 kB). View file
 
backend/utils/__pycache__/handlers.cpython-312.pyc ADDED
Binary file (2.71 kB). View file
 
backend/utils/generate_completions.py CHANGED
@@ -40,32 +40,6 @@ def process_input(data: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dic
40
  else:
41
  raise TypeError("Input must be a string or a list of dictionaries with a 'content' field")
42
 
43
-
44
- # async def get_completions(
45
- # prompt: Union[str, List[Dict[str, str]]],
46
- # instructions: str
47
- # ) -> str:
48
- # processed_prompt = process_input(prompt) # Ensures the input format is correct
49
-
50
- # if isinstance(processed_prompt, str):
51
- # messages = [
52
- # {"role": "system", "content": instructions},
53
- # {"role": "user", "content": processed_prompt}
54
- # ]
55
- # elif isinstance(processed_prompt, list):
56
- # messages = [{"role": "system", "content": instructions}] + processed_prompt
57
- # else:
58
- # raise TypeError("Unexpected processed input type.")
59
-
60
- # response = await client.chat.completions.create(
61
- # model=os.getenv("MODEL"),
62
- # messages=messages,
63
- # response_format={"type": "json_object"}
64
- # )
65
-
66
- # output: str = response.choices[0].message.content
67
- # return output
68
-
69
  async def get_completions(
70
  prompt: Union[str, List[Dict[str, str]]],
71
  instructions: str
 
40
  else:
41
  raise TypeError("Input must be a string or a list of dictionaries with a 'content' field")
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  async def get_completions(
44
  prompt: Union[str, List[Dict[str, str]]],
45
  instructions: str
backend/utils/handlers.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from typing import Callable, Dict, Any
from backend import config
from backend.cache import cache
from backend.utils import generate_completions


async def handle_generation_request(
    data: Any,
    mode: str,
    instructions_template: str
) -> JSONResponse:
    """
    Shared handler for all generation endpoints (curriculum, flashcards, exercises, simulation).

    Args:
        data: The GenerationRequest object containing query and metadata
        mode: The type of generation (curriculum, flashcards, exercises, simulation)
        instructions_template: The template string from config to use

    Returns:
        JSONResponse with the generated content

    Raises:
        HTTPException: 400 if required metadata is missing; 500 if generation fails
    """
    # Validate required metadata
    if not (data.native_language and data.target_language and data.proficiency):
        raise HTTPException(
            status_code=400,
            detail="native_language, target_language, and proficiency are required. Please extract metadata first."
        )

    # Format instructions with metadata. str.replace (not str.format) is used
    # because the templates contain literal JSON braces in their examples.
    instructions = (
        instructions_template
        .replace("{native_language}", data.native_language)
        .replace("{target_language}", data.target_language)
        .replace("{proficiency}", data.proficiency)
    )

    # Get response from cache or generate new. Surface failures as a 500
    # (matching the per-endpoint try/except the previous endpoints used)
    # rather than letting an unhandled exception escape to the ASGI layer.
    try:
        response = await cache.get_or_set(
            (str(data.query), instructions),
            generate_completions.get_completions,
            data.query,
            instructions
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return JSONResponse(
        content={
            "data": response,
            "type": mode,
            "status": "success"
        },
        status_code=200
    )


# Mapping of modes to their instruction templates (defined in backend.config)
INSTRUCTION_TEMPLATES: Dict[str, str] = {
    "curriculum": config.curriculum_instructions,
    "flashcards": config.flashcard_mode_instructions,
    "exercises": config.exercise_mode_instructions,
    "simulation": config.simulation_mode_instructions
}
prev_backend_v3/backend/__pycache__/config.cpython-310.pyc ADDED
Binary file (12 kB). View file
 
prev_backend_v3/backend/__pycache__/config.cpython-312.pyc ADDED
Binary file (17.4 kB). View file
 
prev_backend_v3/backend/__pycache__/database.cpython-310.pyc ADDED
Binary file (10.1 kB). View file
 
prev_backend_v3/backend/__pycache__/database.cpython-312.pyc ADDED
Binary file (12.6 kB). View file
 
prev_backend_v3/backend/__pycache__/main.cpython-310.pyc ADDED
Binary file (3.28 kB). View file
 
prev_backend_v3/backend/__pycache__/main.cpython-312.pyc ADDED
Binary file (8.49 kB). View file
 
prev_backend_v3/backend/config.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ language_metadata_extraction_prompt = """
2
+ You are a language learning assistant. Your task is to analyze the user's input and infer their:
3
+ - Native language (use the language of the input as a fallback if unsure)
4
+ - Target language (the one they want to learn)
5
+ - Proficiency level (beginner, intermediate, or advanced)
6
+ - Title (a brief title summarizing the user's language learning context, written in the user's native language)
7
+ - Description (a catchy, short description of their learning journey, written in the user's native language)
8
+
9
+ Respond ONLY with a valid JSON object using the following format:
10
+
11
+ {
12
+ "native_language": "<user's native language>",
13
+ "target_language": "<language the user wants to learn>",
14
+ "proficiency": "<beginner | intermediate | advanced>",
15
+ "title": "<brief title summarizing the learning context, in the native language>",
16
+ "description": "<catchy, short description of the learning journey, in the native language>"
17
+ }
18
+
19
+ Guidelines:
20
+ - If the user's native language is not explicitly stated, assume it's the same as the language used in the query.
21
+ - If the target language is mentioned indirectly (e.g., "my Dutch isn't great"), infer that as the target language.
22
+ - Make a reasonable guess at proficiency based on clues like "isn't great" → beginner or "I want to improve" → intermediate.
23
+ - If you cannot infer something at all, write "unknown" for native_language, target_language, or proficiency.
24
+ - After inferring the native language, ALWAYS generate the title and description in that language, regardless of the query language or any other context.
25
+ - For title, create a concise phrase (e.g., "Beginner Dutch Adventure" or "Improving Spanish Skills") based on the inferred target language and proficiency, and write it in the user's native language.
26
+ - For description, craft a catchy, short sentence (10-15 words max) that captures the user's learning journey, and write it in the user's native language.
27
+ - If target_language or proficiency is "unknown," use generic but engaging phrases for title and description (e.g., "Language Learning Quest," "Embarking on a new linguistic journey!"), but always in the user's native language.
28
+ - Do not include any explanations, comments, or formatting — only valid JSON.
29
+
30
+ Example:
31
+ User query: "i want to improve my english"
32
+ Expected output:
33
+ {
34
+ "native_language": "english",
35
+ "target_language": "english",
36
+ "proficiency": "intermediate",
37
+ "title": "Improving English Skills",
38
+ "description": "A journey to perfect English for greater fluency and confidence!"
39
+ }
40
+ """
41
+
42
+ curriculum_instructions = """
43
+ # Metadata:
44
+ # Native language: {native_language}
45
+ # Target language: {target_language}
46
+ # Proficiency level: {proficiency}
47
+
48
+ You are an AI-powered language learning assistant tasked with generating an extensive, personalized curriculum. Your goal is to help the user learn {target_language} by designing a 25-lesson curriculum that reflects the user's goals, interests, and proficiency level. All outputs should be written in {native_language}.
49
+
50
+ ### Curriculum Goals:
51
+ - Provide 25 lessons.
52
+ - Ensure logical progression from basic to advanced topics (according to {proficiency}).
53
+ - Align each lesson with a practical communication goal.
54
+ - Tailor vocabulary and sub-topics to the user’s intended use (e.g., work, travel, hobbies, daily life).
55
+
56
+ ### Instructions:
57
+
58
+ 1. **Define the Lesson Series (Overall Theme):**
59
+ - Choose a main theme relevant to the user's motivation for learning {target_language} (e.g., "Living in a new country", "Professional communication", "Traveling in {target_language}-speaking regions").
60
+ - The theme should guide the tone, content, and scope of the entire 25-lesson sequence.
61
+
62
+ 2. **Divide the Curriculum into 25 Thematic Lessons:**
63
+ - Each lesson should have a clear focus (e.g., asking for help, describing your job, booking accommodation).
64
+ - Sequence lessons to build from foundational topics to more complex, specialized language use.
65
+ - Vary grammar, vocabulary, and communication functions across lessons to avoid repetition and ensure comprehensive coverage.
66
+
67
+ 3. **Describe Each Lesson Clearly and Concisely:**
68
+ For each of the 25 lessons, provide:
69
+ - "sub_topic": A clear and practical lesson title in {native_language}.
70
+ - "keywords": A list of 1–3 high-level categories in {native_language} that describe the lesson focus (e.g., "directions", "daily routine", "formal conversation").
71
+ - "description": One sentence in {native_language} that explains what the learner will achieve or be able to do after completing the lesson. Be specific and learner-oriented.
72
+
73
+ ### Output Format:
74
+ Return a valid JSON object with:
75
+ - "lesson_topic": The overall learning theme (in {native_language}).
76
+ - "sub_topics": A list of 25 items. Each item must include:
77
+ - "sub_topic": A short title of the lesson (in {native_language}).
78
+ - "keywords": A list of 1–3 general-purpose categories (in {native_language}).
79
+ - "description": One clear sentence (in {native_language}) describing the purpose of the lesson.
80
+
81
+ Avoid:
82
+ - Using overly generic or repetitive titles or descriptions.
83
+ - Keyword lists with only one-word entries (e.g., use "ordering in a restaurant" instead of "food").
84
+ - Abstract lessons with no real-world relevance.
85
+
86
+ Ensure the curriculum builds toward user fluency in relevant contexts.
87
+ """
88
+
89
+ exercise_mode_instructions = """
90
+ # Metadata:
91
+ # Native language: {native_language}
92
+ # Target language: {target_language}
93
+ # Proficiency level: {proficiency}
94
+
95
+ You are a smart, context-aware language exercise generator. Your task is to create personalized cloze-style exercises that help learners reinforce vocabulary and grammar through realistic, domain-specific practice. You support any language.
96
+
97
+ ### Input Format
98
+ You will receive a structured lesson or topic description (e.g., text excerpt, dialogue, thematic scenario). For example, this could be a short paragraph about daily routines, a dialogue between a customer and a shopkeeper, or a scenario involving travel planning. Use it to:
99
+ - Identify 5 concrete vocabulary items or grammar points suited to the learner’s immediate needs.
100
+ - Ground each exercise in a specific, vivid scenario.
101
+ - Reflect real-world tasks or conversations the learner will encounter.
102
+
103
+ ### Generation Guidelines
104
+ 1. **Metadata usage**
105
+ - **Native language**: Use {native_language} for all explanations.
106
+ - **Target language**: Use {target_language} for sentences, answers, and choices.
107
+ - **Proficiency**:
108
+ - *Beginner*: Focus on high-frequency vocabulary and simple grammar structures, such as present tense, basic prepositions, and common nouns and verbs.
109
+ - *Intermediate*: Incorporate a mix of common and thematic vocabulary, and introduce one new tense or grammatical structure per exercise.
110
+ - *Advanced*: Use domain-specific terminology, idiomatic expressions, and complex syntax to challenge learners.
111
+
112
+ 2. **Sentence specificity**
113
+ - Craft each sentence around a concrete action, object, or event (e.g., “At the café counter, she ___ her order,” not “I want to ___”). To make exercises more engaging, consider adding details that paint a vivid picture, such as specific locations, times, or characters. For instance, use "On a sunny Saturday morning, Maria is heading to the local farmers' market to buy fresh produce" instead of "I am going to the store."
114
+ - Avoid “template” prompts like “I am going to ___” or “I like to ___” without added context.
115
+ - Each sentence must clearly point to one—and only one—correct word or structure.
116
+
117
+ 3. **Unique, unambiguous answers**
118
+ - Design each prompt so distractors could be grammatically plausible but contextually impossible. For example, if the sentence is "She ___ the book on the table," and the correct answer is "put," ensure only "put" fits the context, while distractors like "placed," "set," or "laid" are plausible but incorrect here.
119
+ - Ensure there is no secondary interpretation that could validate another choice.
120
+
121
+ 4. **Plausible distractors**
122
+ - Provide four total options: one correct, three context-related but incorrect.
123
+ - Distractors must belong to the same word class (noun, verb, adjective, etc.) and semantic field.
124
+ - Shuffle answer positions randomly.
125
+ - Ensure distractors are not too similar to the correct answer to avoid confusion.
126
+
127
+ 5. **Explanations**
128
+ - Offer a concise 1–2-sentence rationale in {native_language}, explaining why the correct answer fits this very context and briefly noting why each distractor fails. If space allows, consider adding a brief example or analogy to reinforce the learning point.
129
+
130
+ ### Output Format
131
+ Return exactly **5** cloze-style exercises as a **JSON array**, each element with:
132
+ - `"sentence"`: A fully contextualized sentence in {target_language} containing one blank (`___`).
133
+ - `"answer"`: The single correct fill-in, in {target_language}.
134
+ - `"choices"`: A list of four total options (in randomized order), all in {target_language}.
135
+ - `"explanation"`: A concise note in {native_language} clarifying the correct answer and why others don’t fit.
136
+
137
+ _Do not wrap the array in any additional objects or metadata—output only the raw JSON array._
138
+ """
139
+
140
+ flashcard_mode_instructions = """
141
+ # Metadata:
142
+ # Native language: {native_language}
143
+ # Target language: {target_language}
144
+ # Proficiency level: {proficiency}
145
+
146
+ You are a highly adaptive vocabulary tutor capable of teaching any language. Your goal is to help users learn rapidly by generating personalized flashcards from lesson-based content.
147
+
148
+ ### Input Format
149
+ You will receive a structured lesson as input (text, dialogue, or vocabulary list). Use this input to:
150
+ - Identify new or useful vocabulary terms.
151
+ - Extract contextually relevant and domain-specific language.
152
+ - Ensure that flashcards reflect the lesson's language, style, and purpose.
153
+
154
+ ### Generation Guidelines
155
+ When generating flashcards:
156
+ 1. **Use the provided metadata**:
157
+ - **Native language**: Use {native_language} for definitions.
158
+ - **Target language**: Extract and present vocabulary and examples in {target_language}.
159
+ - **Proficiency level**: Adjust vocabulary complexity based on {proficiency}:
160
+ - *Beginner*: High-frequency, essential words.
161
+ - *Intermediate*: Broader, topic-specific terms and common collocations.
162
+ - *Advanced*: Nuanced, idiomatic, or technical vocabulary.
163
+
164
+ 2. **Contextual relevance**:
165
+ - Flashcards should reflect the themes, activities, or domain of the lesson input (e.g., cooking, business, travel).
166
+ - Ensure that example sentences are directly related to the input content and sound natural in use.
167
+
168
+ 3. **Avoid redundancy**:
169
+ - Select terms that are novel, useful, or not overly repetitive within the lesson.
170
+ - Prioritize terms that learners are likely to encounter again in real-world usage.
171
+
172
+ ### Flashcard Format
173
+ Generate exactly **10 flashcards** as a **valid JSON array**, with each flashcard containing:
174
+ - `"word"`: A key word or phrase in {target_language} drawn from the lesson.
175
+ - `"definition"`: A learner-friendly explanation in {native_language}.
176
+ - `"example"`: A clear, natural sentence in {target_language} demonstrating the word **in context with the lesson**.
177
+ """
178
+
179
+ simulation_mode_instructions = """
180
+ # Metadata:
181
+ # Native language: {native_language}
182
+ # Target language: {target_language}
183
+ # Proficiency level: {proficiency}
184
+
185
+ You are a **creative, context-aware storytelling engine**. Your task is to generate short, engaging stories or dialogues in **any language** to make language learning enjoyable, memorable, and relevant. Stories must reflect the user's interests, profession, or hobbies, and align with their learning level.
186
+
187
+ ### Input Format
188
+ You will receive a user-provided **lesson topic, theme, or domain of interest** (e.g., “a courtroom drama for a law student” or “space mission dialogue for a space enthusiast”). Use this input to:
189
+ - Personalize characters, setting, and vocabulary.
190
+ - Make the story both educational and entertaining.
191
+ - Ensure the language reflects real-world use in that context.
192
+
193
+ ### Story Generation Task
194
+ 1. **Use the provided metadata**:
195
+ - **Native language**: Present explanations, setup, and translations in {native_language}.
196
+ - **Target language**: Write dialogue and narration in {target_language}.
197
+ - **Proficiency level**: Match language complexity to {proficiency}:
198
+ - *Beginner*: Simple grammar, short sentences, high-frequency vocabulary.
199
+ - *Intermediate*: Natural sentence flow, basic narrative devices, slightly challenging vocabulary.
200
+ - *Advanced*: Complex structures, idiomatic expressions, domain-specific language.
201
+
202
+ 2. **Domain relevance**:
203
+ - Base the story or dialogue on the user’s interests or specified topic.
204
+ - Integrate relevant vocabulary and situations (e.g., a chef character using cooking terms, or a pilot discussing navigation).
205
+
206
+ 3. **Engagement and originality**:
207
+ - Make the story fun, dramatic, or surprising to increase engagement.
208
+ - Avoid clichés and repetition—each story should be fresh and imaginative.
209
+ - Vary tone and structure depending on the theme (e.g., suspenseful for a mystery, humorous for a slice-of-life scene).
210
+
211
+ 4. **Educational value**:
212
+ - Use natural-sounding language learners would benefit from hearing or using.
213
+ - Provide translations and (where helpful) phonetic transcription to support pronunciation and comprehension.
214
+
215
+ ### Output Format
216
+ Return a valid **JSON object** with the following structure:
217
+ - `"title"`: An engaging title in {native_language}.
218
+ - `"setting"`: A brief setup paragraph in {native_language} explaining the story’s background and relevance to the user’s interest.
219
+ - `"content"`: A list of **10 segments**, each structured as:
220
+ - `"speaker"`: A named or role-based character label in {native_language} (e.g., "Narrator", "Captain Li", "The Botanist").
221
+ - `"target_language_text"`: The sentence or dialogue line in {target_language}.
222
+ - `"phonetics"`: A phonetic transcription (IPA, Pinyin, etc.), only if helpful or relevant for the target language.
223
+ - `"base_language_translation"`: A simple, clear translation in {native_language}.
224
+
225
+ Ensure that all entries are structured cleanly and consistently. Do not wrap the result in additional containers or metadata.
226
+ """
{backend → prev_backend_v3/backend}/database.py RENAMED
File without changes
prev_backend_v3/backend/main.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
+ from backend.utils import generate_completions
6
+ from backend import config
7
+ from backend.database import get_db_connection
8
+ import psycopg2
9
+ from psycopg2.extras import RealDictCursor
10
+ from typing import Union, List, Literal, Optional
11
+ import logging
12
+ import json
13
+
14
+ logging.basicConfig(level=logging.INFO)
15
+
16
+ app = FastAPI()
17
+
18
+ # Add CORS middleware
19
+ app.add_middleware(
20
+ CORSMiddleware,
21
+ allow_origins=["*"], # Allows all origins
22
+ allow_credentials=True,
23
+ allow_methods=["*"], # Allows all methods
24
+ allow_headers=["*"], # Allows all headers
25
+ )
26
+
27
# Dependency that hands out a database connection per request.
async def get_db():
    """FastAPI dependency: yield a DB connection, always close it afterwards."""
    connection = await get_db_connection()
    try:
        yield connection
    finally:
        # Runs whether the request handler succeeded or raised.
        connection.close()
34
+
35
class Message(BaseModel):
    # One chat turn sent by the client. Only user/assistant roles are accepted;
    # system instructions are injected server-side from config templates.
    role: Literal["user", "assistant"]  # author of the turn
    content: str  # turn text
38
+
39
class GenerationRequest(BaseModel):
    # Request body shared by all /generate/* endpoints.
    user_id: int
    # Either a single prompt string or a chat history of Message objects.
    query: Union[str, List[Message]]
    # Optional language metadata; when omitted, endpoints fall back to the
    # module-level globals populated by /extract/metadata.
    native_language: Optional[str] = None
    target_language: Optional[str] = None
    proficiency: Optional[str] = None
45
+
46
class MetadataRequest(BaseModel):
    # Request body for /extract/metadata: the raw user text to analyze.
    query: str
48
+
49
# Global metadata variables
# NOTE(review): module-level mutable state shared by every request in this
# process. /extract/metadata overwrites these, so concurrent users can clobber
# each other's values — confirm whether per-request metadata should be required
# instead. Kept as-is for backward compatibility.
native_language: Optional[str] = None
target_language: Optional[str] = None
proficiency: Optional[str] = None
53
+
54
@app.get("/")
async def root():
    """Landing endpoint returning a static welcome payload."""
    welcome = {"message": "Welcome to the AI Learning Assistant API!"}
    return welcome
57
+
58
@app.post("/extract/metadata")
async def extract_metadata(data: MetadataRequest):
    """Extract language metadata from free-form text and cache it in module globals.

    Returns a JSON payload with the parsed metadata; any failure (model call or
    JSON parse) is surfaced as a 500 with the error text.
    """
    logging.info(f"Query: {data.query}")
    try:
        raw = await generate_completions.get_completions(
            data.query,
            config.language_metadata_extraction_prompt,
        )
        metadata = json.loads(raw)
        # Update globals for other endpoints (missing keys default to "unknown").
        for field in ("native_language", "target_language", "proficiency"):
            globals()[field] = metadata.get(field, "unknown")
        payload = {
            "data": metadata,
            "type": "language_metadata",
            "status": "success",
        }
        return JSONResponse(content=payload, status_code=200)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
81
+
82
@app.post("/generate/curriculum")
async def generate_curriculum(data: GenerationRequest):
    """Generate a curriculum from the query, using request metadata or cached globals."""
    try:
        # Use metadata from request or fallback to globals; "unknown" is the
        # final fallback so the template always gets a value.
        replacements = {
            "{native_language}": data.native_language or native_language or "unknown",
            "{target_language}": data.target_language or target_language or "unknown",
            "{proficiency}": data.proficiency or proficiency or "unknown",
        }
        instructions = config.curriculum_instructions
        for placeholder, value in replacements.items():
            instructions = instructions.replace(placeholder, value)
        result = await generate_completions.get_completions(data.query, instructions)
        return JSONResponse(
            content={"data": result, "type": "curriculum", "status": "success"},
            status_code=200,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
109
+
110
@app.post("/generate/flashcards")
async def generate_flashcards(data: GenerationRequest):
    """Generate flashcards from the query, using request metadata or cached globals."""
    try:
        # Resolve each template placeholder: request value, then global, then "unknown".
        replacements = {
            "{native_language}": data.native_language or native_language or "unknown",
            "{target_language}": data.target_language or target_language or "unknown",
            "{proficiency}": data.proficiency or proficiency or "unknown",
        }
        instructions = config.flashcard_mode_instructions
        for placeholder, value in replacements.items():
            instructions = instructions.replace(placeholder, value)
        result = await generate_completions.get_completions(data.query, instructions)
        return JSONResponse(
            content={"data": result, "type": "flashcards", "status": "success"},
            status_code=200,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
136
+
137
@app.post("/generate/exercises")
async def generate_exercises(data: GenerationRequest):
    """Generate exercises from the query, using request metadata or cached globals."""
    try:
        # Request metadata wins; otherwise fall back to globals, then "unknown".
        replacements = {
            "{native_language}": data.native_language or native_language or "unknown",
            "{target_language}": data.target_language or target_language or "unknown",
            "{proficiency}": data.proficiency or proficiency or "unknown",
        }
        instructions = config.exercise_mode_instructions
        for placeholder, value in replacements.items():
            instructions = instructions.replace(placeholder, value)
        result = await generate_completions.get_completions(data.query, instructions)
        return JSONResponse(
            content={"data": result, "type": "exercises", "status": "success"},
            status_code=200,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
163
+
164
@app.post("/generate/simulation")
async def generate_simulation(data: GenerationRequest):
    """Generate a story/dialogue simulation, using request metadata or cached globals."""
    try:
        # Fill the instruction template: request value, global value, or "unknown".
        replacements = {
            "{native_language}": data.native_language or native_language or "unknown",
            "{target_language}": data.target_language or target_language or "unknown",
            "{proficiency}": data.proficiency or proficiency or "unknown",
        }
        instructions = config.simulation_mode_instructions
        for placeholder, value in replacements.items():
            instructions = instructions.replace(placeholder, value)
        result = await generate_completions.get_completions(data.query, instructions)
        return JSONResponse(
            content={"data": result, "type": "simulation", "status": "success"},
            status_code=200,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
prev_backend_v3/backend/utils/__pycache__/generate_completions.cpython-310.pyc ADDED
Binary file (2.55 kB). View file
 
prev_backend_v3/backend/utils/__pycache__/generate_completions.cpython-312.pyc ADDED
Binary file (3.73 kB). View file
 
prev_backend_v3/backend/utils/generate_completions.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import AsyncOpenAI, OpenAI
2
+ import asyncio
3
+ import json
4
+ from typing import AsyncIterator
5
+ from typing import Union, List, Dict, Literal
6
+ from dotenv import load_dotenv
7
+ import os
8
+ from pydantic import BaseModel
9
+ load_dotenv()
10
+
11
# Initialize the async client
# Reads BASE_URL / API_KEY from the environment (loaded via load_dotenv above),
# so the same code can target any OpenAI-compatible endpoint.
client = AsyncOpenAI(
    base_url=os.getenv("BASE_URL"),
    api_key=os.getenv("API_KEY"),
)
16
+
17
class Message(BaseModel):
    # One chat turn; mirrors the Message model declared in the API layer (main.py).
    role: Literal["user", "assistant"]  # author of the turn
    content: str  # turn text
20
+
21
# Helper that collapses a chat history into a single prompt string.
def flatten_messages(messages: List[Message]) -> str:
    """Render each message as "role: content" and join them with newlines."""
    lines = [f"{message.role}: {message.content}" for message in messages]
    return "\n".join(lines)
24
+
25
def process_input(data: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dict[str, str]]]:
    """Normalize prompt input before it is sent to the model.

    (The previous docstring claimed strings were uppercased; the code only
    trims whitespace.)

    Args:
        data: A raw prompt string, or a list of chat-message dicts.

    Returns:
        For a string: the string with leading/trailing whitespace removed.
        For a list: a new list keeping only dict items that contain a
        'content' key, each with its 'content' value stripped. Non-dict
        items and dicts lacking 'content' are silently dropped.

    Raises:
        TypeError: if ``data`` is neither a string nor a list.
    """
    if isinstance(data, str):
        return data.strip()

    if isinstance(data, list):
        # Keep only well-formed message dicts and trim their content.
        return [
            {**item, "content": item["content"].strip()}
            for item in data
            if isinstance(item, dict) and "content" in item
        ]

    raise TypeError("Input must be a string or a list of dictionaries with a 'content' field")
42
+
43
+
44
+ # async def get_completions(
45
+ # prompt: Union[str, List[Dict[str, str]]],
46
+ # instructions: str
47
+ # ) -> str:
48
+ # processed_prompt = process_input(prompt) # Ensures the input format is correct
49
+
50
+ # if isinstance(processed_prompt, str):
51
+ # messages = [
52
+ # {"role": "system", "content": instructions},
53
+ # {"role": "user", "content": processed_prompt}
54
+ # ]
55
+ # elif isinstance(processed_prompt, list):
56
+ # messages = [{"role": "system", "content": instructions}] + processed_prompt
57
+ # else:
58
+ # raise TypeError("Unexpected processed input type.")
59
+
60
+ # response = await client.chat.completions.create(
61
+ # model=os.getenv("MODEL"),
62
+ # messages=messages,
63
+ # response_format={"type": "json_object"}
64
+ # )
65
+
66
+ # output: str = response.choices[0].message.content
67
+ # return output
68
+
69
async def get_completions(
    prompt: Union[str, List[Dict[str, str]]],
    instructions: str
) -> str:
    """Call the chat-completions API and return the raw JSON-object response text.

    Args:
        prompt: A raw prompt string, or a chat history. List inputs are
            flattened to a single "role: content" transcript string.
            NOTE(review): flatten_messages accesses ``.role``/``.content``
            attributes, so list items are expected to be Message-like objects
            despite the ``List[Dict]`` hint — confirm against callers.
        instructions: System prompt injected as the first message.

    Returns:
        The model's message content (a JSON-object string, per response_format).
    """
    if isinstance(prompt, list):
        formatted_query = flatten_messages(prompt)
    else:
        formatted_query = prompt

    # process_input always receives a string here (lists were flattened above),
    # so it returns a stripped string. The original list-handling branch after
    # this point could never execute and has been removed as dead code.
    processed_prompt = process_input(formatted_query)

    messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": processed_prompt},
    ]

    response = await client.chat.completions.create(
        model=os.getenv("MODEL"),
        messages=messages,
        response_format={"type": "json_object"}
    )

    return response.choices[0].message.content