Spaces:
Sleeping
Sleeping
File size: 8,603 Bytes
d291776 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 |
""" File that contains flashcards' backend """
import json
import os
from dataclasses import asdict, dataclass
from typing import IO, Union

# from dotenv import find_dotenv, load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import ChatPromptTemplate
@dataclass
class Flashcard:
    """
    A single language-learning flashcard.

    Attributes:
        input_expression (str): Expression in the source language.
        input_language (str): Name of the source language.
        output_expression (str): Translation of the expression.
        output_language (str): Name of the language translated into.
        example_usage (str): A sentence showing the input expression in use.
    """

    input_expression: str
    input_language: str
    output_expression: str
    output_language: str
    example_usage: str

    @classmethod
    def from_dict(cls, data: dict) -> "Flashcard":
        """
        Builds a Flashcard from a mapping of attribute names to values.

        Keys missing from ``data`` become ``None`` on the resulting card;
        keys that are not flashcard attributes are ignored.

        Args:
            data (dict): Mapping that may contain any of the flashcard attributes.

        Returns:
            Flashcard: The card populated from ``data``.
        """
        attribute_names = (
            "input_expression",
            "input_language",
            "output_expression",
            "output_language",
            "example_usage",
        )
        # dict.get yields None for absent keys, so partial input is tolerated.
        values = {name: data.get(name) for name in attribute_names}
        return cls(**values)
@dataclass
class Flashcards:
    """
    Represents a collection of Flashcard instances.

    Attributes:
        data (list[Flashcard]): The flashcards held by this collection.
    """

    data: list[Flashcard]

    def as_json(self) -> dict:
        """
        Converts the collection into a JSON-serialisable dictionary.

        Returns:
            dict: ``{"flashcards": [...]}`` where each entry is the
                ``dataclasses.asdict`` form of one flashcard.
        """
        return {"flashcards": [asdict(card) for card in self.data]}

    @classmethod
    def import_from_json(cls, data: Union[dict, IO]) -> "Flashcards":
        """
        Creates a Flashcards instance from JSON flashcard data.

        Args:
            data (file or dict): Either an open file-like object whose content
                is JSON (it is read with ``json.load``), or an already-parsed
                dictionary. In both cases the top-level object must have the
                layout produced by ``as_json``: a ``"flashcards"`` key holding
                a list of per-card dictionaries.

        Returns:
            Flashcards: A collection containing the imported flashcards.

        Raises:
            KeyError: If the top-level object has no ``"flashcards"`` key.
            TypeError: If a card dictionary has keys that do not match the
                Flashcard attributes exactly.
            json.JSONDecodeError: If a file-like ``data`` is not valid JSON.
        """
        # Keep the parameter untouched; a dict is used as-is, anything else is
        # treated as a readable JSON stream.
        payload = data if isinstance(data, dict) else json.load(data)
        cards = [Flashcard(**card) for card in payload["flashcards"]]
        return cls(data=cards)

    def __len__(self) -> int:
        """
        Returns the number of Flashcard instances in the collection.

        Returns:
            int: The number of Flashcard instances.
        """
        return len(self.data)
class FlashcardGeneratorOpenAI:  # pylint: disable=R0903
    """
    A class to generate language learning flashcards using OpenAI's language model.

    Attributes:
        chat (ChatOpenAI): Chat model instance used to generate flashcards.
        response_schemas (list): ResponseSchema objects, one per flashcard field,
            describing the structure requested from the model.
        output_parser (StructuredOutputParser): Parser built from
            ``response_schemas``; used to parse the model's reply.
        format_instructions (str): Formatting instructions obtained from
            ``output_parser`` and inserted into the prompt.
        flashcard_generator_template (str): Prompt text with placeholders.
        prompt (ChatPromptTemplate): Prompt template built from
            ``flashcard_generator_template``.
    """

    def __init__(self, api_key: str, llm_model: str = "gpt-3.5-turbo") -> None:
        """
        Initializes the generator with the given API key and language model.

        Args:
            api_key (str): The API key for OpenAI.
            llm_model (str): The name of the language model to use.
        """
        # NOTE(review): temperature 0.0 is presumably chosen for repeatable
        # output - confirm.
        self.chat = ChatOpenAI(temperature=0.0, model=llm_model, api_key=api_key)

        input_expression_schema = ResponseSchema(
            name="input_expression",
            type="str",
            description="Original expression entered by the user, refined"
            " to create translated_expression.",
        )
        input_language_schema = ResponseSchema(
            name="input_language",
            type="str",
            description="Language of the input expression.",
        )
        output_expression_schema = ResponseSchema(
            name="output_expression",
            type="str",
            description="Translation of refined expression entered by the user.",
        )
        output_language_schema = ResponseSchema(
            name="output_language",
            type="str",
            description="Language of the output expression.",
        )
        example_usage_schema = ResponseSchema(
            name="example_usage",
            type="str",
            description="Example usage of input expression, used to give the user some "
            "example context where it could be used. Limited to one sentence.",
        )

        # Kept on the instance so the attribute documented on the class exists.
        self.response_schemas = [
            input_expression_schema,
            input_language_schema,
            output_expression_schema,
            output_language_schema,
            example_usage_schema,
        ]
        self.output_parser = StructuredOutputParser.from_response_schemas(
            self.response_schemas
        )
        self.format_instructions = self.output_parser.get_format_instructions()

        # The template text starts at column 0 on purpose: any leading
        # whitespace inside the literal would become part of the prompt.
        self.flashcard_generator_template = """\
For the following expression, extract the following information:
input_expression: Original expression entered by the user, but refined to create translated_expression (for flashcard for language learning). If the expression is too long (more than 10 words), it should be shortened while keeping the sense.
input_language: Language of the input expression
output_expression: Refined input expression translated to {output_language} language. Provide 2 alternatives, separated with 'slash' sign (and space before & after the sign).
example_usage: Example usage of input expression, used to give the user some example context where it could be used. Limited to one sentence.
input_expression: {input_expression}
input_language: {input_language}
{format_instructions}
"""
        self.prompt = ChatPromptTemplate.from_template(
            template=self.flashcard_generator_template
        )

    def generate_flashcard(
        self, input_exp: str, input_lang: str, output_lang: str
    ) -> Flashcard:
        """
        Generates a flashcard for an expression using the chat model.

        The prompt is filled with the expression, both language names and the
        format instructions, sent to the chat model, and the reply is parsed
        with ``output_parser`` before being turned into a Flashcard.

        Args:
            input_exp (str): The expression to be translated.
            input_lang (str): The language of the input expression.
            output_lang (str): The language into which the expression is
                to be translated.

        Returns:
            Flashcard: A card built from the parsed model reply via
                ``Flashcard.from_dict``; fields the reply does not provide
                are ``None``.
        """
        messages = self.prompt.format_messages(
            input_expression=input_exp,
            input_language=input_lang,
            output_language=output_lang,
            format_instructions=self.format_instructions,
        )
        response = self.chat(messages)
        flashcard_dict = self.output_parser.parse(response.content)
        return Flashcard.from_dict(flashcard_dict)
def main():
    """
    Debugging helper: generates flashcards for a few sample expressions.

    Reads the OpenAI key from the ``OPENAI_API_KEY`` environment variable,
    prints every generated flashcard and collects them in a Flashcards object.
    """
    # _ = load_dotenv(find_dotenv()) # Read local .env file
    source_language = "English"
    target_language = "Polish"
    sample_expressions = (
        "cruel",
        "let someone off the hook",
        "it absorbed me",
        "get my thoughts in order",
        "crude",
        "pore over",
    )

    generator = FlashcardGeneratorOpenAI(api_key=os.environ["OPENAI_API_KEY"])
    collected = Flashcards(data=[])
    for expression in sample_expressions:
        card = generator.generate_flashcard(
            expression, source_language, target_language
        )
        print(card)
        collected.data.append(card)


if __name__ == "__main__":
    main()
|