gov-tech-lab / app.py
deepakaiplanet's picture
Upload 6 files
39ea455 verified
raw
history blame
5.27 kB
import gradio as gr
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the process
# environment before the key is looked up.
load_dotenv()

# The key is read from the environment and re-exported under the same name.
open_api_key = os.getenv('OPENAI_API_KEY')
if not open_api_key:
    # Assigning None into os.environ raises an opaque
    # "TypeError: str expected, not NoneType"; fail with an actionable
    # message instead.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Define it in the environment or in a .env file."
    )
os.environ["OPENAI_API_KEY"] = open_api_key
# Prompt template for the simplification ("transcription") step.
# It contains a single `{text}` placeholder, which translate_text fills with
# the content of the uploaded file via PromptTemplate.
# NOTE(review): the wording describes requirements for *building* a
# simplification tool rather than directly instructing the model to simplify
# the given text — confirm this is the intended prompt.
system_prompt_1 = """
You are an advanced AI assistant tasked with helping to develop a system that automatically transcribes texts into
simplified languages, specifically FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language).
This system is intended to streamline the creation of accessible content for government websites, where the current
manual process is time-consuming and limits the deployment of simplified language texts.
Requirements:
1. Input and Output Formats:
- Input Formats: The AI tool must accept input in Rich Text Format (.rtf) and Free Text (.txt).
- Output Formats: The output should be generated in the same format as the input file (i.e., if the input is .rtf, the
output should be .rtf, and if the input is .txt, the output should be .txt).
- Default Output Language: The output language must match the language detected in the input file.
2. Language Simplification Rules:
- The transcription must adhere to the rules of FALC and "Leichte Sprache," ensuring the content is simple, clear, and
accessible.
- Use simple vocabulary and avoid complex terms.
- Construct short, straightforward sentences with one main idea per sentence.
- Structure information clearly, using bullet points or numbered lists where applicable.
- Incorporate illustrations, icons, or symbols to support textual information if needed.
3. Accessibility Standards:
- The final solution must comply with accessibility standards to ensure content is usable by individuals with intellectual
disabilities and other target groups.
- Ensure that the output is compatible with screen readers and other assistive technologies.
4. Scalability and Efficiency:
- The tool should significantly reduce the time required for the transcription process compared to the current manual
methods.
- It should be capable of handling large volumes of text efficiently to support widespread deployment across various
government websites.
5. User Collaboration:
- The tool should allow for revisions and feedback from collaborators affected by intellectual disabilities to ensure the
output meets the necessary standards of FALC and "Leichte Sprache."
Instructions for AI Development:
Implement a language detection mechanism to identify the language of the input text.
Develop natural language processing (NLP) models trained specifically on FALC and "Leichte Sprache" guidelines to
accurately transcribe complex texts into simplified language.
Ensure the models are capable of maintaining the context and meaning of the original text while simplifying its language.
Include features for user feedback and revisions to refine and improve the transcriptions based on real-world use and
collaborator input.
Test the tool rigorously to ensure it meets accessibility standards and performs well across different types of content
and input formats.
Your goal is to create an AI tool that makes the process of generating FALC and "Leichte Sprache" content more efficient,
scalable, and accessible, ultimately facilitating better communication and inclusivity on government websites.
User Text: {text}
transcribes text: """
# Prompt template for the English-translation step; `{text}` is replaced with
# the text to translate. translate_text uses it twice: once for the simplified
# output and once for the original file content.
# NOTE(review): "langauge" is misspelled inside the literal; it is left as-is
# here because the string is sent to the model at runtime.
system_prompt_2 = """Please translate the following text field content in english langauge.
text: {text}
"""
def translate_text(file, text_input=None):
    """Simplify an uploaded text file and translate input and output to English.

    Three completions are requested from the OpenAI LLM, in this order:

    1. the file content rendered through ``system_prompt_1`` (the simplified
       "transcription"),
    2. an English translation of that transcription (``system_prompt_2``),
    3. an English translation of the original file content
       (``system_prompt_2``).

    The transcription is also written, UTF-8 encoded, to a file named
    ``translated_file.txt`` so it can be offered for download.

    Args:
        file: The uploaded file; either a path string or an object whose
            ``name`` attribute holds the path.
        text_input: Currently unused. It is optional so the function can be
            called with the file alone, which is how the interface in this
            file invokes it (a single ``gr.File`` input).

    Returns:
        A 4-tuple ``(english_of_input, transcription,
        english_of_transcription, output_file_path)``.

    Raises:
        gr.Error: If no file was uploaded or the file is not valid UTF-8.
    """
    import tempfile

    if file is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload a text file first.")

    # Accept both a plain path string and a file-like wrapper exposing `.name`.
    source_path = getattr(file, "name", file)
    try:
        with open(source_path, 'r', encoding='utf-8') as f:
            file_text = f.read()
    except UnicodeDecodeError as err:
        raise gr.Error("The uploaded file could not be read as UTF-8 text.") from err

    llm = OpenAI()
    simplify_template = PromptTemplate(input_variables=["text"], template=system_prompt_1)
    translate_template = PromptTemplate(input_variables=["text"], template=system_prompt_2)

    file_translation = llm.invoke(simplify_template.format(text=file_text))
    text_translation_op = llm.invoke(translate_template.format(text=file_translation))
    text_translation_ip = llm.invoke(translate_template.format(text=file_text))

    # Write into a fresh directory per call so simultaneous requests do not
    # overwrite each other's download, while keeping the original file name.
    output_file_path = os.path.join(tempfile.mkdtemp(), "translated_file.txt")
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(file_translation)

    return text_translation_ip, file_translation, text_translation_op, output_file_path
# Gradio UI definition: a single uploaded file as input; three text panes and
# a downloadable file as output. The outputs are listed in the same order as
# the tuple returned by translate_text.
iface = gr.Interface(
    fn=translate_text,
    inputs=[
        gr.File(label="Upload Text File"),
    ],
    outputs=[
        gr.Textbox(label="transcribes content in english Translated of input content"),
        gr.Textbox(label="transcribes content"),
        gr.Textbox(label="transcribes content in english Translated of output content"),
        gr.File(label="Download Translated File Text"),
    ],
    title="Text Transcribes",
    # The description previously asked for a text input that the form does
    # not offer; it now describes the actual single-file workflow.
    description="Upload a text file to transcribe it into simplified language and translate it to English using LangChain and OpenAI with predefined system prompts.",
    allow_flagging="never",
)

if __name__ == "__main__":
    # Start the server only when executed as a script, so importing this
    # module (e.g. from another module or a test) does not launch the app.
    iface.launch(debug=True)