File size: 2,956 Bytes
39ea455
 
 
 
 
a6f222d
39ea455
 
 
 
 
8b49c77
39ea455
8b49c77
39ea455
 
 
8b49c77
 
 
 
 
 
 
39ea455
 
 
 
a6f222d
 
 
 
8b01349
a6f222d
 
 
 
88120b9
a6f222d
 
8b01349
 
 
 
 
 
 
 
 
 
 
 
 
a6f222d
 
 
8b01349
 
 
 
 
 
 
 
39ea455
 
8b01349
 
39ea455
8b01349
39ea455
 
8b01349
39ea455
8b01349
 
39ea455
8b01349
 
39ea455
 
 
 
8b01349
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint

# Load variables from a local .env file (if present) into the process
# environment; translate_text reads os.environ["api_token"] from there.
load_dotenv()


# Prompt template sent to the LLM. The "{text}" placeholder is filled with the
# uploaded file's content via PromptTemplate; the model is expected to continue
# after the trailing "transcribes text:" cue.
# NOTE: this string is runtime data -- whitespace and wording are part of the
# prompt, so edits here change model behavior.
system_prompt_1 = """
    You are an advanced AI assistant tasked with helping to  transcribes given texts into
    simplified languages, specifically FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language). 
    This system is intended to streamline the creation of accessible content for government websites.

     Instructions for AI Development:

    detect the language of text given then transcribes text into the same language which the guidelines of 
    FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language) and
     accurately transcribe complex texts into simplified language.
  
    Ensure maintaining the context and meaning of the original text while simplifying its language.
    
    text: {text}

    transcribes text: """

def _resolve_upload_path(file):
    """Return the filesystem path of an uploaded file as a string.

    Accepts either a plain path (``str`` / ``os.PathLike``) or an object
    exposing the path through a ``.name`` attribute (e.g. a temp-file wrapper).

    Raises:
        ValueError: If no file was provided.
    """
    if file is None:
        raise ValueError("No file provided. Please upload a .txt or .rtf file.")
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _read_source_text(path, extension):
    """Read the text content of ``path``; ``extension`` is '.txt' or '.rtf'."""
    if extension == ".txt":
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    # RTF: imported lazily so plain-text uploads work without pyth installed.
    from pyth.plugins.rtf15.reader import Rtf15Reader
    from pyth.plugins.plaintext.writer import PlaintextWriter

    # The RTF reader consumes a binary stream.
    with open(path, "rb") as handle:
        document = Rtf15Reader.read(handle)
        return PlaintextWriter.write(document).getvalue()


def translate_text(file, text_input=None):
    """Simplify the content of an uploaded .txt or .rtf file with the LLM.

    The file is validated and read first (so bad input fails before any
    endpoint client is created), then its text is run through the
    ``system_prompt_1`` template and the Mistral-7B-Instruct endpoint, and the
    result is written to ``translated_file.<ext>`` in the working directory.

    Args:
        file: The uploaded file, either a path or an object with a ``.name``
            attribute holding the path.
        text_input: Unused. Kept (now optional) for backward compatibility,
            because the Gradio interface supplies only the file input.

    Returns:
        A ``(transcribed_text, output_file_path)`` tuple.

    Raises:
        ValueError: If no file is given or its extension is not .txt/.rtf.
        KeyError: If the ``api_token`` environment variable is not set.
    """
    source_path = _resolve_upload_path(file)

    # Extension check is case-insensitive so "NOTES.TXT" is accepted too.
    lowered = source_path.lower()
    if lowered.endswith(".txt"):
        extension = ".txt"
    elif lowered.endswith(".rtf"):
        extension = ".rtf"
    else:
        raise ValueError("Unsupported file type. Please upload a .txt or .rtf file.")

    file_text = _read_source_text(source_path, extension)

    # Initialize the HuggingFace endpoint.
    # NOTE(review): confirm `max_length` is honored by HuggingFaceEndpoint;
    # its documented generation limit is `max_new_tokens`.
    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        max_length=128,
        temperature=0.5,
        huggingfacehub_api_token=os.environ["api_token"],
    )

    # Fill the prompt template with the file content and run it through the LLM.
    prompt = PromptTemplate.from_template(system_prompt_1)
    llm_chain = prompt | llm
    file_translation = llm_chain.invoke({"text": file_text})

    # The output keeps the input's extension. NOTE: for .rtf uploads the file
    # still contains the model's plain-text output, not RTF markup.
    output_file_path = "translated_file" + extension
    with open(output_file_path, "w", encoding="utf-8") as out:
        out.write(file_translation)

    return file_translation, output_file_path

# Build the Gradio UI: one uploaded file in; transcribed text and a
# downloadable result file out.
upload_input = gr.File(label="Upload Text File")
transcript_output = gr.Textbox(label="Transcribed Content")
download_output = gr.File(label="Download Translated File")

iface = gr.Interface(
    fn=translate_text,
    inputs=[upload_input],
    outputs=[transcript_output, download_output],
    title="Text Transcriber",
    description="Upload a .txt or .rtf file to translate its content using LangChain and Mistral-7B-Instruct-v0.2 model with predefined system prompts.",
    allow_flagging="never",
)

# Start the web app when the module is executed (debug mode enabled).
iface.launch(debug=True)