File size: 2,941 Bytes
39ea455
 
 
 
 
a6f222d
39ea455
 
 
 
 
8b49c77
39ea455
8b49c77
39ea455
 
 
8b49c77
 
 
 
 
 
 
39ea455
 
 
 
a6f222d
 
 
 
8b01349
a6f222d
 
 
 
 
 
 
8b01349
 
 
 
 
 
 
 
 
 
 
 
 
a6f222d
 
 
8b01349
 
 
 
 
 
 
 
39ea455
 
8b01349
 
39ea455
8b01349
39ea455
 
8b01349
39ea455
8b01349
 
39ea455
8b01349
 
39ea455
 
 
 
8b01349
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint

# Load settings from a local .env file (python-dotenv) before anything below
# needs them.
load_dotenv()


# Prompt template sent to the LLM. It contains a single `{text}` placeholder,
# which `translate_text` fills with the uploaded file's content through
# `PromptTemplate.from_template`. The prompt asks the model to detect the
# input language and rewrite the text in simplified language (FALC /
# "Leichte Sprache") while keeping the original meaning.
# NOTE(review): the literal's whitespace and wording are part of what the model
# receives; edit deliberately.
system_prompt_1 = """
    You are an advanced AI assistant tasked with helping to  transcribes given texts into
    simplified languages, specifically FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language). 
    This system is intended to streamline the creation of accessible content for government websites.

     Instructions for AI Development:

    detect the language of text given then transcribes text into the same language which the guidelines of 
    FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language) and
     accurately transcribe complex texts into simplified language.
  
    Ensure maintaining the context and meaning of the original text while simplifying its language.
    
    text: {text}

    transcribes text: """

def _read_input_text(path):
    """Return the plain-text content of the ``.txt`` or ``.rtf`` file at *path*.

    Raises:
        ValueError: if the file extension is neither ``.txt`` nor ``.rtf``.
    """
    suffix = os.path.splitext(path)[1].lower()
    if suffix == ".txt":
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    if suffix == ".rtf":
        # Imported lazily so the RTF dependency is only required for RTF uploads.
        from pyth.plugins.rtf15.reader import Rtf15Reader
        from pyth.plugins.plaintext.writer import PlaintextWriter

        # NOTE(review): binary mode follows the pyth3 usage examples — confirm
        # against the installed pyth version.
        with open(path, "rb") as f:
            doc = Rtf15Reader.read(f)
        return PlaintextWriter.write(doc).getvalue()
    raise ValueError("Unsupported file type. Please upload a .txt or .rtf file.")


def translate_text(file, text_input=None):
    """Simplify the text of an uploaded file with the hosted Mistral model.

    The file content is inserted into ``system_prompt_1`` and sent to the
    ``mistralai/Mistral-7B-Instruct-v0.2`` Hugging Face endpoint. The model
    output is also written to ``translated_file.txt`` / ``translated_file.rtf``
    in the current working directory.

    Args:
        file: The uploaded file, either a filesystem path or an object
            exposing the path as ``.name`` (both shapes are accepted because
            the Gradio ``File`` component can deliver either).
        text_input: Currently unused. It defaults to ``None`` so the function
            can be called with the single input the interface provides.

    Returns:
        A ``(model_output, output_file_path)`` tuple.

    Raises:
        ValueError: if no file is given or its extension is not ``.txt`` /
            ``.rtf``.
    """
    if file is None:
        raise ValueError("No file uploaded. Please upload a .txt or .rtf file.")

    if isinstance(file, (str, os.PathLike)):
        source_path = os.fspath(file)
    else:
        source_path = file.name

    # Read (and validate) the input first so an unsupported upload fails
    # before any model endpoint is set up.
    file_text = _read_input_text(source_path)
    suffix = os.path.splitext(source_path)[1].lower()

    # The token comes from the environment (populated by load_dotenv()).
    # NOTE(review): the variable names below are the ones conventionally used
    # for Hugging Face tokens — confirm they match the deployment's .env file.
    api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

    repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

    # Initialize the HuggingFace endpoint.
    # NOTE(review): the endpoint class documents `max_new_tokens`; verify that
    # `max_length` is honored by the installed langchain_huggingface version.
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        max_length=128,
        temperature=0.5,
        huggingfacehub_api_token=api_token,
    )

    # Fill the prompt template with the file content and run it through the LLM.
    prompt = PromptTemplate.from_template(system_prompt_1)
    llm_chain = prompt | llm
    file_translation = llm_chain.invoke({"text": file_text})

    # The output file keeps the input's extension. `suffix` is guaranteed to
    # be ".txt" or ".rtf" here because _read_input_text rejected anything else.
    # NOTE(review): for ".rtf" the file holds plain text, not RTF markup, and
    # the fixed file name is shared by all requests.
    output_file_path = "translated_file" + suffix

    # Write the model output to the output file.
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(file_translation)

    return file_translation, output_file_path

# Assemble the Gradio UI: one file-upload input, and two outputs that receive
# the values returned by `translate_text` (the text and the output file path).
_upload_widget = gr.File(label="Upload Text File")
_result_widgets = [
    gr.Textbox(label="Transcribed Content"),
    gr.File(label="Download Translated File"),
]

iface = gr.Interface(
    fn=translate_text,
    inputs=[_upload_widget],
    outputs=_result_widgets,
    title="Text Transcriber",
    description=(
        "Upload a .txt or .rtf file to translate its content using LangChain "
        "and Mistral-7B-Instruct-v0.2 model with predefined system prompts."
    ),
    allow_flagging="never",
)

# Start the app as soon as the script runs, with Gradio's debug mode enabled.
iface.launch(debug=True)