# Spaces:
# Sleeping
# Sleeping
# File size: 2,956 Bytes
# 39ea455 a6f222d 39ea455 8b49c77 39ea455 8b49c77 39ea455 8b49c77 39ea455 a6f222d 8b01349 a6f222d 88120b9 a6f222d 8b01349 a6f222d 8b01349 39ea455 8b01349 39ea455 8b01349 39ea455 8b01349 39ea455 8b01349 39ea455 8b01349 39ea455 8b01349 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import gradio as gr
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint
# Load settings from a .env file (if present) before anything reads the
# environment; translate_text() looks up os.environ["api_token"].
load_dotenv()
# Prompt template handed to PromptTemplate.from_template(); the "{text}"
# placeholder is filled with the content of the uploaded document.
# The wording is sent to the model verbatim, so edit it with care.
system_prompt_1 = """
You are an advanced AI assistant tasked with helping to transcribes given texts into
simplified languages, specifically FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language).
This system is intended to streamline the creation of accessible content for government websites.
Instructions for AI Development:
detect the language of text given then transcribes text into the same language which the guidelines of
FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language) and
accurately transcribe complex texts into simplified language.
Ensure maintaining the context and meaning of the original text while simplifying its language.
text: {text}
transcribes text: """
def translate_text(file, text_input=None):
    """Simplify the text of an uploaded ``.txt`` or ``.rtf`` file with the LLM.

    The file's text is substituted into ``system_prompt_1``, sent to the
    ``mistralai/Mistral-7B-Instruct-v0.2`` Hugging Face endpoint, and the
    model's answer is both returned and written to ``translated_file.txt``
    or ``translated_file.rtf`` in the current working directory.

    Args:
        file: The uploaded file, either as a path string or as an object
            whose ``name`` attribute holds the path.
        text_input: Currently unused. It is optional so the function can be
            called with the file alone while staying compatible with
            callers that still pass a second argument.

    Returns:
        A ``(simplified_text, output_file_path)`` tuple.

    Raises:
        ValueError: If no file was provided or its extension is neither
            ``.txt`` nor ``.rtf``.
        KeyError: If the ``api_token`` environment variable is not set.
    """
    if file is None:
        raise ValueError("No file uploaded. Please upload a .txt or .rtf file.")

    # Accept both a plain path and a file-like wrapper exposing ``.name``.
    source_path = str(getattr(file, "name", file))
    lowered_path = source_path.lower()  # match ".TXT" / ".RTF" as well

    # Read the input first so bad uploads fail fast, before the remote
    # endpoint is configured.
    if lowered_path.endswith('.txt'):
        with open(source_path, 'r', encoding='utf-8') as f:
            file_text = f.read()
        output_file_path = "translated_file.txt"
    elif lowered_path.endswith('.rtf'):
        # Imported lazily: pyth is only needed for RTF uploads.
        from pyth.plugins.rtf15.reader import Rtf15Reader
        from pyth.plugins.plaintext.writer import PlaintextWriter

        # The RTF file has to be opened explicitly; the reader parses the
        # stream and the plaintext writer flattens it to text.
        with open(source_path, 'rb') as f:
            doc = Rtf15Reader.read(f)
        file_text = PlaintextWriter.write(doc).getvalue()
        # NOTE(review): the result written below is plain text even though
        # the file name ends in .rtf -- confirm this is what users expect.
        output_file_path = "translated_file.rtf"
    else:
        raise ValueError("Unsupported file type. Please upload a .txt or .rtf file.")

    repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
    # Initialize the HuggingFace endpoint.
    # NOTE(review): ``max_length`` is kept from the original code; confirm
    # it is the generation limit this endpoint class actually honours.
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        max_length=128,
        temperature=0.5,
        huggingfacehub_api_token=os.environ["api_token"],
    )

    # Define the prompt template and pipe it into the model.
    prompt = PromptTemplate.from_template(system_prompt_1)
    llm_chain = prompt | llm
    file_translation = llm_chain.invoke({"text": file_text})

    # Write the simplified text so it can be offered as a download.
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(file_translation)
    return file_translation, output_file_path
# Assemble the Gradio UI: a single file upload goes in; the simplified text
# and a downloadable copy of it come out.
upload_inputs = [gr.File(label="Upload Text File")]
result_outputs = [
    gr.Textbox(label="Transcribed Content"),
    gr.File(label="Download Translated File"),
]
iface = gr.Interface(
    fn=translate_text,
    inputs=upload_inputs,
    outputs=result_outputs,
    title="Text Transcriber",
    description=(
        "Upload a .txt or .rtf file to translate its content using LangChain and Mistral-7B-Instruct-v0.2 model with predefined system prompts."
    ),
    allow_flagging="never",
)

# Start the app with Gradio's debug mode switched on.
iface.launch(debug=True)
|