Lenylvt committed on
Commit
7cb0b8e
·
verified ·
1 Parent(s): 7c82987

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -28
app.py CHANGED
@@ -1,39 +1,58 @@
1
- import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- import re
 
 
 
 
4
 
5
- # Initialize the InferenceClient with the Mixtral model
6
- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 
 
 
 
7
 
8
- def translate_srt(file_info, target_language):
9
- # file_info is a dictionary containing information about the file
10
- # including its content
11
- srt_content = file_info["content"].decode("utf-8")
12
- lines = srt_content.split('\n')
13
 
14
- translated_lines = []
15
- for line in lines:
16
- if re.match(r"^\d+$", line) or re.match(r"^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$", line):
17
- translated_lines.append(line) # Copy timestamps and numbers directly
18
- elif line.strip() == "":
19
- translated_lines.append(line) # Preserve empty lines for formatting
20
- else:
21
- # Translate the text line
22
- response = client(inputs={"inputs": line, "parameters": {"target_language": target_language}})
23
- translated_lines.append(response[0]["generated_text"])
24
 
25
- # Join the translated lines back into a single string
26
- translated_srt_content = "\n".join(translated_lines)
27
- return translated_srt_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Gradio interface
30
  iface = gr.Interface(
31
- fn=translate_srt,
32
- inputs=[gr.File(label="Upload SRT File"), gr.Dropdown(["fr", "en", "es", "de", "it", "pt"], label="Target Language")],
33
  outputs="text",
34
- title="SRT File Translator",
35
- description="Translate SRT files to the selected language using Mixtral model."
36
  )
37
 
38
- # Launch the Gradio app
39
  iface.launch()
 
 
1
  from huggingface_hub import InferenceClient
2
+ import gradio as gr
3
+
4
+ client = InferenceClient(
5
+ "mistralai/Mixtral-8x7B-Instruct-v0.1"
6
+ )
7
 
8
def format_prompt(message, history):
    """Assemble a Mixtral-instruct prompt from prior chat turns plus the new message.

    Each (user, bot) pair from *history* becomes "[INST] user [/INST] bot</s> ";
    the new *message* is appended as a final open [INST] block.
    """
    pieces = ["<s>"]
    for user_turn, bot_turn in history:
        pieces.append(f"[INST] {user_turn} [/INST]")
        pieces.append(f" {bot_turn}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    # join is equivalent to repeated += concatenation, built in one pass
    return "".join(pieces)
15
 
16
def generate_from_file(file_path, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Read an uploaded file and stream a model response built from its content.

    file_path may be a plain filepath string or a gradio file object exposing
    ``.name`` (both shapes occur across gradio versions).
    Yields the growing output string, delegating to `generate`.
    """
    # A bare gr.State() arrives as None on the first request; format_prompt
    # (called downstream) iterates history, so normalize to an empty list.
    if history is None:
        history = []

    # Accept either a filepath string or a file-like upload object.
    path = getattr(file_path, "name", file_path)
    with open(path, 'r', encoding='utf-8') as file:
        file_content = file.read()

    # Only the first 1000 characters feed the prompt (crude prompt-size guard).
    prompt = file_content[:1000]

    # `generate` is a generator (streams tokens). `yield from` keeps this
    # function a generator too, so gradio streams partial output to the UI;
    # the original `return generate(...)` handed gradio a raw generator object
    # from a non-generator function, which gradio does not stream.
    yield from generate(prompt, history, system_prompt, temperature, max_new_tokens, top_p, repetition_penalty)
25
+
26
def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Stream text from the Mixtral endpoint, yielding the accumulated output.

    The system prompt and user prompt are folded into one instruct-formatted
    message via format_prompt, then streamed token by token.
    """
    # Clamp temperature away from zero — the sampling backend rejects ~0 values.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    generate_kwargs = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,  # fixed seed: repeatable sampling across calls
    }

    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)

    output = ""
    for chunk in stream:
        output += chunk.token.text
        yield output  # emit the running text so the UI updates live
    return output
49
 
 
50
# Gradio interface: upload a file, keep chat history in State, supply a system prompt.
iface = gr.Interface(
    fn=generate_from_file,
    # gr.State([]) seeds history with an empty list; a bare gr.State() starts
    # as None, which crashes history iteration on the first request.
    inputs=[
        gr.File(label="Upload File"),
        gr.State([]),
        gr.Textbox(label="System Prompt"),
    ],
    outputs="text",
    title="SRT File Translation",
    concurrency_limit=20,
)

iface.launch()