mbarnig commited on
Commit
2537e91
·
verified ·
1 Parent(s): 03f6424

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import AsyncAssistantEventHandler
2
+ from openai import AsyncOpenAI
3
+ import gradio as gr
4
+ import asyncio
5
+ import os
6
+
7
+ # set the keys
8
+ client = AsyncOpenAI(
9
+ api_key=os.getenv("OPENAI_API_KEY")
10
+ )
11
+
12
+ assistantID = os.getenv("OPENAI_ASSISTANT_ID")
13
+ username = os.getenv("YOUR_ID")
14
+ password = os.getenv("YOUR_PASSWORD")
15
+
16
+ mytitle = "<h1 align=center>RTL AI News Reader : What happened in the country πŸ‡±πŸ‡Ί and in the world 🌎 ?</h1>"
17
+
18
+ mydescription="""
19
+ <h3 align='center'>Which topic interests you : 🐢 πŸƒπŸ»β€β™‚οΈ πŸŒ— πŸ‡ 🌈 🍽️ πŸ† 🚘 ✈️ 🩺 </h3>
20
+ <table width=100%>
21
+ <tr>
22
+ <th width=50% bgcolor="Moccasin">Ask your questions in Luxembourgish or another language :</th>
23
+ <th bgcolor="Khaki">Response from the OpenAI File-Search Assistant :</th>
24
+ </tr>
25
+ </table>
26
+ """
27
+
28
+ myarticle ="""
29
+ <h3>Background :</h3>
30
+ <p>This HuggingFace Space demo was created by <a href="https://github.com/mbarnig">Marco Barnig</a>.As an artificial intelligence,
31
+ the <a href="https://platform.openai.com/docs/models">OpenAI model</a> gpt-4o-mini-2024-07-18 is used via API,
32
+ which can utilize up to 128,000 tokens as context, provide an answer to a question with a maximum of 16,384 tokens,
33
+ and process up to 200,000 tokens per minute (TPM). All english content from RTL.lu from the beginning up to September 2024 has been split into 16 JSON files
34
+ and uploaded to a Vector Store by the OpenAI File-Search Assistant "RTL English News Reader."
35
+ Each file contains fewer than 5 million tokens, which is an upper limit for the AI model. It is possible to upload up to 10,000 files to an OpenAI Assistant.
36
+ The responses of the examples are cached and therefore displayed without delay.</p>
37
+ """
38
+
39
+ myinput = gr.Textbox(lines=3, label=" What would you like to know ?")
40
+
41
+ myexamples =
42
+ "What happened in 2014 ?"
43
+ ]
44
+
45
+ class EventHandler(AsyncAssistantEventHandler):
46
+ def __init__(self) -> None:
47
+ super().__init__()
48
+ self.response_text = ""
49
+
50
+ async def on_text_created(self, text) -> None:
51
+ self.response_text += str(text)
52
+
53
+ async def on_text_delta(self, delta, snapshot):
54
+ self.response_text += str(delta.value)
55
+
56
+ async def on_text_done(self, text):
57
+ pass
58
+
59
+ async def on_tool_call_created(self, tool_call):
60
+ self.response_text += f"\n[Tool Call]: {str(tool_call.type)}\n"
61
+
62
+ async def on_tool_call_delta(self, delta, snapshot):
63
+ if snapshot.id != getattr(self, "current_tool_call", None):
64
+ self.current_tool_call = snapshot.id
65
+ self.response_text += f"\n[Tool Call Delta]: {str(delta.type)}\n"
66
+
67
+ if delta.type == 'code_interpreter':
68
+ if delta.code_interpreter.input:
69
+ self.response_text += str(delta.code_interpreter.input)
70
+ if delta.code_interpreter.outputs:
71
+ self.response_text += "\n\n[Output]:\n"
72
+ for output in delta.code_interpreter.outputs:
73
+ if output.type == "logs":
74
+ self.response_text += f"\n{str(output.logs)}"
75
+
76
+ async def on_tool_call_done(self, text):
77
+ pass
78
+
79
+ # Initialize session variables
80
+ session_data = {"assistant_id": assistantID, "thread_id": None}
81
+
82
+ async def initialize_thread():
83
+ # Create a Thread
84
+ thread = await client.beta.threads.create()
85
+ # Store thread ID in session_data for later use
86
+ session_data["thread_id"] = thread.id
87
+
88
+ async def generate_response(user_input):
89
+ assistant_id = session_data["assistant_id"]
90
+ thread_id = session_data["thread_id"]
91
+
92
+ # Add a Message to the Thread
93
+ oai_message = await client.beta.threads.messages.create(
94
+ thread_id=thread_id,
95
+ role="user",
96
+ content=user_input
97
+ )
98
+
99
+ # Create and Stream a Run
100
+ event_handler = EventHandler()
101
+
102
+ async with client.beta.threads.runs.stream(
103
+ thread_id=thread_id,
104
+ assistant_id=assistant_id,
105
+ instructions="Please assist the user with their query.",
106
+ event_handler=event_handler,
107
+ ) as stream:
108
+ # Yield incremental updates
109
+ async for _ in stream:
110
+ await asyncio.sleep(0.1) # Small delay to mimic streaming
111
+ yield event_handler.response_text
112
+
113
+ # Gradio interface function (generator)
114
+ async def gradio_chat_interface(user_input):
115
+ # Create a new event loop if none exists (or if we are in a new thread)
116
+ try:
117
+ loop = asyncio.get_running_loop()
118
+ except RuntimeError:
119
+ loop = asyncio.new_event_loop()
120
+ asyncio.set_event_loop(loop)
121
+
122
+ # Initialize the thread if not already done
123
+ if session_data["thread_id"] is None:
124
+ await initialize_thread()
125
+
126
+ # Generate and yield responses
127
+ async for response in generate_response(user_input):
128
+ yield response
129
+
130
+ # Set up Gradio interface with streaming
131
+ interface = gr.Interface(
132
+ fn=gradio_chat_interface,
133
+ inputs=myinput,
134
+ outputs="markdown",
135
+ title=mytitle,
136
+ description=mydescription,
137
+ article=myarticle,
138
+ live=False,
139
+ allow_flagging="never",
140
+ examples=myexamples
141
+ )
142
+
143
+ # Launch the Gradio app
144
+ interface.launch(auth=(username, password), auth_message="<h1>Lecteur de nouvelles IA de RTL</h1><p>Ce HuggingFace