chansung committed on
Commit
2190187
·
verified ·
1 Parent(s): 37da0c0

Upload folder using huggingface_hub

.gitignore CHANGED
@@ -1,3 +1,5 @@
 
 
1
  # Byte-compiled / optimized / DLL files
2
  __pycache__/
3
  *.py[cod]
 
1
+ .DS_Store
2
+
3
  # Byte-compiled / optimized / DLL files
4
  __pycache__/
5
  *.py[cod]
README.md CHANGED
@@ -275,3 +275,27 @@ $ python main.py # or gradio main.py
275
 
276
  # Acknowledgments
277
  This is a project built during the Vertex sprints held by Google's ML Developer Programs team. We are thankful to have been granted a good amount of GCP credits for this project.
275
 
276
  # Acknowledgments
277
  This is a project built during the Vertex sprints held by Google's ML Developer Programs team. We are thankful to have been granted a good amount of GCP credits for this project.
278
+ # AdaptSum
279
+
280
+ AdaptSum stands for Adaptive Summarization. This project focuses on developing an LLM-powered system for dynamic summarization. Instead of generating entirely new summaries with each update, the system intelligently identifies and modifies only the necessary parts of the existing summary. This approach aims to create a more efficient and fluid summarization process within a continuous chat interaction with an LLM.
281
+
282
+ # Instructions
283
+
284
+ 1. Install dependencies
285
+ ```shell
286
+ $ pip install -r requirements.txt
287
+ ```
288
+
289
+ 2. Setup Gemini API Key
290
+ ```shell
291
+ $ export GEMINI_API_KEY=xxxxx
292
+ ```
293
+ > Note that the Gemini API key must be obtained from Google AI Studio. Vertex AI is not supported at the moment, because the Gemini SDK does not yet provide file-upload functionality for Vertex AI.
294
+
295
+ 3. Run Gradio app
296
+ ```shell
297
+ $ python main.py # or gradio main.py
298
+ ```
299
+
300
+ # Acknowledgments
301
+ This is a project built during the Vertex sprints held by Google's ML Developer Programs team. We are thankful to have been granted a good amount of GCP credits for this project.
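The adaptive-update loop itself is not shown in this commit. As a rough, illustrative sketch of the idea described at the top of this README (the `request_updated_summary` helper and the prompt wording are hypothetical and not the project's API; only the `gpt-4o-mini` default model name comes from the code in this commit):

```python
# Hypothetical sketch of the "update only what changed" idea; not the repository's implementation.
from openai import AsyncOpenAI

client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

async def request_updated_summary(existing_summary: str, new_turns: list[dict]) -> str:
    """Ask the model to revise the existing summary instead of rewriting it from scratch."""
    prompt = (
        "Current summary of the conversation:\n"
        f"{existing_summary}\n\n"
        "Update only the parts affected by the new messages below and keep the rest unchanged.\n\n"
        + "\n".join(f"{turn['role']}: {turn['content']}" for turn in new_turns)
    )
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
```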
standalone/favicon-32x32.png ADDED
standalone/index.html CHANGED
@@ -8,6 +8,7 @@
8
  <!-- Include marked.js for Markdown rendering -->
9
  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
10
  <link rel="stylesheet" href="style.css">
 
11
  </head>
12
  <body>
13
  <div class="app-container">
@@ -19,7 +20,7 @@
19
  <button class="new-session-btn" id="newSessionBtn">
20
  <img src="new-indicator.svg" alt="Icon" class="svg-icon">
21
  </button>
22
- <button id="toggleLayoutBtn"><img src="vertical-layout.svg" alt="Icon" class="svg-icon"></button>
23
  </div>
24
  <!-- <h3>Chat History</h3> -->
25
  <ul id="sessionList"></ul>
@@ -66,7 +67,7 @@
66
  <div id="fileAttachments" class="file-attachments"></div>
67
  <div class="input-row">
68
  <button id="attachBtn" class="attach-button">+</button>
69
- <input type="file" id="fileInput" multiple accept="image/*,.pdf">
70
  <textarea id="chatInput" placeholder="Ask Anything"></textarea>
71
  <button id="sendBtn">
72
  <img src="send.svg" alt="Icon" class="svg-icon-non-white">
@@ -122,6 +123,11 @@
122
  <option value="gemini-2.0-flash">Gemini 2.0 Flash</option>
123
  <option value="gemini-2.0-flash-lite">Gemini 2.0 Flash Lite</option>
124
  </optgroup>
125
  </select>
126
  </div>
127
  </div>
 
8
  <!-- Include marked.js for Markdown rendering -->
9
  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
10
  <link rel="stylesheet" href="style.css">
11
+ <link rel="icon" type="image/png" href="favicon-32x32.png">
12
  </head>
13
  <body>
14
  <div class="app-container">
 
20
  <button class="new-session-btn" id="newSessionBtn">
21
  <img src="new-indicator.svg" alt="Icon" class="svg-icon">
22
  </button>
23
+ <button id="toggleLayoutBtn"><img src="vertical.svg" alt="Icon" class="svg-icon"></button>
24
  </div>
25
  <!-- <h3>Chat History</h3> -->
26
  <ul id="sessionList"></ul>
 
67
  <div id="fileAttachments" class="file-attachments"></div>
68
  <div class="input-row">
69
  <button id="attachBtn" class="attach-button">+</button>
70
+ <input type="file" id="fileInput" multiple accept="application/pdf">
71
  <textarea id="chatInput" placeholder="Ask Anything"></textarea>
72
  <button id="sendBtn">
73
  <img src="send.svg" alt="Icon" class="svg-icon-non-white">
 
123
  <option value="gemini-2.0-flash">Gemini 2.0 Flash</option>
124
  <option value="gemini-2.0-flash-lite">Gemini 2.0 Flash Lite</option>
125
  </optgroup>
126
+ <optgroup label="Hugging Face">
127
+ <option value="huggingface/meta-llama/Llama-3.3-70B-Instruct">Llama 3.3 70B Instruct</option>
128
+ <option value="huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B">DeepSeek R1 Distill Qwen 32B</option>
129
+ <option value="huggingface/Qwen/Qwen2.5-72B-Instruct">Qwen 2.5 72B Instruct</option>
130
+ </optgroup>
131
  </select>
132
  </div>
133
  </div>
standalone/script.js CHANGED
@@ -3,15 +3,30 @@
3
  const MODEL_NAME = "gpt-4o-mini";
4
 
5
  // Modified to scroll the current card to the bottom after updating the message.
6
- function updateLastMessage(content) {
7
  const session = sessions[currentSessionIndex];
8
- session.messages[session.messages.length - 1].aiResponse = content;
9
  renderCurrentSession();
10
- // Auto-scroll the current card to the bottom:
11
- const cards = document.querySelectorAll('.card');
12
- if (cards[currentCardIndex]) {
13
- cards[currentCardIndex].scrollTop = cards[currentCardIndex].scrollHeight;
14
- }
15
  }
16
 
17
  // ----------------- Layout and Navigation -----------------
@@ -66,14 +81,14 @@ let currentSessionIndex = 0;
66
  let currentCardIndex = 0;
67
  function initSessions() {
68
  sessions.push({
69
- id: Date.now(),
70
  name: "Chat Session 1",
71
  title: "Chat Session 1",
72
  messages: [],
73
  summary: "# Chat Summary\n\nThis is the default summary for Chat Session 1.",
74
  settings: {
75
  temperature: 0.7,
76
- maxTokens: 256,
77
  persona: "professional",
78
  model: "gpt-4o-mini" // <-- new property
79
  }
@@ -111,14 +126,14 @@ function renderSessionList() {
111
  }
112
  document.getElementById('newSessionBtn').addEventListener('click', () => {
113
  const newSession = {
114
- id: Date.now(),
115
  name: "Chat Session " + (sessions.length + 1),
116
  title: "Chat Session " + (sessions.length + 1),
117
  messages: [],
118
  summary: "# Chat Summary\n\nThis is the default summary for Chat Session " + (sessions.length + 1) + ".",
119
  settings: {
120
  temperature: 0.7,
121
- maxTokens: 256,
122
  persona: "professional",
123
  model: "gpt-4o-mini" // <-- default model
124
  }
@@ -155,7 +170,7 @@ function renderCurrentSession() {
155
  if (message.attachments && message.attachments.length > 0) {
156
  attachmentHTML = `
157
  <div class="vertical-file-list">
158
- ${message.attachments.map(name => `<div class="file-item-vertical">${name}</div>`).join("")}
159
  </div>
160
  `;
161
  }
@@ -224,6 +239,25 @@ function processMessagesInContainer(container) {
224
  container.querySelectorAll('.message').forEach(processMessage);
225
  }
226
  // ----------------- Adding Conversation & Stream API Call -----------------
227
  const attachedFiles = [];
228
  async function addConversation(userText) {
229
  if (userText.trim() === '' && attachedFiles.length === 0) return;
@@ -232,20 +266,20 @@ async function addConversation(userText) {
232
  sessions[currentSessionIndex].messages.push({
233
  userText,
234
  aiResponse: "",
235
- attachments: attachedFiles.map(file => file.name), // Store file names
236
- model: sessions[currentSessionIndex].settings.model
 
237
  });
238
 
239
  // Clear attachments after sending
240
  clearFileAttachments();
241
-
242
  renderCurrentSession();
243
 
244
  const conversation = [];
245
  sessions[currentSessionIndex].messages.forEach(msg => {
246
- conversation.push({ role: "user", content: msg.userText });
247
  if (msg.aiResponse) {
248
- conversation.push({ role: "assistant", content: msg.aiResponse });
249
  }
250
  });
251
 
@@ -314,10 +348,12 @@ attachBtn.addEventListener('click', () => {
314
  fileInput.addEventListener('change', () => {
315
  for (const file of fileInput.files) {
316
  attachedFiles.push(file);
 
317
  }
318
  fileInput.value = "";
319
  updateFileAttachments();
320
  });
 
321
  function updateFileAttachments() {
322
  fileAttachments.innerHTML = "";
323
  attachedFiles.forEach((file, index) => {
@@ -475,36 +511,42 @@ async function callLLMStream(conversation) {
475
 
476
  if (model.startsWith("gpt-4o")) {
477
  // Call OpenAI endpoint
478
- return callOpenAIStream(conversation, model, temperature, maxTokens);
479
  } else if (model.startsWith("claude")) {
480
  // Call Anthropic endpoint
481
- return callAnthropicStream(conversation, model, temperature, maxTokens);
482
  } else if (model.startsWith("gemini")) {
483
  // Call Google endpoint
484
- return callGoogleStream(conversation, model, temperature, maxTokens);
 
 
 
485
  } else {
486
  throw new Error("Unsupported model: " + model);
487
  }
488
  }
489
 
490
- async function callOpenAIStream(conversation) {
491
  const response = await fetch("http://127.0.0.1:8000/openai_stream", {
492
  method: "POST",
493
  headers: {
494
- "Content-Type": "application/json"
 
495
  // Remove the Authorization header since the Python backend handles the API key.
496
  },
497
  body: JSON.stringify({
498
  conversation: conversation,
499
  temperature: sessions[currentSessionIndex].settings.temperature,
500
  max_tokens: sessions[currentSessionIndex].settings.maxTokens,
501
- model: MODEL_NAME
502
  })
503
  });
504
  const reader = response.body.getReader();
505
  const decoder = new TextDecoder("utf-8");
506
  let done = false;
507
  let aiMessage = "";
 
 
508
  while (!done) {
509
  const { value, done: doneReading } = await reader.read();
510
  done = doneReading;
@@ -523,31 +565,33 @@ async function callOpenAIStream(conversation) {
523
  const delta = parsed.choices[0].delta.content;
524
  if (delta) {
525
  aiMessage += delta;
526
- updateLastMessage(aiMessage);
527
  }
528
  } catch (err) {
529
  console.error("Stream parsing error:", err);
530
  }
531
  }
532
  }
 
533
  return aiMessage;
534
  }
535
 
536
 
537
- async function callAnthropicStream(conversation, model, temperature, maxTokens) {
538
  model = model.toLowerCase().replace(/\s+/g, '-').replace(/\./g, '-');
539
  console.log(`Calling Anthropic API with model: ${model}`);
540
 
541
  const response = await fetch("http://127.0.0.1:8000/anthropic_stream", {
542
  method: "POST",
543
  headers: {
544
- "Content-Type": "application/json"
 
545
  },
546
  body: JSON.stringify({
547
  messages: conversation,
548
  temperature: temperature,
549
  max_tokens: maxTokens,
550
- model: model + "-latest"
551
  })
552
  });
553
 
@@ -556,6 +600,7 @@ async function callAnthropicStream(conversation, model, temperature, maxTokens)
556
  let done = false;
557
  let aiMessage = "";
558
 
 
559
  while (!done) {
560
  const { value, done: doneReading } = await reader.read();
561
  done = doneReading;
@@ -574,32 +619,33 @@ async function callAnthropicStream(conversation, model, temperature, maxTokens)
574
  const delta = parsed.choices[0].delta.content;
575
  if (delta) {
576
  aiMessage += delta;
577
- updateLastMessage(aiMessage);
578
  }
579
  } catch (err) {
580
  console.error("Anthropic stream parsing error:", err);
581
  }
582
  }
583
  }
584
-
585
  return aiMessage;
586
 
587
  }
588
 
589
- async function callGoogleStream(conversation, model, temperature, maxTokens) {
590
  // Convert conversation messages to Gemini's "contents" format.
591
  model = model.toLowerCase().replace(/\s+/g, '-');
592
  console.log(model);
593
  const response = await fetch("http://127.0.0.1:8000/gemini_stream", {
594
  method: "POST",
595
  headers: {
596
- "Content-Type": "application/json"
 
597
  },
598
  body: JSON.stringify({
599
  messages: conversation,
600
  temperature: temperature,
601
  max_tokens: maxTokens,
602
- model: model
603
  })
604
  });
605
 
@@ -607,7 +653,8 @@ async function callGoogleStream(conversation, model, temperature, maxTokens) {
607
  const decoder = new TextDecoder("utf-8");
608
  let done = false;
609
  let aiMessage = "";
610
-
 
611
  while (!done) {
612
  const { value, done: doneReading } = await reader.read();
613
  done = doneReading;
@@ -626,18 +673,67 @@ async function callGoogleStream(conversation, model, temperature, maxTokens) {
626
  const delta = parsed.choices[0].delta.content;
627
  if (delta) {
628
  aiMessage += delta;
629
- updateLastMessage(aiMessage);
630
  }
631
  } catch (err) {
632
  console.error("Gemini stream parsing error:", err);
633
  }
634
  }
635
  }
636
-
637
  return aiMessage;
638
-
639
  }
640
 
 
 
 
 
 
641
 
642
  // ----------------- Initialization -----------------
643
  initSessions();
 
3
  const MODEL_NAME = "gpt-4o-mini";
4
 
5
  // Modified to scroll the current card to the bottom after updating the message.
6
+ function updateLastMessage(content, isStreaming = false) {
7
  const session = sessions[currentSessionIndex];
8
+ const cursorHTML = `<span class="blinking-cursor"></span>`;
9
+ session.messages[session.messages.length - 1].aiResponse = isStreaming ? content + cursorHTML : content;
10
+
11
+ // Get the current scroll position of the last card before re-rendering
12
+ const lastCardBefore = document.querySelector('.card:last-child');
13
+ const prevScrollTop = lastCardBefore ? lastCardBefore.scrollTop : 0;
14
+
15
+ // Re-render the conversation
16
  renderCurrentSession();
17
+
18
+ // Use requestAnimationFrame to wait until the new DOM is laid out
19
+ requestAnimationFrame(() => {
20
+ const lastCardAfter = document.querySelector('.card:last-child');
21
+ if (lastCardAfter) {
22
+ if (isStreaming) {
23
+ lastCardAfter.scrollTop = lastCardAfter.scrollHeight;
24
+ } else {
25
+ // Restore the previous scroll position
26
+ lastCardAfter.scrollTop = prevScrollTop;
27
+ }
28
+ }
29
+ });
30
  }
31
 
32
  // ----------------- Layout and Navigation -----------------
 
81
  let currentCardIndex = 0;
82
  function initSessions() {
83
  sessions.push({
84
+ id: Date.now() + '-' + Math.random().toString(36).substr(2, 9),
85
  name: "Chat Session 1",
86
  title: "Chat Session 1",
87
  messages: [],
88
  summary: "# Chat Summary\n\nThis is the default summary for Chat Session 1.",
89
  settings: {
90
  temperature: 0.7,
91
+ maxTokens: 1024,
92
  persona: "professional",
93
  model: "gpt-4o-mini" // <-- new property
94
  }
 
126
  }
127
  document.getElementById('newSessionBtn').addEventListener('click', () => {
128
  const newSession = {
129
+ id: Date.now() + '-' + Math.random().toString(36).substr(2, 9),
130
  name: "Chat Session " + (sessions.length + 1),
131
  title: "Chat Session " + (sessions.length + 1),
132
  messages: [],
133
  summary: "# Chat Summary\n\nThis is the default summary for Chat Session " + (sessions.length + 1) + ".",
134
  settings: {
135
  temperature: 0.7,
136
+ maxTokens: 1024,
137
  persona: "professional",
138
  model: "gpt-4o-mini" // <-- default model
139
  }
 
170
  if (message.attachments && message.attachments.length > 0) {
171
  attachmentHTML = `
172
  <div class="vertical-file-list">
173
+ ${message.attachments.map(file => `<div class="file-item-vertical">${file.path}</div>`).join("")}
174
  </div>
175
  `;
176
  }
 
239
  container.querySelectorAll('.message').forEach(processMessage);
240
  }
241
  // ----------------- Adding Conversation & Stream API Call -----------------
242
+ async function fileToBase64(file) {
243
+ return new Promise((resolve, reject) => {
244
+ const reader = new FileReader();
245
+ reader.onload = () => {
246
+ // Get the base64 string (remove the data URL prefix)
247
+ const base64 = reader.result.split(',')[1];
248
+ resolve({
249
+ name: file.name,
250
+ path: file.webkitRelativePath || file.path || file.name,
251
+ size: file.size,
252
+ type: file.type,
253
+ content: base64
254
+ });
255
+ };
256
+ reader.onerror = reject;
257
+ reader.readAsDataURL(file);
258
+ });
259
+ }
260
+
261
  const attachedFiles = [];
262
  async function addConversation(userText) {
263
  if (userText.trim() === '' && attachedFiles.length === 0) return;
 
266
  sessions[currentSessionIndex].messages.push({
267
  userText,
268
  aiResponse: "",
269
+ attachments: await Promise.all(attachedFiles.map(fileToBase64)),
270
+ model: sessions[currentSessionIndex].settings.model,
271
+ sessionId: sessions[currentSessionIndex].id
272
  });
273
 
274
  // Clear attachments after sending
275
  clearFileAttachments();
 
276
  renderCurrentSession();
277
 
278
  const conversation = [];
279
  sessions[currentSessionIndex].messages.forEach(msg => {
280
+ conversation.push({ role: "user", content: msg.userText, attachments: msg.attachments, sessionId: msg.sessionId });
281
  if (msg.aiResponse) {
282
+ conversation.push({ role: "assistant", content: msg.aiResponse, sessionId: msg.sessionId });
283
  }
284
  });
285
 
 
348
  fileInput.addEventListener('change', () => {
349
  for (const file of fileInput.files) {
350
  attachedFiles.push(file);
351
+ // Attached files are rendered in the attachments area by updateFileAttachments() below
352
  }
353
  fileInput.value = "";
354
  updateFileAttachments();
355
  });
356
+
357
  function updateFileAttachments() {
358
  fileAttachments.innerHTML = "";
359
  attachedFiles.forEach((file, index) => {
 
511
 
512
  if (model.startsWith("gpt-4o")) {
513
  // Call OpenAI endpoint
514
+ return callOpenAIStream(session.id, conversation, model, temperature, maxTokens);
515
  } else if (model.startsWith("claude")) {
516
  // Call Anthropic endpoint
517
+ return callAnthropicStream(session.id, conversation, model, temperature, maxTokens);
518
  } else if (model.startsWith("gemini")) {
519
  // Call Google endpoint
520
+ return callGoogleStream(session.id, conversation, model, temperature, maxTokens);
521
+ } else if (model.startsWith("huggingface")) {
522
+ // Call Hugging Face endpoint
523
+ return callHuggingFaceStream(session.id, conversation, model.replace("huggingface/", ""), temperature, maxTokens);
524
  } else {
525
  throw new Error("Unsupported model: " + model);
526
  }
527
  }
528
 
529
+ async function callOpenAIStream(sessionId, conversation) {
530
  const response = await fetch("http://127.0.0.1:8000/openai_stream", {
531
  method: "POST",
532
  headers: {
533
+ "Content-Type": "application/json",
534
+ "X-Session-ID": sessionId
535
  // Remove the Authorization header since the Python backend handles the API key.
536
  },
537
  body: JSON.stringify({
538
  conversation: conversation,
539
  temperature: sessions[currentSessionIndex].settings.temperature,
540
  max_tokens: sessions[currentSessionIndex].settings.maxTokens,
541
+ model: MODEL_NAME,
542
  })
543
  });
544
  const reader = response.body.getReader();
545
  const decoder = new TextDecoder("utf-8");
546
  let done = false;
547
  let aiMessage = "";
548
+
549
+ updateLastMessage(aiMessage, true);
550
  while (!done) {
551
  const { value, done: doneReading } = await reader.read();
552
  done = doneReading;
 
565
  const delta = parsed.choices[0].delta.content;
566
  if (delta) {
567
  aiMessage += delta;
568
+ updateLastMessage(aiMessage, true);
569
  }
570
  } catch (err) {
571
  console.error("Stream parsing error:", err);
572
  }
573
  }
574
  }
575
+ updateLastMessage(aiMessage, false);
576
  return aiMessage;
577
  }
578
 
579
 
580
+ async function callAnthropicStream(sessionId, conversation, model, temperature, maxTokens) {
581
  model = model.toLowerCase().replace(/\s+/g, '-').replace(/\./g, '-');
582
  console.log(`Calling Anthropic API with model: ${model}`);
583
 
584
  const response = await fetch("http://127.0.0.1:8000/anthropic_stream", {
585
  method: "POST",
586
  headers: {
587
+ "Content-Type": "application/json",
588
+ "X-Session-ID": sessionId
589
  },
590
  body: JSON.stringify({
591
  messages: conversation,
592
  temperature: temperature,
593
  max_tokens: maxTokens,
594
+ model: model + "-latest",
595
  })
596
  });
597
 
 
600
  let done = false;
601
  let aiMessage = "";
602
 
603
+ updateLastMessage(aiMessage, true);
604
  while (!done) {
605
  const { value, done: doneReading } = await reader.read();
606
  done = doneReading;
 
619
  const delta = parsed.choices[0].delta.content;
620
  if (delta) {
621
  aiMessage += delta;
622
+ updateLastMessage(aiMessage, true);
623
  }
624
  } catch (err) {
625
  console.error("Anthropic stream parsing error:", err);
626
  }
627
  }
628
  }
629
+ updateLastMessage(aiMessage, false);
630
  return aiMessage;
631
 
632
  }
633
 
634
+ async function callGoogleStream(sessionId, conversation, model, temperature, maxTokens) {
635
  // Convert conversation messages to Gemini's "contents" format.
636
  model = model.toLowerCase().replace(/\s+/g, '-');
637
  console.log(model);
638
  const response = await fetch("http://127.0.0.1:8000/gemini_stream", {
639
  method: "POST",
640
  headers: {
641
+ "Content-Type": "application/json",
642
+ "X-Session-ID": sessionId
643
  },
644
  body: JSON.stringify({
645
  messages: conversation,
646
  temperature: temperature,
647
  max_tokens: maxTokens,
648
+ model: model,
649
  })
650
  });
651
 
 
653
  const decoder = new TextDecoder("utf-8");
654
  let done = false;
655
  let aiMessage = "";
656
+
657
+ updateLastMessage(aiMessage, true);
658
  while (!done) {
659
  const { value, done: doneReading } = await reader.read();
660
  done = doneReading;
 
673
  const delta = parsed.choices[0].delta.content;
674
  if (delta) {
675
  aiMessage += delta;
676
+ updateLastMessage(aiMessage, true);
677
  }
678
  } catch (err) {
679
  console.error("Gemini stream parsing error:", err);
680
  }
681
  }
682
  }
683
+ updateLastMessage(aiMessage, false);
684
  return aiMessage;
 
685
  }
686
 
687
+ async function callHuggingFaceStream(sessionId, conversation, model, temperature, maxTokens) {
688
+ console.log(`Calling Hugging Face API with model: ${model}`);
689
+ const response = await fetch("http://127.0.0.1:8000/huggingface_stream", {
690
+ method: "POST",
691
+ headers: {
692
+ "Content-Type": "application/json",
693
+ "X-Session-ID": sessionId
694
+ },
695
+ body: JSON.stringify({
696
+ messages: conversation,
697
+ temperature: temperature,
698
+ max_tokens: maxTokens,
699
+ model: model,
700
+ })
701
+ });
702
+
703
+ const reader = response.body.getReader();
704
+ const decoder = new TextDecoder("utf-8");
705
+ let done = false;
706
+ let aiMessage = "";
707
+
708
+ updateLastMessage(aiMessage, true);
709
+ while (!done) {
710
+ const { value, done: doneReading } = await reader.read();
711
+ done = doneReading;
712
+ const chunk = decoder.decode(value);
713
+ const lines = chunk.split("\n").filter(line => line.trim().startsWith("data:"));
714
+
715
+ for (const line of lines) {
716
+ const dataStr = line.replace(/^data:\s*/, "");
717
+ if (dataStr === "[DONE]") {
718
+ done = true;
719
+ break;
720
+ }
721
+
722
+ try {
723
+ const parsed = JSON.parse(dataStr);
724
+ const delta = parsed.choices[0].delta.content;
725
+ if (delta) {
726
+ aiMessage += delta;
727
+ updateLastMessage(aiMessage, true);
728
+ }
729
+ } catch (err) {
730
+ console.error("Hugging Face stream parsing error:", err);
731
+ }
732
+ }
733
+ }
734
+ updateLastMessage(aiMessage, false);
735
+ return aiMessage;
736
+ }
737
 
738
  // ----------------- Initialization -----------------
739
  initSessions();
standalone/server/main.py CHANGED
@@ -1,10 +1,15 @@
1
  import json
 
2
  from fastapi import FastAPI, Request, HTTPException
3
  from fastapi.responses import StreamingResponse
4
  from fastapi.middleware.cors import CORSMiddleware
5
- import httpx
6
 
7
  app = FastAPI()
 
 
 
 
8
 
9
  # Allow all origins for testing (adjust for production)
10
  app.add_middleware(
@@ -17,222 +22,7 @@ app.add_middleware(
17
 
18
  # Replace these with secure methods in production
19
  import os
20
-
21
- OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
22
- ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
23
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
24
- MODEL_NAME = "gpt-4o-mini"
25
-
26
- @app.post("/openai_stream")
27
- async def openai_stream(request: Request):
28
- try:
29
- body = await request.json()
30
- except Exception as e:
31
- raise HTTPException(status_code=400, detail="Invalid JSON payload") from e
32
-
33
- conversation = body.get("conversation")
34
- if not conversation:
35
- raise HTTPException(status_code=400, detail="Missing 'conversation' in payload")
36
-
37
- temperature = body.get("temperature", 0.7)
38
- max_tokens = body.get("max_tokens", 256)
39
- model = body.get("model", MODEL_NAME)
40
-
41
- # Using OpenAI's SDK instead of direct API calls
42
- from openai import AsyncOpenAI
43
-
44
- # Initialize the client with the API key
45
- client = AsyncOpenAI(api_key=OPENAI_API_KEY)
46
-
47
- async def event_generator():
48
- try:
49
- print(f"Starting stream for model: {model}, temperature: {temperature}, max_tokens: {max_tokens}")
50
- line_count = 0
51
-
52
- # Use the SDK to create a streaming completion
53
- stream = await client.chat.completions.create(
54
- model=model,
55
- messages=conversation,
56
- temperature=temperature,
57
- max_tokens=max_tokens,
58
- stream=True
59
- )
60
-
61
- async for chunk in stream:
62
- if chunk.choices and chunk.choices[0].delta.content is not None:
63
- content = chunk.choices[0].delta.content
64
- line_count += 1
65
- if line_count % 10 == 0:
66
- print(f"Processed {line_count} stream chunks")
67
-
68
- # Format the response in the same way as before
69
- response_json = json.dumps({
70
- "choices": [{"delta": {"content": content}}]
71
- })
72
- yield f"data: {response_json}\n\n"
73
-
74
- # Send the [DONE] marker
75
- print("Stream completed successfully")
76
- yield "data: [DONE]\n\n"
77
-
78
- except Exception as e:
79
- print(f"Error during streaming: {str(e)}")
80
- yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
81
- finally:
82
- print(f"Stream ended after processing {line_count if 'line_count' in locals() else 0} chunks")
83
-
84
- print("Returning StreamingResponse to client")
85
- return StreamingResponse(event_generator(), media_type="text/event-stream")
86
-
87
- @app.post("/gemini_stream")
88
- async def gemini_stream(request: Request):
89
- """
90
- Stream responses from Google's Gemini model using the Gemini SDK.
91
- """
92
- body = await request.json()
93
- conversation = body.get("messages", [])
94
- temperature = body.get("temperature", 0.7)
95
- max_tokens = body.get("max_tokens", 256)
96
- model = body.get("model", "gemini-pro") # Default to gemini-pro model
97
-
98
- # Using Google's Generative AI SDK
99
- import google.generativeai as genai
100
- from google.generativeai.types import HarmCategory, HarmBlockThreshold
101
-
102
- # Initialize the client with the API key
103
- genai.configure(api_key=GOOGLE_API_KEY)
104
-
105
- # Convert OpenAI message format to Gemini format
106
- gemini_messages = []
107
- for msg in conversation:
108
- role = "user" if msg["role"] == "user" else "model"
109
- gemini_messages.append({"role": role, "parts": [msg["content"]]})
110
-
111
- async def event_generator():
112
- try:
113
- print(f"Starting Gemini stream for model: {model}, temperature: {temperature}, max_tokens: {max_tokens}")
114
- line_count = 0
115
-
116
- # Create a Gemini model instance
117
- gemini_model = genai.GenerativeModel(
118
- model_name=model,
119
- generation_config={
120
- "temperature": temperature,
121
- "max_output_tokens": max_tokens,
122
- "top_p": 0.95,
123
- },
124
- safety_settings={
125
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
126
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
127
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
128
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
129
- }
130
- )
131
-
132
- # Start the streaming response
133
- response = gemini_model.generate_content(
134
- gemini_messages,
135
- stream=True
136
- )
137
-
138
- # Fix: Use synchronous iteration instead of async for
139
- for chunk in response:
140
- if hasattr(chunk, 'text') and chunk.text:
141
- content = chunk.text
142
- line_count += 1
143
- if line_count % 10 == 0:
144
- print(f"Processed {line_count} Gemini stream chunks")
145
-
146
- # Format the response to match OpenAI format for client compatibility
147
- response_json = json.dumps({
148
- "choices": [{"delta": {"content": content}}]
149
- })
150
- yield f"data: {response_json}\n\n"
151
-
152
- # Send the [DONE] marker
153
- print("Gemini stream completed successfully")
154
- yield "data: [DONE]\n\n"
155
-
156
- except Exception as e:
157
- print(f"Error during Gemini streaming: {str(e)}")
158
- yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
159
- finally:
160
- print(f"Gemini stream ended after processing {line_count if 'line_count' in locals() else 0} chunks")
161
-
162
- print("Returning StreamingResponse from Gemini to client")
163
- return StreamingResponse(event_generator(), media_type="text/event-stream")
164
-
165
- @app.post("/anthropic_stream")
166
- async def anthropic_stream(request: Request):
167
- """
168
- Stream responses from Anthropic's Claude models.
169
- """
170
- print("Received request for Anthropic streaming")
171
-
172
- # Parse the request body
173
- body = await request.json()
174
- messages = body.get("messages", [])
175
- temperature = body.get("temperature", 0.7)
176
- max_tokens = body.get("max_tokens", 1024)
177
- model = body.get("model", "claude-3-opus-20240229")
178
-
179
- # Load Anthropic API key from environment
180
- anthropic_api_key = ANTHROPIC_API_KEY #os.environ.get("ANTHROPIC_API_KEY")
181
- if not anthropic_api_key:
182
- return JSONResponse(
183
- status_code=500,
184
- content={"error": "ANTHROPIC_API_KEY not found in environment variables"}
185
- )
186
-
187
- # Convert messages to Anthropic format
188
- anthropic_messages = []
189
- for msg in messages:
190
- role = "assistant" if msg.get("role") == "assistant" else "user"
191
- content = msg.get("content", "")
192
- anthropic_messages.append({"role": role, "content": content})
193
-
194
- line_count = 0
195
-
196
- async def event_generator():
197
- try:
198
- import anthropic
199
-
200
- # Initialize Anthropic client
201
- client = anthropic.Anthropic(api_key=anthropic_api_key)
202
-
203
- # Start the streaming response
204
- with client.messages.stream(
205
- model=model,
206
- messages=anthropic_messages,
207
- max_tokens=max_tokens,
208
- temperature=temperature
209
- ) as stream:
210
- for chunk in stream:
211
- if hasattr(chunk, 'delta') and hasattr(chunk.delta, 'text') and chunk.delta.text:
212
- content = chunk.delta.text
213
- nonlocal line_count
214
- line_count += 1
215
- if line_count % 10 == 0:
216
- print(f"Processed {line_count} Anthropic stream chunks")
217
-
218
- # Format the response to match OpenAI format for client compatibility
219
- response_json = json.dumps({
220
- "choices": [{"delta": {"content": content}}]
221
- })
222
- yield f"data: {response_json}\n\n"
223
-
224
- # Send the [DONE] marker
225
- print("Anthropic stream completed successfully")
226
- yield "data: [DONE]\n\n"
227
-
228
- except Exception as e:
229
- print(f"Error during Anthropic streaming: {str(e)}")
230
- yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
231
- finally:
232
- print(f"Anthropic stream ended after processing {line_count if 'line_count' in locals() else 0} chunks")
233
-
234
- print("Returning StreamingResponse from Anthropic to client")
235
- return StreamingResponse(event_generator(), media_type="text/event-stream")
236
 
237
  @app.post("/summarize_openai")
238
  async def summarize_openai(request: Request):
@@ -250,7 +40,7 @@ async def summarize_openai(request: Request):
250
 
251
  # Load the prompt from prompts.toml
252
  import tomli
253
- with open("configs/prompts.toml", "rb") as f:
254
  prompts_config = tomli.load(f)
255
 
256
  # Get the prompt and system prompt
@@ -306,7 +96,7 @@ async def summarize_anthropic(request: Request):
306
 
307
  # Load the prompt from prompts.toml
308
  import tomli
309
- with open("configs/prompts.toml", "rb") as f:
310
  prompts_config = tomli.load(f)
311
 
312
  # Get the prompt and system prompt
@@ -361,7 +151,7 @@ async def summarize_google(request: Request):
361
 
362
  # Load the prompt from prompts.toml
363
  import tomli
364
- with open("configs/prompts.toml", "rb") as f:
365
  prompts_config = tomli.load(f)
366
 
367
  # Get the prompt and system prompt
 
1
  import json
2
+ import httpx
3
  from fastapi import FastAPI, Request, HTTPException
4
  from fastapi.responses import StreamingResponse
5
  from fastapi.middleware.cors import CORSMiddleware
6
+ from stream import openai, anthropic, google, huggingface
7
 
8
  app = FastAPI()
9
+ app.include_router(openai.router)
10
+ app.include_router(anthropic.router)
11
+ app.include_router(google.router)
12
+ app.include_router(huggingface.router)
13
 
14
  # Allow all origins for testing (adjust for production)
15
  app.add_middleware(
 
22
 
23
  # Replace these with secure methods in production
24
  import os
25
+ from collections import defaultdict
 
 
 
26
 
27
  @app.post("/summarize_openai")
28
  async def summarize_openai(request: Request):
 
40
 
41
  # Load the prompt from prompts.toml
42
  import tomli
43
+ with open("../../configs/prompts.toml", "rb") as f:
44
  prompts_config = tomli.load(f)
45
 
46
  # Get the prompt and system prompt
 
96
 
97
  # Load the prompt from prompts.toml
98
  import tomli
99
+ with open("../../configs/prompts.toml", "rb") as f:
100
  prompts_config = tomli.load(f)
101
 
102
  # Get the prompt and system prompt
 
151
 
152
  # Load the prompt from prompts.toml
153
  import tomli
154
+ with open("../../configs/prompts.toml", "rb") as f:
155
  prompts_config = tomli.load(f)
156
 
157
  # Get the prompt and system prompt
standalone/server/stream/anthropic.py ADDED
@@ -0,0 +1,95 @@
 
 
1
+ import os
2
+ import json
3
+ from fastapi import FastAPI, Request, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ from fastapi import APIRouter
6
+
7
+ from anthropic import Anthropic
8
+
9
+ from .utils import handle_attachments
10
+
11
+ router = APIRouter()
12
+ ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
13
+
14
+ attachments_in_anthropic = {}
15
+
16
+ @router.post("/anthropic_stream")
17
+ async def anthropic_stream(request: Request):
18
+ """
19
+ Stream responses from Anthropic's Claude models.
20
+ """
21
+ print("Received request for Anthropic streaming")
22
+
23
+ # Parse the request body
24
+ body = await request.json()
25
+ conversation = body.get("messages", [])
26
+ temperature = body.get("temperature", 0.7)
27
+ max_tokens = body.get("max_tokens", 1024)
28
+ model = body.get("model", "claude-3-opus-20240229")
29
+
30
+ # Get session ID from the request
31
+ session_id = request.headers.get("X-Session-ID")
32
+ if not session_id:
33
+ raise HTTPException(status_code=400, detail="Missing 'X-Session-ID' header")
34
+ if session_id not in attachments_in_anthropic: attachments_in_anthropic[session_id] = {}
35
+
36
+ # Handle file attachments if present
37
+ conversation = await handle_attachments(session_id, conversation, remove_content=False)
38
+ anthropic_messages = []
39
+ for msg in conversation:
40
+ role = "user" if msg["role"] == "user" else "assistant"
41
+
42
+ pdf_base64s = []
43
+ if "attachments" in msg:
44
+ for attachment in msg["attachments"]:
45
+ if attachment["file_path"].endswith(".pdf"):
46
+ print(attachment)
47
+ if attachment["file_path"] not in attachments_in_anthropic[session_id]:
48
+ pdf_base64 = {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": attachment["content"]}}
49
+ pdf_base64s.append(pdf_base64)
50
+ attachments_in_anthropic[session_id][attachment["name"]] = pdf_base64
51
+ else:
52
+ pdf_base64s.append(attachments_in_anthropic[session_id][attachment["name"]])
53
+
54
+ anthropic_messages.append({"role": role, "content": pdf_base64s + [{"type": "text", "text": msg["content"]}]})
55
+
56
+ line_count = 0
57
+
58
+ async def event_generator():
59
+ try:
60
+ # Initialize Anthropic client
61
+ client = Anthropic(api_key=ANTHROPIC_API_KEY)
62
+
63
+ # Start the streaming response
64
+ with client.messages.stream(
65
+ model=model,
66
+ messages=anthropic_messages,
67
+ max_tokens=max_tokens,
68
+ temperature=temperature
69
+ ) as stream:
70
+ for chunk in stream:
71
+ if hasattr(chunk, 'delta') and hasattr(chunk.delta, 'text') and chunk.delta.text:
72
+ content = chunk.delta.text
73
+ nonlocal line_count
74
+ line_count += 1
75
+ if line_count % 10 == 0:
76
+ print(f"Processed {line_count} Anthropic stream chunks")
77
+
78
+ # Format the response to match OpenAI format for client compatibility
79
+ response_json = json.dumps({
80
+ "choices": [{"delta": {"content": content}}]
81
+ })
82
+ yield f"data: {response_json}\n\n"
83
+
84
+ # Send the [DONE] marker
85
+ print("Anthropic stream completed successfully")
86
+ yield "data: [DONE]\n\n"
87
+
88
+ except Exception as e:
89
+ print(f"Error during Anthropic streaming: {str(e)}")
90
+ yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
91
+ finally:
92
+ print(f"Anthropic stream ended after processing {line_count if 'line_count' in locals() else 0} chunks")
93
+
94
+ print("Returning StreamingResponse from Anthropic to client")
95
+ return StreamingResponse(event_generator(), media_type="text/event-stream")
standalone/server/stream/google.py ADDED
@@ -0,0 +1,102 @@
 
 
1
+ import os
2
+ import json
3
+ from fastapi import FastAPI, Request, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ from fastapi import APIRouter
6
+
7
+ from google.genai import types
8
+ from google import genai
9
+
10
+ from .utils import handle_attachments
11
+
12
+ router = APIRouter()
13
+
14
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
15
+ client = genai.client.AsyncClient(genai.client.ApiClient(api_key=GOOGLE_API_KEY))
16
+
17
+ attachments_in_gcp = {}
18
+
19
+ @router.post("/gemini_stream")
20
+ async def gemini_stream(request: Request):
21
+ """
22
+ Stream responses from Google's Gemini model using the Gemini SDK.
23
+ """
24
+ body = await request.json()
25
+ conversation = body.get("messages", [])
26
+ temperature = body.get("temperature", 0.7)
27
+ max_tokens = body.get("max_tokens", 256)
28
+ model = body.get("model", "gemini-pro") # Default to gemini-pro model
29
+
30
+ # Get session ID from the request
31
+ session_id = request.headers.get("X-Session-ID")
32
+ if not session_id:
33
+ raise HTTPException(status_code=400, detail="Missing 'X-Session-ID' header")
34
+ if session_id not in attachments_in_gcp: attachments_in_gcp[session_id] = {}
35
+
36
+ # Handle file attachments if present
37
+ conversation = await handle_attachments(session_id, conversation)
38
+
39
+ # Convert OpenAI message format to Gemini format
40
+ gemini_messages = []
41
+ for msg in conversation:
42
+ role = "user" if msg["role"] == "user" else "model"
43
+ attachments = []
44
+
45
+ if "attachments" in msg:
46
+ for attachment in msg["attachments"]:
47
+ if attachment["file_path"] not in attachments_in_gcp[session_id]:
48
+ gcp_upload = await client.files.upload(path=attachment["file_path"])
49
+ path_wrap = types.Part.from_uri(file_uri=gcp_upload.uri, mime_type=gcp_upload.mime_type)
50
+ attachments_in_gcp[session_id][attachment["file_path"]] = path_wrap
51
+ attachments.append(path_wrap)
52
+ else:
53
+ attachments.append(attachments_in_gcp[session_id][attachment["file_path"]])
54
+ print("Uploaded File Reused")
55
+
56
+ gemini_messages.append(
57
+ types.Content(role=role, parts=[types.Part.from_text(text=msg["content"])] + attachments)
58
+ )
59
+
60
+ print(gemini_messages)
61
+
62
+ async def event_generator():
63
+ try:
64
+ print(f"Starting Gemini stream for model: {model}, temperature: {temperature}, max_tokens: {max_tokens}")
65
+ line_count = 0
66
+
67
+ # Start a streaming generate_content request
68
+ response = await client.models.generate_content_stream(
69
+ model=model,
70
+ contents=gemini_messages,
71
+ config=types.GenerateContentConfig(
72
+ temperature=temperature,
73
+ max_output_tokens=max_tokens,
74
+ top_p=0.95,
75
+ )
76
+ )
77
+
78
+ # Iterate asynchronously over the streamed response chunks
79
+ async for chunk in response:
80
+ content = chunk.text
81
+ line_count += 1
82
+ if line_count % 10 == 0:
83
+ print(f"Processed {line_count} Gemini stream chunks")
84
+
85
+ # Format the response to match OpenAI format for client compatibility
86
+ response_json = json.dumps({
87
+ "choices": [{"delta": {"content": content}}]
88
+ })
89
+ yield f"data: {response_json}\n\n"
90
+
91
+ # Send the [DONE] marker
92
+ print("Gemini stream completed successfully")
93
+ yield "data: [DONE]\n\n"
94
+
95
+ except Exception as e:
96
+ print(f"Error during Gemini streaming: {str(e)}")
97
+ yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
98
+ finally:
99
+ print(f"Gemini stream ended after processing {line_count if 'line_count' in locals() else 0} chunks")
100
+
101
+ print("Returning StreamingResponse from Gemini to client")
102
+ return StreamingResponse(event_generator(), media_type="text/event-stream")
standalone/server/stream/huggingface.py ADDED
@@ -0,0 +1,101 @@
 
 
1
+ import os
2
+ import json
3
+ from fastapi import FastAPI, Request, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ from fastapi import APIRouter
6
+
7
+ from huggingface_hub import AsyncInferenceClient
8
+
9
+ from .utils import handle_attachments, extract_text_from_pdf
10
+
11
+ router = APIRouter()
12
+
13
+ HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
14
+ client = AsyncInferenceClient(api_key=HUGGINGFACE_TOKEN)
15
+
16
+ attachments_in_huggingface = {}
17
+
18
+ @router.post("/huggingface_stream")
19
+ async def huggingface_stream(request: Request):
20
+ try:
21
+ body = await request.json()
22
+ except Exception as e:
23
+ raise HTTPException(status_code=400, detail="Invalid JSON payload") from e
24
+
25
+ conversation = body.get("messages")
26
+ if not conversation:
27
+ raise HTTPException(status_code=400, detail="Missing 'conversation' in payload")
28
+
29
+ print("--------------------------------")
30
+ print(body)
31
+ print()
32
+ temperature = body.get("temperature", 0.7)
33
+ max_tokens = body.get("max_tokens", 256)
34
+ model = body.get("model", "meta-llama/Llama-3.3-70B-Instruct")
35
+
36
+ # Get session ID from the request
37
+ session_id = request.headers.get("X-Session-ID")
38
+ if not session_id:
39
+ raise HTTPException(status_code=400, detail="Missing 'X-Session-ID' header")
40
+ if session_id not in attachments_in_huggingface: attachments_in_huggingface[session_id] = {}
41
+
42
+ # Handle file attachments if present
43
+ conversation = await handle_attachments(session_id, conversation)
44
+ huggingface_messages = []
45
+ for msg in conversation:
46
+ role = "user" if msg["role"] == "user" else "assistant"
47
+
48
+ pdf_texts = []
49
+ if "attachments" in msg:
50
+ for attachment in msg["attachments"]:
51
+ if attachment["file_path"].endswith(".pdf"):
52
+ if attachment["file_path"] not in attachments_in_huggingface[session_id]:
53
+ pdf_text = await extract_text_from_pdf(attachment["file_path"])
54
+ pdf_texts.append([attachment["name"], pdf_text])
55
+ attachments_in_huggingface[session_id][attachment["name"]] = pdf_text
56
+ else:
57
+ pdf_texts.append([attachment["name"], attachments_in_huggingface[session_id][attachment["name"]]])
58
+
59
+ huggingface_messages.append({"role": role, "content": msg["content"]})
60
+ for pdf_text in pdf_texts:
61
+ huggingface_messages.append({"role": "user", "content": f"{pdf_text[0]}\n\n{pdf_text[1]}"})
62
+
63
+ async def event_generator():
64
+ try:
65
+ print(f"Starting stream for model: {model}, temperature: {temperature}, max_tokens: {max_tokens}")
66
+ line_count = 0
67
+
68
+ # Use the SDK to create a streaming completion
69
+ stream = await client.chat.completions.create(
70
+ model=model,
71
+ messages=huggingface_messages,
72
+ temperature=temperature,
73
+ max_tokens=max_tokens,
74
+ stream=True
75
+ )
76
+
77
+ async for chunk in stream:
78
+ if chunk.choices and chunk.choices[0].delta.content is not None:
79
+ content = chunk.choices[0].delta.content
80
+ line_count += 1
81
+ if line_count % 10 == 0:
82
+ print(f"Processed {line_count} stream chunks")
83
+
84
+ # Format the response in the same way as before
85
+ response_json = json.dumps({
86
+ "choices": [{"delta": {"content": content}}]
87
+ })
88
+ yield f"data: {response_json}\n\n"
89
+
90
+ # Send the [DONE] marker
91
+ print("Stream completed successfully")
92
+ yield "data: [DONE]\n\n"
93
+
94
+ except Exception as e:
95
+ print(f"Error during streaming: {str(e)}")
96
+ yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
97
+ finally:
98
+ print(f"Stream ended after processing {line_count if 'line_count' in locals() else 0} chunks")
99
+
100
+ print("Returning StreamingResponse to client")
101
+ return StreamingResponse(event_generator(), media_type="text/event-stream")
standalone/server/stream/openai.py ADDED
@@ -0,0 +1,101 @@
 
 
1
+ import os
2
+ import json
3
+ from fastapi import FastAPI, Request, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ from fastapi import APIRouter
6
+
7
+ from openai import AsyncOpenAI
8
+
9
+ from .utils import handle_attachments, extract_text_from_pdf
10
+
11
+ router = APIRouter()
12
+
13
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
14
+ client = AsyncOpenAI(api_key=OPENAI_API_KEY)
15
+
16
+ attachments_in_openai = {}
17
+
18
+ @router.post("/openai_stream")
19
+ async def openai_stream(request: Request):
20
+ try:
21
+ body = await request.json()
22
+ except Exception as e:
23
+ raise HTTPException(status_code=400, detail="Invalid JSON payload") from e
24
+
25
+ conversation = body.get("conversation")
26
+ if not conversation:
27
+ raise HTTPException(status_code=400, detail="Missing 'conversation' in payload")
28
+
29
+ print("--------------------------------")
30
+ print(body)
31
+ print()
32
+ temperature = body.get("temperature", 0.7)
33
+ max_tokens = body.get("max_tokens", 256)
34
+ model = body.get("model", "gpt-4o-mini")
35
+
36
+ # Get session ID from the request
37
+ session_id = request.headers.get("X-Session-ID")
38
+ if not session_id:
39
+ raise HTTPException(status_code=400, detail="Missing 'X-Session-ID' header")
40
+ if session_id not in attachments_in_openai: attachments_in_openai[session_id] = {}
41
+
42
+ # Handle file attachments if present
43
+ conversation = await handle_attachments(session_id, conversation)
44
+ gpt_messages = []
45
+ for msg in conversation:
46
+ role = "user" if msg["role"] == "user" else "assistant"
47
+
48
+ pdf_texts = []
49
+ if "attachments" in msg:
50
+ for attachment in msg["attachments"]:
51
+ if attachment["file_path"].endswith(".pdf"):
52
+ if attachment["file_path"] not in attachments_in_openai[session_id]:
53
+ pdf_text = await extract_text_from_pdf(attachment["file_path"])
54
+ pdf_texts.append([attachment["name"], pdf_text])
55
+ attachments_in_openai[session_id][attachment["name"]] = pdf_text
56
+ else:
57
+ pdf_texts.append([attachment["name"], attachments_in_openai[session_id][attachment["name"]]])
58
+
59
+ gpt_messages.append({"role": role, "content": msg["content"]})
60
+ for pdf_text in pdf_texts:
61
+ gpt_messages.append({"role": "user", "content": f"{pdf_text[0]}\n\n{pdf_text[1]}"})
62
+
63
+ async def event_generator():
64
+ try:
65
+ print(f"Starting stream for model: {model}, temperature: {temperature}, max_tokens: {max_tokens}")
66
+ line_count = 0
67
+
68
+ # Use the SDK to create a streaming completion
69
+ stream = await client.chat.completions.create(
70
+ model=model,
71
+ messages=gpt_messages,
72
+ temperature=temperature,
73
+ max_tokens=max_tokens,
74
+ stream=True
75
+ )
76
+
77
+ async for chunk in stream:
78
+ if chunk.choices and chunk.choices[0].delta.content is not None:
79
+ content = chunk.choices[0].delta.content
80
+ line_count += 1
81
+ if line_count % 10 == 0:
82
+ print(f"Processed {line_count} stream chunks")
83
+
84
+ # Format the response in the same way as before
85
+ response_json = json.dumps({
86
+ "choices": [{"delta": {"content": content}}]
87
+ })
88
+ yield f"data: {response_json}\n\n"
89
+
90
+ # Send the [DONE] marker
91
+ print("Stream completed successfully")
92
+ yield "data: [DONE]\n\n"
93
+
94
+ except Exception as e:
95
+ print(f"Error during streaming: {str(e)}")
96
+ yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
97
+ finally:
98
+ print(f"Stream ended after processing {line_count if 'line_count' in locals() else 0} chunks")
99
+
100
+ print("Returning StreamingResponse to client")
101
+ return StreamingResponse(event_generator(), media_type="text/event-stream")
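All four streaming routes emit the same SSE shape the front-end parses: `data: {"choices": [{"delta": {"content": ...}}]}` lines terminated by `data: [DONE]`. A minimal sketch of consuming `/openai_stream` from Python, assuming the server above is running on 127.0.0.1:8000 and that `demo-session` is an arbitrary session id:

```python
# Minimal sketch of a client for the /openai_stream SSE endpoint
# (assumes the FastAPI server from this commit is running on 127.0.0.1:8000).
import json
import httpx

payload = {
    "conversation": [{"role": "user", "content": "Hello!"}],
    "temperature": 0.7,
    "max_tokens": 256,
    "model": "gpt-4o-mini",
}

with httpx.stream(
    "POST",
    "http://127.0.0.1:8000/openai_stream",
    json=payload,
    headers={"X-Session-ID": "demo-session"},  # the server keys cached attachments by this header
    timeout=None,
) as response:
    for line in response.iter_lines():
        if not line.startswith("data:"):
            continue
        data = line[len("data:"):].strip()
        if data == "[DONE]":
            break
        delta = json.loads(data).get("choices", [{}])[0].get("delta", {}).get("content")
        if delta:
            print(delta, end="", flush=True)
```

The `/anthropic_stream`, `/gemini_stream`, and `/huggingface_stream` routes use the same format; only the URL and the payload key (`messages` instead of `conversation`) differ.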
standalone/server/stream/utils.py ADDED
@@ -0,0 +1,66 @@
 
 
1
+ import os
2
+ import base64
3
+ from collections import defaultdict
4
+
5
+ import PyPDF2
6
+
7
+ async def extract_text_from_pdf(pdf_path):
8
+ text = ""
9
+ with open(pdf_path, "rb") as pdf_file:
10
+ reader = PyPDF2.PdfReader(pdf_file)
11
+ for page in reader.pages:
12
+ text += page.extract_text() + "\n"
13
+ return text.strip()
14
+
15
+ async def handle_attachments(session_id, conversation, remove_content=True):
16
+ """
17
+ Process attachments for each message in the conversation.
18
+
19
+ Args:
20
+ session_id (str): The unique identifier for the session
21
+ conversation (list): List of message objects containing attachments
22
+
23
+ Returns:
24
+ list: The conversation with each attachment saved to disk and annotated with a 'file_path' key
25
+ """
26
+ # Process attachments for each message in the conversation
27
+ for outer_idx, msg in enumerate(conversation):
28
+ if "attachments" in msg and msg["attachments"]:
29
+ # Create a temporary folder for this session if it doesn't exist
30
+ session_folder = os.path.join("temp_attachments", session_id)
31
+ os.makedirs(session_folder, exist_ok=True)
32
+
33
+ for inner_idx, attachment in enumerate(msg["attachments"]):
34
+ attachment_name = attachment.get("name", "unknown_file")
35
+ attachment_content = attachment.get("content")
36
+
37
+ # Check if this attachment already exists in the session
38
+ attachment_exists = False
39
+ file_path = None
40
+
41
+ for existing_attachment in msg["attachments"]:
42
+ if existing_attachment.get("name") == attachment_name and existing_attachment.get("file_path"):
43
+ attachment_exists = True
44
+ file_path = existing_attachment.get("file_path")
45
+ break
46
+
47
+ # Only decode and save if it's a new attachment
48
+ if not attachment_exists and attachment_content:
49
+ try:
50
+ file_path = os.path.join(session_folder, attachment_name)
51
+ # Decode base64 content and write to file
52
+ with open(file_path, "wb") as f:
53
+ f.write(base64.b64decode(attachment_content))
54
+
55
+ except Exception as e:
56
+ print(f"Error saving attachment: {str(e)}")
57
+
58
+ # Add file_path to the original attachment dict
59
+ if file_path:
60
+ if remove_content:
61
+ del attachment["content"]
62
+ attachment["file_path"] = file_path
63
+ msg["attachments"][inner_idx] = attachment
64
+ conversation[outer_idx] = msg
65
+
66
+ return conversation
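For reference, a rough usage sketch of `handle_attachments`, mirroring the attachment shape that `fileToBase64` in `script.js` sends (the session id, file name, and base64 payload below are made up):

```python
# Rough usage sketch for handle_attachments; the session id and PDF content are made up.
import asyncio
import base64

from utils import handle_attachments  # i.e. standalone/server/stream/utils.py

conversation = [{
    "role": "user",
    "content": "Please summarize the attached paper.",
    "attachments": [{
        "name": "paper.pdf",
        "content": base64.b64encode(b"%PDF-1.4 ...").decode(),  # base64 body sent by the browser
    }],
}]

# Each attachment is written to temp_attachments/<session_id>/<name> and gains a
# "file_path" key; with remove_content=True (the default) the base64 body is dropped.
conversation = asyncio.run(handle_attachments("demo-session", conversation))
print(conversation[0]["attachments"][0]["file_path"])
```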
standalone/style.css CHANGED
@@ -321,10 +321,11 @@
321
  gap: 5px;
322
  }
323
  .file-item-vertical {
324
- background: #f0f0f0;
325
  padding: 6px 10px;
326
  border-radius: var(--border-radius);
327
- font-size: 0.9em;
 
328
  }
329
  /* Navigation Buttons */
330
  .nav {
@@ -441,12 +442,14 @@
441
  border: none;
442
  outline: none;
443
  resize: none;
444
- overflow: hidden;
445
  font-size: 1em;
446
  height: 36px;
447
- line-height: 36px;
448
  margin: 0;
449
  padding: 0 8px;
 
 
450
  }
451
  #chatInput::placeholder {
452
  color: #999;
@@ -471,130 +474,192 @@
471
  #sendBtn:hover {
472
  background: #f0f0f0;
473
  }
474
- /* Summary Overlay */
 
475
  #summaryOverlay {
476
- position: fixed;
477
- left: 0;
478
- right: 0;
479
- bottom: 0;
480
- height: 60%;
481
- background: #fff;
482
- box-shadow: 0 -4px 16px var(--light-shadow);
483
- transform: translateY(100%);
484
- transition: transform 0.3s ease;
485
- z-index: 20;
486
- display: flex;
487
- flex-direction: column;
488
- }
489
- #summaryOverlay.active {
490
- transform: translateY(0);
491
- }
492
- .summary-header {
493
- padding-left: 20px;
494
- padding-right: 20px;
495
- padding-top: 10px;
496
- padding-bottom: 10px;
497
- background: var(--primary-color);
498
- color: #fff;
499
- font-size: 1.4em;
500
- display: flex;
501
- justify-content: space-between;
502
- align-items: center;
503
- }
504
- .summary-header-buttons {
505
- display: flex;
506
- gap: 12px;
507
- }
508
- .download-summary {
509
- background: #fff;
510
- color: var(--primary-color);
511
- border: 1px solid var(--primary-color);
512
- border-radius: 6px;
513
- padding: 6px 12px;
514
- cursor: pointer;
515
- transition: background 0.3s, color 0.3s;
516
- }
517
- .download-summary:hover {
518
- background: var(--primary-color);
519
- color: #fff;
520
- }
521
- .close-summary {
522
- background: none;
523
- border: none;
524
- color: #fff;
525
- font-size: 1.4em;
526
- cursor: pointer;
527
- }
528
- .summary-content {
529
- padding: 20px;
530
- overflow-y: auto;
531
- flex: 1;
532
- }
533
- /* Settings Overlay */
534
- #settingsOverlay {
535
- position: fixed;
536
- left: 0;
537
- right: 0;
538
- bottom: 0;
539
- height: 40%;
540
- background: #fff;
541
- box-shadow: 0 -4px 16px var(--light-shadow);
542
- transform: translateY(100%);
543
- transition: transform 0.3s ease;
544
- z-index: 20;
545
- display: flex;
546
- flex-direction: column;
547
- }
548
- #settingsOverlay.active {
549
- transform: translateY(0);
550
- }
551
- .settings-header {
552
- padding-left: 20px;
553
- padding-right: 20px;
554
- padding-top: 10px;
555
- padding-bottom: 10px;
556
- background: var(--primary-color);
557
- color: #fff;
558
- font-size: 1.4em;
559
- display: flex;
560
- justify-content: space-between;
561
- align-items: center;
562
- }
563
- .close-settings {
564
- background: none;
565
- border: none;
566
- color: #fff;
567
- font-size: 1.6em;
568
- cursor: pointer;
569
- }
570
- .settings-content {
571
- padding: 20px;
572
- overflow-y: auto;
573
- flex: 1;
574
- font-size: 1.1em;
575
- line-height: 1.5;
576
- }
577
- .settings-group {
578
- margin-bottom: 20px;
579
- display: flex;
580
- align-items: center;
581
- gap: 12px;
582
- }
583
- .settings-group label {
584
- min-width: 120px;
585
- }
586
- .save-settings {
587
- background: var(--primary-color);
588
- color: #fff;
589
- border: none;
590
- border-radius: 8px;
591
- padding: 12px 24px;
592
- cursor: pointer;
593
- transition: background 0.3s;
594
- }
595
- .save-settings:hover {
596
- background: var(--primary-dark);
597
- }
 
 
 
 
 
598
  @media (max-width: 600px) {
599
  .nav-bar {
600
  display: none;
@@ -677,33 +742,47 @@
677
 
678
  /* Markdown */
679
  .markdown-body {
680
- font-size: 1em;
681
- line-height: 1.5;
682
- white-space: normal; /* Ensure paragraphs and lists break onto new lines */
683
- }
684
-
685
- .markdown-body p {
686
- margin: 0.75em 0; /* Add vertical space between paragraphs */
687
- }
688
-
689
- .markdown-body ul,
690
- .markdown-body ol {
691
- margin: 0.75em 0;
692
- padding-left: 1.5em; /* Indent bullets/numbers */
693
- }
694
-
695
- .markdown-body li {
696
- margin: 0.3em 0;
697
- }
698
-
699
- .markdown-body h1,
700
- .markdown-body h2,
701
- .markdown-body h3,
702
- .markdown-body h4,
703
- .markdown-body h5,
704
- .markdown-body h6 {
705
- margin-top: 1em;
706
- margin-bottom: 0.5em;
707
- font-weight: bold;
708
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
 
 
321
  gap: 5px;
322
  }
323
  .file-item-vertical {
324
+ background: #0500e624;
325
  padding: 6px 10px;
326
  border-radius: var(--border-radius);
327
+ font-size: 0.5em;
328
+ font-style: italic;
329
  }
330
  /* Navigation Buttons */
331
  .nav {
 
442
  border: none;
443
  outline: none;
444
  resize: none;
445
+ overflow: scroll;
446
  font-size: 1em;
447
  height: 36px;
448
+ /* line-height: 36px; */
449
  margin: 0;
450
  padding: 0 8px;
451
+ padding: 10px;
452
+ max-height: 100px;
453
  }
454
  #chatInput::placeholder {
455
  color: #999;
 
474
  #sendBtn:hover {
475
  background: #f0f0f0;
476
  }
477
+
478
+ /* Redesigned Summary Overlay with Max Height */
479
  #summaryOverlay {
480
+ position: fixed;
481
+ bottom: 0;
482
+ left: 50%;
483
+ transform: translateX(-50%) translateY(100%);
484
+ width: 90%;
485
+ /* max-width: 600px; Wider than settings panel */
486
+ max-height: 50vh; /* Limit to half the viewport height */
487
+ background: linear-gradient(135deg, #ffffff, #f7f7f7);
488
+ border-top-left-radius: 20px;
489
+ border-top-right-radius: 20px;
490
+ box-shadow: 0 -4px 20px rgba(0, 0, 0, 0.1);
491
+ transition: transform 0.3s ease;
492
+ z-index: 20;
493
+ display: flex;
494
+ flex-direction: column;
495
+ overflow: hidden;
496
+ padding-bottom: 50px;
497
+ }
498
+
499
+ #summaryOverlay.active {
500
+ transform: translateX(-50%) translateY(0);
501
+ }
502
+
503
+ .summary-header {
504
+ display: flex;
505
+ align-items: center;
506
+ justify-content: space-between;
507
+ padding: 16px 20px;
508
+ background: linear-gradient(135deg, #4a90e2, #4178c0);
509
+ color: #fff;
510
+ font-size: 1.5em;
511
+ border-top-left-radius: 20px;
512
+ border-top-right-radius: 20px;
513
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
514
+ }
515
+
516
+ .summary-header span {
517
+ font-weight: 500;
518
+ }
519
+
520
+ .summary-header-buttons {
521
+ display: flex;
522
+ gap: 12px;
523
+ }
524
+
525
+ .download-summary {
526
+ background: #fff;
527
+ color: #4a90e2;
528
+ border: 1px solid #4a90e2;
529
+ border-radius: 8px;
530
+ padding: 6px 12px;
531
+ cursor: pointer;
532
+ transition: background 0.3s, color 0.3s;
533
+ }
534
+
535
+ .download-summary:hover {
536
+ background: #4a90e2;
537
+ color: #fff;
538
+ }
539
+
540
+ .close-summary {
541
+ background: none;
542
+ border: none;
543
+ color: #fff;
544
+ font-size: 1.8em;
545
+ cursor: pointer;
546
+ line-height: 1;
547
+ }
548
+
549
+ .summary-content {
550
+ padding: 20px;
551
+ overflow-y: auto; /* Scrollbar appears when content exceeds available height */
552
+ flex: 1;
553
+ font-size: 1em;
554
+ color: #333;
555
+ background: #fff;
556
+ }
557
+
558
+ /* Redesigned Settings Overlay */
559
+ #settingsOverlay {
560
+ position: fixed;
561
+ bottom: 0;
562
+ left: 50%;
563
+ transform: translateX(-50%) translateY(100%);
564
+ width: 90%;
565
+ max-width: 500px;
566
+ background: linear-gradient(135deg, #ffffff, #f7f7f7);
567
+ border-top-left-radius: 20px;
568
+ border-top-right-radius: 20px;
569
+ box-shadow: 0 -4px 20px rgba(0, 0, 0, 0.1);
570
+ transition: transform 0.3s ease;
571
+ z-index: 20;
572
+ display: flex;
573
+ flex-direction: column;
574
+ overflow: hidden;
575
+ padding-bottom: 50px;
576
+ }
577
+
578
+ #settingsOverlay.active {
579
+ transform: translateX(-50%) translateY(0);
580
+ }
581
+
582
+ .settings-header {
583
+ display: flex;
584
+ align-items: center;
585
+ justify-content: space-between;
586
+ padding: 16px 20px;
587
+ background: linear-gradient(135deg, #4a90e2, #4178c0);
588
+ color: #fff;
589
+ font-size: 1.5em;
590
+ border-top-left-radius: 20px;
591
+ border-top-right-radius: 20px;
592
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
593
+ }
594
+
595
+ .settings-header span {
596
+ font-weight: 500;
597
+ }
598
+
599
+ .close-settings {
600
+ background: none;
601
+ border: none;
602
+ color: #fff;
603
+ font-size: 1.8em;
604
+ cursor: pointer;
605
+ line-height: 1;
606
+ }
607
+
608
+ .settings-content {
609
+ padding: 20px;
610
+ flex: 1;
611
+ overflow-y: auto;
612
+ font-size: 1em;
613
+ color: #333;
614
+ background: #fff;
615
+ }
616
+
617
+ .settings-group {
618
+ display: flex;
619
+ flex-direction: column;
620
+ margin-bottom: 20px;
621
+ }
622
+
623
+ .settings-group label {
624
+ margin-bottom: 8px;
625
+ font-weight: 500;
626
+ }
627
+
628
+ .settings-group input[type="range"],
629
+ .settings-group input[type="number"],
630
+ .settings-group select {
631
+ padding: 10px;
632
+ border: 1px solid #ccc;
633
+ border-radius: 8px;
634
+ outline: none;
635
+ width: 100%;
636
+ font-size: 1em;
637
+ background: #fefefe;
638
+ transition: border 0.3s;
639
+ }
640
+
641
+ .settings-group input[type="range"]:focus,
642
+ .settings-group input[type="number"]:focus,
643
+ .settings-group select:focus {
644
+ border-color: #4a90e2;
645
+ }
646
+
647
+ .save-settings {
648
+ background: linear-gradient(135deg, #4a90e2, #4178c0);
649
+ color: #fff;
650
+ border: none;
651
+ border-radius: 8px;
652
+ padding: 12px;
653
+ width: 100%;
654
+ font-size: 1em;
655
+ cursor: pointer;
656
+ transition: background 0.3s;
657
+ }
658
+
659
+ .save-settings:hover {
660
+ background: linear-gradient(135deg, #4178c0, #4a90e2);
661
+ }
662
+
663
  @media (max-width: 600px) {
664
  .nav-bar {
665
  display: none;
 
742
 
743
  /* Markdown */
744
  .markdown-body {
745
+ font-size: 1em;
746
+ line-height: 1.5;
747
+ white-space: normal; /* Ensure paragraphs and lists break onto new lines */
748
+ }
749
+
750
+ .markdown-body p {
751
+ margin: 0.75em 0; /* Add vertical space between paragraphs */
752
+ }
753
+
754
+ .markdown-body ul,
755
+ .markdown-body ol {
756
+ margin: 0.75em 0;
757
+ padding-left: 1.5em; /* Indent bullets/numbers */
758
+ }
759
+
760
+ .markdown-body li {
761
+ margin: 0.3em 0;
762
+ }
763
+
764
+ .markdown-body h1,
765
+ .markdown-body h2,
766
+ .markdown-body h3,
767
+ .markdown-body h4,
768
+ .markdown-body h5,
769
+ .markdown-body h6 {
770
+ margin-top: 1em;
771
+ margin-bottom: 0.5em;
772
+ font-weight: bold;
773
+ }
774
+
775
+ .blinking-cursor {
776
+ display: inline-block;
777
+ width: 10px;
778
+ height: 1em;
779
+ background-color: currentColor;
780
+ margin-left: 2px;
781
+ animation: blink 1s steps(2, start) infinite;
782
+ }
783
+
784
+ @keyframes blink {
785
+ 50% { opacity: 0; }
786
+ 100% { opacity: 1; }
787
+ }
788