openfree committed on
Commit
fca6281
·
verified ·
1 Parent(s): 4dd6e62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -706,13 +706,12 @@ def stream_chat(
706
 
707
  print(f"์ž…๋ ฅ ํ…์„œ ์ƒ์„ฑ ํ›„ CUDA ๋ฉ”๋ชจ๋ฆฌ: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
708
 
709
-
710
- try:
711
- # ์ŠคํŠธ๋ฆฌ๋จธ ์ดˆ๊ธฐํ™” ์‹œ ํƒ€์ž„์•„์›ƒ์„ ๋” ๊ธธ๊ฒŒ ์„ค์ •
712
  streamer = TextIteratorStreamer(
713
  tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
714
  )
715
-
 
716
  generate_kwargs = dict(
717
  **inputs,
718
  streamer=streamer,
@@ -727,8 +726,6 @@ def stream_chat(
727
  use_cache=True
728
  )
729
 
730
-
731
-
732
  # ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
733
  clear_cuda_memory()
734
 
@@ -736,26 +733,39 @@ def stream_chat(
736
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
737
  thread.start()
738
 
739
- # ์ŠคํŠธ๋ฆฌ๋ฐ ์ฒ˜๋ฆฌ ์ค‘ ์˜ˆ์™ธ ์ฒ˜๋ฆฌ ๊ฐ•ํ™”
740
  buffer = ""
 
 
 
741
  try:
742
  for new_text in streamer:
743
  try:
744
  buffer += new_text
745
- yield "", history + [[message, buffer]]
 
 
 
 
 
 
 
746
  except Exception as inner_e:
747
  print(f"๊ฐœ๋ณ„ ํ† ํฐ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {str(inner_e)}")
748
  continue
749
- except Exception as stream_e:
750
- print(f"์ŠคํŠธ๋ฆฌ๋ฐ ์ „์ฒด ์˜ค๋ฅ˜: {str(stream_e)}")
751
- if not buffer:
752
- buffer = "์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."
 
 
 
 
 
 
 
 
753
  yield "", history + [[message, buffer]]
754
- except Exception as outer_e:
755
- print(f"์ „์ฒด ์ƒ์„ฑ ๊ณผ์ • ์˜ค๋ฅ˜: {str(outer_e)}")
756
- yield "", history + [[message, "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค, ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."]]
757
-
758
- ]
759
 
760
  # ์Šค๋ ˆ๋“œ๊ฐ€ ์—ฌ์ „ํžˆ ์‹คํ–‰ ์ค‘์ด๋ฉด ์ข…๋ฃŒ ๋Œ€๊ธฐ
761
  if thread.is_alive():
 
706
 
707
  print(f"์ž…๋ ฅ ํ…์„œ ์ƒ์„ฑ ํ›„ CUDA ๋ฉ”๋ชจ๋ฆฌ: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
708
 
709
+ # ์ŠคํŠธ๋ฆฌ๋จธ ์„ค์ •
 
 
710
  streamer = TextIteratorStreamer(
711
  tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
712
  )
713
+
714
+ # ์ƒ์„ฑ ๋งค๊ฐœ๋ณ€์ˆ˜ ์„ค์ •
715
  generate_kwargs = dict(
716
  **inputs,
717
  streamer=streamer,
 
726
  use_cache=True
727
  )
728
 
 
 
729
  # ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
730
  clear_cuda_memory()
731
 
 
733
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
734
  thread.start()
735
 
736
+ # ์‘๋‹ต ์ŠคํŠธ๋ฆฌ๋ฐ
737
  buffer = ""
738
+ partial_message = ""
739
+ last_yield_time = time.time()
740
+
741
  try:
742
  for new_text in streamer:
743
  try:
744
  buffer += new_text
745
+ partial_message += new_text
746
+
747
+ # ์ผ์ • ์‹œ๊ฐ„๋งˆ๋‹ค ๋˜๋Š” ํ…์ŠคํŠธ๊ฐ€ ์Œ“์ผ ๋•Œ๋งˆ๋‹ค ๊ฒฐ๊ณผ ์—…๋ฐ์ดํŠธ
748
+ current_time = time.time()
749
+ if current_time - last_yield_time > 0.1 or len(partial_message) > 20:
750
+ yield "", history + [[message, buffer]]
751
+ partial_message = ""
752
+ last_yield_time = current_time
753
  except Exception as inner_e:
754
  print(f"๊ฐœ๋ณ„ ํ† ํฐ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {str(inner_e)}")
755
  continue
756
+
757
+ # ๋งˆ์ง€๋ง‰ ์‘๋‹ต ํ™•์ธ
758
+ if buffer:
759
+ yield "", history + [[message, buffer]]
760
+
761
+ # ๋Œ€ํ™” ๊ธฐ๋ก์— ์ €์žฅ
762
+ chat_history.add_conversation(message, buffer)
763
+
764
+ except Exception as e:
765
+ print(f"์ŠคํŠธ๋ฆฌ๋ฐ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
766
+ if not buffer: # ๋ฒ„ํผ๊ฐ€ ๋น„์–ด์žˆ์œผ๋ฉด ์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€ ํ‘œ์‹œ
767
+ buffer = f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
768
  yield "", history + [[message, buffer]]
 
 
 
 
 
769
 
770
  # ์Šค๋ ˆ๋“œ๊ฐ€ ์—ฌ์ „ํžˆ ์‹คํ–‰ ์ค‘์ด๋ฉด ์ข…๋ฃŒ ๋Œ€๊ธฐ
771
  if thread.is_alive():