Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -706,13 +706,12 @@ def stream_chat(
|
|
706 |
|
707 |
print(f"입력 텐서 생성 후 CUDA 메모리: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
|
708 |
|
709 |
-
|
710 |
-
try:
|
711 |
-
# 스트리머 초기화 시 타임아웃을 더 길게 설정
|
712 |
streamer = TextIteratorStreamer(
|
713 |
tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
|
714 |
)
|
715 |
-
|
|
|
716 |
generate_kwargs = dict(
|
717 |
**inputs,
|
718 |
streamer=streamer,
|
@@ -727,8 +726,6 @@ def stream_chat(
|
|
727 |
use_cache=True
|
728 |
)
|
729 |
|
730 |
-
|
731 |
-
|
732 |
# 메모리 정리
|
733 |
clear_cuda_memory()
|
734 |
|
@@ -736,26 +733,39 @@ def stream_chat(
|
|
736 |
thread = Thread(target=model.generate, kwargs=generate_kwargs)
|
737 |
thread.start()
|
738 |
|
739 |
-
# 스트리밍
|
740 |
buffer = ""
|
|
|
|
|
|
|
741 |
try:
|
742 |
for new_text in streamer:
|
743 |
try:
|
744 |
buffer += new_text
|
745 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
746 |
except Exception as inner_e:
|
747 |
print(f"개별 토큰 처리 중 오류: {str(inner_e)}")
|
748 |
continue
|
749 |
-
|
750 |
-
|
751 |
-
if
|
752 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
753 |
yield "", history + [[message, buffer]]
|
754 |
-
except Exception as outer_e:
|
755 |
-
print(f"전체 생성 과정 오류: {str(outer_e)}")
|
756 |
-
yield "", history + [[message, "죄송합니다, 응답을 생성할 수 없습니다."]]
|
757 |
-
|
758 |
-
]
|
759 |
|
760 |
# 스레드가 여전히 실행 중이면 종료 대기
|
761 |
if thread.is_alive():
|
|
|
706 |
|
707 |
print(f"입력 텐서 생성 후 CUDA 메모리: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
|
708 |
|
709 |
+
# 스트리머 설정
|
|
|
|
|
710 |
streamer = TextIteratorStreamer(
|
711 |
tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
|
712 |
)
|
713 |
+
|
714 |
+
# 생성 매개변수 설정
|
715 |
generate_kwargs = dict(
|
716 |
**inputs,
|
717 |
streamer=streamer,
|
|
|
726 |
use_cache=True
|
727 |
)
|
728 |
|
|
|
|
|
729 |
# 메모리 정리
|
730 |
clear_cuda_memory()
|
731 |
|
|
|
733 |
thread = Thread(target=model.generate, kwargs=generate_kwargs)
|
734 |
thread.start()
|
735 |
|
736 |
+
# 응답 스트리밍
|
737 |
buffer = ""
|
738 |
+
partial_message = ""
|
739 |
+
last_yield_time = time.time()
|
740 |
+
|
741 |
try:
|
742 |
for new_text in streamer:
|
743 |
try:
|
744 |
buffer += new_text
|
745 |
+
partial_message += new_text
|
746 |
+
|
747 |
+
# 일정 시간마다 또는 텍스트가 쌓일 때마다 결과 업데이트
|
748 |
+
current_time = time.time()
|
749 |
+
if current_time - last_yield_time > 0.1 or len(partial_message) > 20:
|
750 |
+
yield "", history + [[message, buffer]]
|
751 |
+
partial_message = ""
|
752 |
+
last_yield_time = current_time
|
753 |
except Exception as inner_e:
|
754 |
print(f"개별 토큰 처리 중 오류: {str(inner_e)}")
|
755 |
continue
|
756 |
+
|
757 |
+
# 마지막 응답 확인
|
758 |
+
if buffer:
|
759 |
+
yield "", history + [[message, buffer]]
|
760 |
+
|
761 |
+
# 대화 기록에 저장
|
762 |
+
chat_history.add_conversation(message, buffer)
|
763 |
+
|
764 |
+
except Exception as e:
|
765 |
+
print(f"스트리밍 중 오류 발생: {str(e)}")
|
766 |
+
if not buffer: # 버퍼가 비어있으면 오류 메시지 표시
|
767 |
+
buffer = f"응답 생성 중 오류가 발생했습니다: {str(e)}"
|
768 |
yield "", history + [[message, buffer]]
|
|
|
|
|
|
|
|
|
|
|
769 |
|
770 |
# 스레드가 여전히 실행 중이면 종료 대기
|
771 |
if thread.is_alive():
|