Spaces:
Building
Building
Update app-backup2.py
Browse files- app-backup2.py +119 -115
app-backup2.py
CHANGED
@@ -9,6 +9,7 @@ from requests.adapters import HTTPAdapter
|
|
9 |
from requests.packages.urllib3.util.retry import Retry
|
10 |
from openai import OpenAI
|
11 |
from bs4 import BeautifulSoup
|
|
|
12 |
|
13 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
14 |
if not ACCESS_TOKEN:
|
@@ -46,6 +47,7 @@ API_KEY = os.getenv("SERPHOUSE_API_KEY")
|
|
46 |
# 국가별 언어 코드 매핑
|
47 |
COUNTRY_LANGUAGES = {
|
48 |
"United States": "en",
|
|
|
49 |
"United Kingdom": "en",
|
50 |
"Taiwan": "zh-TW",
|
51 |
"Canada": "en",
|
@@ -116,6 +118,7 @@ COUNTRY_LANGUAGES = {
|
|
116 |
|
117 |
COUNTRY_LOCATIONS = {
|
118 |
"United States": "United States",
|
|
|
119 |
"United Kingdom": "United Kingdom",
|
120 |
"Taiwan": "Taiwan",
|
121 |
"Canada": "Canada",
|
@@ -187,6 +190,7 @@ COUNTRY_LOCATIONS = {
|
|
187 |
# 지역 정의
|
188 |
# 동아시아 지역
|
189 |
COUNTRY_LANGUAGES_EAST_ASIA = {
|
|
|
190 |
"Taiwan": "zh-TW",
|
191 |
"Japan": "ja",
|
192 |
"China": "zh",
|
@@ -194,6 +198,7 @@ COUNTRY_LANGUAGES_EAST_ASIA = {
|
|
194 |
}
|
195 |
|
196 |
COUNTRY_LOCATIONS_EAST_ASIA = {
|
|
|
197 |
"Taiwan": "Taiwan",
|
198 |
"Japan": "Japan",
|
199 |
"China": "China",
|
@@ -720,7 +725,6 @@ def search_global(query, region, articles_state_global):
|
|
720 |
css = """
|
721 |
/* 전역 스타일 */
|
722 |
footer {visibility: hidden;}
|
723 |
-
|
724 |
/* 레이아웃 컨테이너 */
|
725 |
#status_area {
|
726 |
background: rgba(255, 255, 255, 0.9);
|
@@ -729,40 +733,33 @@ footer {visibility: hidden;}
|
|
729 |
margin-bottom: 20px;
|
730 |
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
731 |
}
|
732 |
-
|
733 |
#results_area {
|
734 |
padding: 10px;
|
735 |
margin-top: 10px;
|
736 |
}
|
737 |
-
|
738 |
/* 탭 스타일 */
|
739 |
.tabs {
|
740 |
border-bottom: 2px solid #ddd !important;
|
741 |
margin-bottom: 20px !important;
|
742 |
}
|
743 |
-
|
744 |
.tab-nav {
|
745 |
border-bottom: none !important;
|
746 |
margin-bottom: 0 !important;
|
747 |
}
|
748 |
-
|
749 |
.tab-nav button {
|
750 |
font-weight: bold !important;
|
751 |
padding: 10px 20px !important;
|
752 |
}
|
753 |
-
|
754 |
.tab-nav button.selected {
|
755 |
border-bottom: 2px solid #1f77b4 !important;
|
756 |
color: #1f77b4 !important;
|
757 |
}
|
758 |
-
|
759 |
/* 상태 메시지 */
|
760 |
#status_area .markdown-text {
|
761 |
font-size: 1.1em;
|
762 |
color: #2c3e50;
|
763 |
padding: 10px 0;
|
764 |
}
|
765 |
-
|
766 |
/* 기본 컨테이너 */
|
767 |
.group {
|
768 |
border: 1px solid #eee;
|
@@ -771,19 +768,16 @@ footer {visibility: hidden;}
|
|
771 |
border-radius: 5px;
|
772 |
background: white;
|
773 |
}
|
774 |
-
|
775 |
/* 버튼 스타일 */
|
776 |
.primary-btn {
|
777 |
background: #1f77b4 !important;
|
778 |
border: none !important;
|
779 |
}
|
780 |
-
|
781 |
/* 입력 필드 */
|
782 |
.textbox {
|
783 |
border: 1px solid #ddd !important;
|
784 |
border-radius: 4px !important;
|
785 |
}
|
786 |
-
|
787 |
/* 프로그레스바 컨테이너 */
|
788 |
.progress-container {
|
789 |
position: fixed;
|
@@ -794,7 +788,6 @@ footer {visibility: hidden;}
|
|
794 |
background: #e0e0e0;
|
795 |
z-index: 1000;
|
796 |
}
|
797 |
-
|
798 |
/* 프로그레스바 */
|
799 |
.progress-bar {
|
800 |
height: 100%;
|
@@ -803,7 +796,6 @@ footer {visibility: hidden;}
|
|
803 |
transition: width 0.3s ease;
|
804 |
animation: progress-glow 1.5s ease-in-out infinite;
|
805 |
}
|
806 |
-
|
807 |
/* 프로그레스 텍스트 */
|
808 |
.progress-text {
|
809 |
position: fixed;
|
@@ -818,7 +810,6 @@ footer {visibility: hidden;}
|
|
818 |
z-index: 1001;
|
819 |
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
|
820 |
}
|
821 |
-
|
822 |
/* 프로그레스바 애니메이션 */
|
823 |
@keyframes progress-glow {
|
824 |
0% {
|
@@ -831,7 +822,6 @@ footer {visibility: hidden;}
|
|
831 |
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
|
832 |
}
|
833 |
}
|
834 |
-
|
835 |
/* 반응형 디자인 */
|
836 |
@media (max-width: 768px) {
|
837 |
.group {
|
@@ -844,32 +834,27 @@ footer {visibility: hidden;}
|
|
844 |
padding: 3px 10px;
|
845 |
}
|
846 |
}
|
847 |
-
|
848 |
/* 로딩 상태 표시 개선 */
|
849 |
.loading {
|
850 |
opacity: 0.7;
|
851 |
pointer-events: none;
|
852 |
transition: opacity 0.3s ease;
|
853 |
}
|
854 |
-
|
855 |
/* 결과 컨테이너 애니메이션 */
|
856 |
.group {
|
857 |
transition: all 0.3s ease;
|
858 |
opacity: 0;
|
859 |
transform: translateY(20px);
|
860 |
}
|
861 |
-
|
862 |
.group.visible {
|
863 |
opacity: 1;
|
864 |
transform: translateY(0);
|
865 |
}
|
866 |
-
|
867 |
/* Examples 스타일링 */
|
868 |
.examples-table {
|
869 |
margin-top: 10px !important;
|
870 |
margin-bottom: 20px !important;
|
871 |
}
|
872 |
-
|
873 |
.examples-table button {
|
874 |
background-color: #f0f0f0 !important;
|
875 |
border: 1px solid #ddd !important;
|
@@ -878,13 +863,11 @@ footer {visibility: hidden;}
|
|
878 |
margin: 2px !important;
|
879 |
transition: all 0.3s ease !important;
|
880 |
}
|
881 |
-
|
882 |
.examples-table button:hover {
|
883 |
background-color: #e0e0e0 !important;
|
884 |
transform: translateY(-1px) !important;
|
885 |
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
|
886 |
}
|
887 |
-
|
888 |
.examples-table .label {
|
889 |
font-weight: bold !important;
|
890 |
color: #444 !important;
|
@@ -898,85 +881,100 @@ def get_article_content(url):
|
|
898 |
headers = {
|
899 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
900 |
}
|
901 |
-
|
|
|
|
|
|
|
|
|
|
|
902 |
soup = BeautifulSoup(response.content, 'html.parser')
|
903 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
904 |
# 일반적인 기사 본문 컨테이너 검색
|
905 |
-
|
906 |
-
|
907 |
-
|
908 |
-
|
909 |
-
soup.find('div', class_='content'),
|
910 |
-
soup.find('div', {'id': 'article-body'})
|
911 |
]
|
912 |
|
913 |
-
for
|
914 |
-
|
915 |
-
|
916 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
917 |
|
918 |
-
|
919 |
-
|
920 |
-
|
921 |
-
|
922 |
-
|
923 |
-
|
924 |
-
|
925 |
-
|
|
|
|
|
|
|
|
|
|
|
926 |
|
927 |
-
return content
|
928 |
except Exception as e:
|
|
|
929 |
return f"Error crawling content: {str(e)}"
|
930 |
|
931 |
-
def respond(
|
932 |
-
url,
|
933 |
-
history: list[tuple[str, str]],
|
934 |
-
system_message,
|
935 |
-
max_tokens,
|
936 |
-
temperature,
|
937 |
-
top_p,
|
938 |
-
):
|
939 |
if not url.startswith('http'):
|
940 |
history.append((url, "์ฌ๋ฐ๋ฅธ URL์ ์
๋ ฅํด์ฃผ์ธ์."))
|
941 |
return history
|
942 |
|
943 |
try:
|
944 |
-
# 기사 내용 추출
|
945 |
article_content = get_article_content(url)
|
946 |
|
947 |
-
|
948 |
-
|
949 |
-
|
950 |
-
|
951 |
-
|
952 |
-
|
953 |
-
|
954 |
-
|
955 |
-
|
956 |
-
|
957 |
-
|
958 |
-
|
959 |
-
|
960 |
-
|
961 |
-
|
962 |
-
|
963 |
-
|
964 |
-
|
965 |
-
|
966 |
-
|
967 |
-
|
968 |
-
|
969 |
-
|
970 |
-
|
|
|
971 |
|
972 |
messages = [
|
973 |
{
|
974 |
-
"role": "system",
|
975 |
-
"content":
|
976 |
-
๋ชจ๋ ์์
์ ๋ฐ๋์ ๋ค์ ๋ ๋จ๊ณ๋ก ์งํํ๊ณ , ๊ฐ ๋จ๊ณ๋ฅผ ๋ช
ํํ ๊ตฌ๋ถํ์ฌ ์ถ๋ ฅํด์ผ ํฉ๋๋ค:
|
977 |
-
1. ์๋ฌธ ๋ฒ์ญ: ===๋ฒ์ญ=== ํ์ ํ ์ ํํ ํ๊ตญ์ด ๋ฒ์ญ ์ ๊ณต
|
978 |
-
2. ๊ธฐ์ฌ ์์ฑ: ===๊ธฐ์ฌ=== ํ์ ํ ๋ฒ์ญ๋ณธ์ ๊ธฐ๋ฐ์ผ๋ก ํ๊ตญ์ด ๋ด์ค ๊ธฐ์ฌ ์์ฑ
|
979 |
-
๋ ๋จ๊ณ๋ฅผ ๊ฑด๋๋ฐ๊ฑฐ๋ ํตํฉํ์ง ๋ง๊ณ ๋ฐ๋์ ์์ฐจ์ ์ผ๋ก ์งํํ์ธ์."""
|
980 |
},
|
981 |
{"role": "user", "content": translation_prompt}
|
982 |
]
|
@@ -984,8 +982,6 @@ def respond(
|
|
984 |
history.append((url, "๋ฒ์ญ ๋ฐ ๊ธฐ์ฌ ์์ฑ์ ์์ํฉ๋๋ค..."))
|
985 |
|
986 |
full_response = ""
|
987 |
-
current_section = ""
|
988 |
-
|
989 |
for message in client.chat.completions.create(
|
990 |
model="CohereForAI/c4ai-command-r-plus-08-2024",
|
991 |
max_tokens=max_tokens,
|
@@ -998,11 +994,6 @@ def respond(
|
|
998 |
token = message.choices[0].delta.content
|
999 |
if token:
|
1000 |
full_response += token
|
1001 |
-
# ์น์
๊ตฌ๋ถ์ ํ์ธ ๋ฐ ํฌ๋งทํ
|
1002 |
-
if "===๋ฒ์ญ===" in token or "===๊ธฐ์ฌ===" in token:
|
1003 |
-
current_section = token.strip()
|
1004 |
-
full_response += "\n\n"
|
1005 |
-
|
1006 |
history[-1] = (url, full_response)
|
1007 |
yield history
|
1008 |
|
@@ -1120,7 +1111,8 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์๋น
|
|
1120 |
# AI 번역 탭 추가
|
1121 |
with gr.Tab("AI ๊ธฐ์ฌ ์์ฑ"):
|
1122 |
gr.Markdown("๋ด์ค URL์ ์
๋ ฅํ๋ฉด AI๊ฐ ํ๊ตญ์ด๋ก ๋ฒ์ญํ์ฌ ๊ธฐ์ฌ ํ์์ผ๋ก ์์ฑํฉ๋๋ค.")
|
1123 |
-
|
|
|
1124 |
with gr.Column():
|
1125 |
chatbot = gr.Chatbot(height=600)
|
1126 |
|
@@ -1129,32 +1121,44 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์๋น
|
|
1129 |
label="๋ด์ค URL",
|
1130 |
placeholder="https://..."
|
1131 |
)
|
1132 |
-
|
1133 |
with gr.Accordion("๊ณ ๊ธ ์ค์ ", open=False):
|
|
|
1134 |
system_message = gr.Textbox(
|
1135 |
-
|
1136 |
-
|
1137 |
-
|
1138 |
-
|
1139 |
-
|
1140 |
-
|
1141 |
-
|
1142 |
-
|
1143 |
-
|
1144 |
-
|
1145 |
-
|
1146 |
-
|
1147 |
-
|
1148 |
-
|
1149 |
-
|
1150 |
-
|
1151 |
-
|
1152 |
-
|
1153 |
-
|
1154 |
-
|
1155 |
-
|
1156 |
-
|
1157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1158 |
|
1159 |
max_tokens = gr.Slider(
|
1160 |
minimum=1,
|
@@ -1234,7 +1238,7 @@ iface.launch(
|
|
1234 |
server_name="0.0.0.0",
|
1235 |
server_port=7860,
|
1236 |
share=True,
|
1237 |
-
auth=("
|
1238 |
ssl_verify=False,
|
1239 |
show_error=True
|
1240 |
-
)
|
|
|
9 |
from requests.packages.urllib3.util.retry import Retry
|
10 |
from openai import OpenAI
|
11 |
from bs4 import BeautifulSoup
|
12 |
+
import re # re 모듈 추가
|
13 |
|
14 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
15 |
if not ACCESS_TOKEN:
|
|
|
47 |
# 국가별 언어 코드 매핑
|
48 |
COUNTRY_LANGUAGES = {
|
49 |
"United States": "en",
|
50 |
+
"KOREA": "ko",
|
51 |
"United Kingdom": "en",
|
52 |
"Taiwan": "zh-TW",
|
53 |
"Canada": "en",
|
|
|
118 |
|
119 |
COUNTRY_LOCATIONS = {
|
120 |
"United States": "United States",
|
121 |
+
"KOREA": "kr",
|
122 |
"United Kingdom": "United Kingdom",
|
123 |
"Taiwan": "Taiwan",
|
124 |
"Canada": "Canada",
|
|
|
190 |
# 지역 정의
|
191 |
# 동아시아 지역
|
192 |
COUNTRY_LANGUAGES_EAST_ASIA = {
|
193 |
+
"KOREA": "ko",
|
194 |
"Taiwan": "zh-TW",
|
195 |
"Japan": "ja",
|
196 |
"China": "zh",
|
|
|
198 |
}
|
199 |
|
200 |
COUNTRY_LOCATIONS_EAST_ASIA = {
|
201 |
+
"KOREA": "KOREA",
|
202 |
"Taiwan": "Taiwan",
|
203 |
"Japan": "Japan",
|
204 |
"China": "China",
|
|
|
725 |
css = """
|
726 |
/* 전역 스타일 */
|
727 |
footer {visibility: hidden;}
|
|
|
728 |
/* 레이아웃 컨테이너 */
|
729 |
#status_area {
|
730 |
background: rgba(255, 255, 255, 0.9);
|
|
|
733 |
margin-bottom: 20px;
|
734 |
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
735 |
}
|
|
|
736 |
#results_area {
|
737 |
padding: 10px;
|
738 |
margin-top: 10px;
|
739 |
}
|
|
|
740 |
/* 탭 스타일 */
|
741 |
.tabs {
|
742 |
border-bottom: 2px solid #ddd !important;
|
743 |
margin-bottom: 20px !important;
|
744 |
}
|
|
|
745 |
.tab-nav {
|
746 |
border-bottom: none !important;
|
747 |
margin-bottom: 0 !important;
|
748 |
}
|
|
|
749 |
.tab-nav button {
|
750 |
font-weight: bold !important;
|
751 |
padding: 10px 20px !important;
|
752 |
}
|
|
|
753 |
.tab-nav button.selected {
|
754 |
border-bottom: 2px solid #1f77b4 !important;
|
755 |
color: #1f77b4 !important;
|
756 |
}
|
|
|
757 |
/* 상태 메시지 */
|
758 |
#status_area .markdown-text {
|
759 |
font-size: 1.1em;
|
760 |
color: #2c3e50;
|
761 |
padding: 10px 0;
|
762 |
}
|
|
|
763 |
/* 기본 컨테이너 */
|
764 |
.group {
|
765 |
border: 1px solid #eee;
|
|
|
768 |
border-radius: 5px;
|
769 |
background: white;
|
770 |
}
|
|
|
771 |
/* 버튼 스타일 */
|
772 |
.primary-btn {
|
773 |
background: #1f77b4 !important;
|
774 |
border: none !important;
|
775 |
}
|
|
|
776 |
/* 입력 필드 */
|
777 |
.textbox {
|
778 |
border: 1px solid #ddd !important;
|
779 |
border-radius: 4px !important;
|
780 |
}
|
|
|
781 |
/* 프로그레스바 컨테이너 */
|
782 |
.progress-container {
|
783 |
position: fixed;
|
|
|
788 |
background: #e0e0e0;
|
789 |
z-index: 1000;
|
790 |
}
|
|
|
791 |
/* 프로그레스바 */
|
792 |
.progress-bar {
|
793 |
height: 100%;
|
|
|
796 |
transition: width 0.3s ease;
|
797 |
animation: progress-glow 1.5s ease-in-out infinite;
|
798 |
}
|
|
|
799 |
/* 프로그레스 텍스트 */
|
800 |
.progress-text {
|
801 |
position: fixed;
|
|
|
810 |
z-index: 1001;
|
811 |
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
|
812 |
}
|
|
|
813 |
/* 프로그레스바 애니메이션 */
|
814 |
@keyframes progress-glow {
|
815 |
0% {
|
|
|
822 |
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
|
823 |
}
|
824 |
}
|
|
|
825 |
/* 반응형 디자인 */
|
826 |
@media (max-width: 768px) {
|
827 |
.group {
|
|
|
834 |
padding: 3px 10px;
|
835 |
}
|
836 |
}
|
|
|
837 |
/* 로딩 상태 표시 개선 */
|
838 |
.loading {
|
839 |
opacity: 0.7;
|
840 |
pointer-events: none;
|
841 |
transition: opacity 0.3s ease;
|
842 |
}
|
|
|
843 |
/* 결과 컨테이너 애니메이션 */
|
844 |
.group {
|
845 |
transition: all 0.3s ease;
|
846 |
opacity: 0;
|
847 |
transform: translateY(20px);
|
848 |
}
|
|
|
849 |
.group.visible {
|
850 |
opacity: 1;
|
851 |
transform: translateY(0);
|
852 |
}
|
|
|
853 |
/* Examples 스타일링 */
|
854 |
.examples-table {
|
855 |
margin-top: 10px !important;
|
856 |
margin-bottom: 20px !important;
|
857 |
}
|
|
|
858 |
.examples-table button {
|
859 |
background-color: #f0f0f0 !important;
|
860 |
border: 1px solid #ddd !important;
|
|
|
863 |
margin: 2px !important;
|
864 |
transition: all 0.3s ease !important;
|
865 |
}
|
|
|
866 |
.examples-table button:hover {
|
867 |
background-color: #e0e0e0 !important;
|
868 |
transform: translateY(-1px) !important;
|
869 |
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
|
870 |
}
|
|
|
871 |
.examples-table .label {
|
872 |
font-weight: bold !important;
|
873 |
color: #444 !important;
|
|
|
881 |
headers = {
|
882 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
883 |
}
|
884 |
+
session = requests.Session()
|
885 |
+
retries = Retry(total=3, backoff_factor=0.5)
|
886 |
+
session.mount('https://', HTTPAdapter(max_retries=retries))
|
887 |
+
|
888 |
+
response = session.get(url, headers=headers, timeout=30)
|
889 |
+
response.raise_for_status()
|
890 |
soup = BeautifulSoup(response.content, 'html.parser')
|
891 |
|
892 |
+
# 메타 데이터 추출
|
893 |
+
title = soup.find('meta', property='og:title') or soup.find('title')
|
894 |
+
title = title.get('content', '') if hasattr(title, 'get') else title.string if title else ''
|
895 |
+
|
896 |
+
description = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
|
897 |
+
description = description.get('content', '') if description else ''
|
898 |
+
|
899 |
+
# 본문 추출 개선
|
900 |
+
article_content = ''
|
901 |
+
|
902 |
# 일반적인 기사 본문 컨테이너 검색
|
903 |
+
content_selectors = [
|
904 |
+
'article', '.article-body', '.article-content', '#article-body',
|
905 |
+
'.story-body', '.post-content', '.entry-content', '.content-body',
|
906 |
+
'[itemprop="articleBody"]', '.story-content'
|
|
|
|
|
907 |
]
|
908 |
|
909 |
+
for selector in content_selectors:
|
910 |
+
content = soup.select_one(selector)
|
911 |
+
if content:
|
912 |
+
# 불필요한 요소 제거
|
913 |
+
for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
|
914 |
+
tag.decompose()
|
915 |
+
|
916 |
+
# 단락 추출
|
917 |
+
paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
|
918 |
+
if paragraphs:
|
919 |
+
article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if p.get_text().strip()])
|
920 |
+
break
|
921 |
|
922 |
+
# 백업 방법: 모든 단락 추출
|
923 |
+
if not article_content:
|
924 |
+
paragraphs = soup.find_all('p')
|
925 |
+
article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 50])
|
926 |
+
|
927 |
+
# 최종 콘텐츠 구성
|
928 |
+
full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"
|
929 |
+
|
930 |
+
# 텍스트 정제
|
931 |
+
full_content = re.sub(r'\s+', ' ', full_content) # 연속된 공백 제거
|
932 |
+
full_content = re.sub(r'\n\s*\n', '\n\n', full_content) # 연속된 빈 줄 제거
|
933 |
+
|
934 |
+
return full_content.strip()
|
935 |
|
|
|
936 |
except Exception as e:
|
937 |
+
print(f"Crawling error details: {str(e)}") # 디버깅을 위한 상세 에러 출력
|
938 |
return f"Error crawling content: {str(e)}"
|
939 |
|
940 |
+
def respond(url, history, system_message, max_tokens, temperature, top_p):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
941 |
if not url.startswith('http'):
|
942 |
history.append((url, "์ฌ๋ฐ๋ฅธ URL์ ์
๋ ฅํด์ฃผ์ธ์."))
|
943 |
return history
|
944 |
|
945 |
try:
|
|
|
946 |
article_content = get_article_content(url)
|
947 |
|
948 |
+
translation_prompt = f"""๋ค์ ์๋ฌธ ๊ธฐ์ฌ๋ฅผ ํ๊ตญ์ด๋ก ๋ฒ์ญํ๊ณ ๊ธฐ์ฌ๋ฅผ ์์ฑํด์ฃผ์ธ์.
|
949 |
+
1๋จ๊ณ: ์ ๋ฌธ ๋ฒ์ญ
|
950 |
+
===๋ฒ์ญ ์์===
|
951 |
+
{article_content}
|
952 |
+
===๋ฒ์ญ ๋===
|
953 |
+
2๋จ๊ณ: ๊ธฐ์ฌ ์์ฑ ๊ฐ์ด๋๋ผ์ธ
|
954 |
+
๋ค์ ์๊ตฌ์ฌํญ์ ๋ฐ๋ผ ํ๊ตญ์ด ๊ธฐ์ฌ๋ฅผ ์์ฑํ์ธ์:
|
955 |
+
1. ๊ตฌ์กฐ
|
956 |
+
- ํค๋๋ผ์ธ: ํต์ฌ ๋ด์ฉ์ ๋ด์ ๊ฐ๋ ฅํ ์ ๋ชฉ
|
957 |
+
- ๋ถ์ ๋ชฉ: ํค๋๋ผ์ธ ๋ณด์ ์ค๋ช
|
958 |
+
- ๋ฆฌ๋๋ฌธ: ๊ธฐ์ฌ์ ํต์ฌ์ ์์ฝํ ์ฒซ ๋ฌธ๋จ
|
959 |
+
- ๋ณธ๋ฌธ: ์์ธ ๋ด์ฉ ์ ๊ฐ
|
960 |
+
2. ์์ฑ ๊ท์น
|
961 |
+
- ๊ฐ๊ด์ ์ด๊ณ ์ ํํ ์ฌ์ค ์ ๋ฌ
|
962 |
+
- ๋ฌธ์ฅ์ '๋ค.'๋ก ์ข
๊ฒฐ
|
963 |
+
- ๋จ๋ฝ ๊ฐ ์์ฐ์ค๋ฌ์ด ํ๋ฆ
|
964 |
+
- ์ธ์ฉ๊ตฌ๋ ๋ฐ์ดํ ์ฒ๋ฆฌ
|
965 |
+
- ํต์ฌ ์ ๋ณด๋ฅผ ์๋ถ๋ถ์ ๋ฐฐ์น
|
966 |
+
- ์ ๋ฌธ ์ฉ์ด๋ ์ ์ ํ ์ค๋ช
์ถ๊ฐ
|
967 |
+
3. ํ์
|
968 |
+
- ์ ์ ํ ๋จ๋ฝ ๊ตฌ๋ถ
|
969 |
+
- ์ฝ๊ธฐ ์ฌ์ด ๋ฌธ์ฅ ๊ธธ์ด
|
970 |
+
- ๋
ผ๋ฆฌ์ ์ธ ์ ๋ณด ๊ตฌ์ฑ
|
971 |
+
๊ฐ ๋จ๊ณ๋ '===๋ฒ์ญ===', '===๊ธฐ์ฌ==='๋ก ๋ช
ํํ ๊ตฌ๋ถํ์ฌ ์ถ๋ ฅํ์ธ์.
|
972 |
+
"""
|
973 |
|
974 |
messages = [
|
975 |
{
|
976 |
+
"role": "system",
|
977 |
+
"content": system_message
|
|
|
|
|
|
|
|
|
978 |
},
|
979 |
{"role": "user", "content": translation_prompt}
|
980 |
]
|
|
|
982 |
history.append((url, "๋ฒ์ญ ๋ฐ ๊ธฐ์ฌ ์์ฑ์ ์์ํฉ๋๋ค..."))
|
983 |
|
984 |
full_response = ""
|
|
|
|
|
985 |
for message in client.chat.completions.create(
|
986 |
model="CohereForAI/c4ai-command-r-plus-08-2024",
|
987 |
max_tokens=max_tokens,
|
|
|
994 |
token = message.choices[0].delta.content
|
995 |
if token:
|
996 |
full_response += token
|
|
|
|
|
|
|
|
|
|
|
997 |
history[-1] = (url, full_response)
|
998 |
yield history
|
999 |
|
|
|
1111 |
# AI 번역 탭 추가
|
1112 |
with gr.Tab("AI ๊ธฐ์ฌ ์์ฑ"):
|
1113 |
gr.Markdown("๋ด์ค URL์ ์
๋ ฅํ๋ฉด AI๊ฐ ํ๊ตญ์ด๋ก ๋ฒ์ญํ์ฌ ๊ธฐ์ฌ ํ์์ผ๋ก ์์ฑํฉ๋๋ค.")
|
1114 |
+
gr.Markdown("์ด๋ฏธ์ง ์์ฑ: https://huggingface.co/spaces/ginipick/FLUXllama ")
|
1115 |
+
|
1116 |
with gr.Column():
|
1117 |
chatbot = gr.Chatbot(height=600)
|
1118 |
|
|
|
1121 |
label="๋ด์ค URL",
|
1122 |
placeholder="https://..."
|
1123 |
)
|
1124 |
+
|
1125 |
with gr.Accordion("๊ณ ๊ธ ์ค์ ", open=False):
|
1126 |
+
|
1127 |
system_message = gr.Textbox(
|
1128 |
+
value="""You are a professional translator and journalist. Follow these steps strictly:
|
1129 |
+
1. TRANSLATION
|
1130 |
+
- Start with ===๋ฒ์ญ=== marker
|
1131 |
+
- Provide accurate Korean translation
|
1132 |
+
- Maintain original meaning and context
|
1133 |
+
2. ARTICLE WRITING
|
1134 |
+
- Start with ===๊ธฐ์ฌ=== marker
|
1135 |
+
- Write a new Korean news article based on the translation
|
1136 |
+
- Follow newspaper article format
|
1137 |
+
- Use formal news writing style
|
1138 |
+
- End sentences with '๋ค.'
|
1139 |
+
- Include headline and subheadline
|
1140 |
+
- Organize paragraphs clearly
|
1141 |
+
- Put key information first
|
1142 |
+
- Use quotes appropriately
|
1143 |
+
|
1144 |
+
3. IMAGE PROMPT GENERATION
|
1145 |
+
- Start with ===์ด๋ฏธ์ง ํ๋กฌํํธ=== marker
|
1146 |
+
- Create detailed Korean prompts for image generation
|
1147 |
+
- Prompts should reflect the article's main theme and content
|
1148 |
+
- Include key visual elements mentioned in the article
|
1149 |
+
- Specify style, mood, and composition
|
1150 |
+
- Format: "์ด๋ฏธ์ง ์ค๋ช
: [์์ธ ์ค๋ช
]"
|
1151 |
+
- Add style keywords: "์คํ์ผ: [๊ด๋ จ ํค์๋๋ค]"
|
1152 |
+
- Add mood keywords: "๋ถ์๊ธฐ: [๊ด๋ จ ํค์๋๋ค]"
|
1153 |
+
IMPORTANT:
|
1154 |
+
- Must complete all three steps in order
|
1155 |
+
- Clearly separate each section with markers
|
1156 |
+
- Never skip or combine steps
|
1157 |
+
- Ensure image prompts align with article content""",
|
1158 |
+
label="System message"
|
1159 |
+
)
|
1160 |
+
|
1161 |
+
|
1162 |
|
1163 |
max_tokens = gr.Slider(
|
1164 |
minimum=1,
|
|
|
1238 |
server_name="0.0.0.0",
|
1239 |
server_port=7860,
|
1240 |
share=True,
|
1241 |
+
auth=("gini","pick"),
|
1242 |
ssl_verify=False,
|
1243 |
show_error=True
|
1244 |
+
)
|