ginipick committed on
Commit
3b6333e
·
verified ·
1 Parent(s): c50da9a

Update app-backup2.py

Browse files
Files changed (1) hide show
  1. app-backup2.py +119 -115
app-backup2.py CHANGED
@@ -9,6 +9,7 @@ from requests.adapters import HTTPAdapter
9
  from requests.packages.urllib3.util.retry import Retry
10
  from openai import OpenAI
11
  from bs4 import BeautifulSoup
 
12
 
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
14
  if not ACCESS_TOKEN:
@@ -46,6 +47,7 @@ API_KEY = os.getenv("SERPHOUSE_API_KEY")
46
  # 국가별 언어 코드 매핑
47
  COUNTRY_LANGUAGES = {
48
  "United States": "en",
 
49
  "United Kingdom": "en",
50
  "Taiwan": "zh-TW",
51
  "Canada": "en",
@@ -116,6 +118,7 @@ COUNTRY_LANGUAGES = {
116
 
117
  COUNTRY_LOCATIONS = {
118
  "United States": "United States",
 
119
  "United Kingdom": "United Kingdom",
120
  "Taiwan": "Taiwan",
121
  "Canada": "Canada",
@@ -187,6 +190,7 @@ COUNTRY_LOCATIONS = {
187
  # 지역 정의
188
  # 동아시아 지역
189
  COUNTRY_LANGUAGES_EAST_ASIA = {
 
190
  "Taiwan": "zh-TW",
191
  "Japan": "ja",
192
  "China": "zh",
@@ -194,6 +198,7 @@ COUNTRY_LANGUAGES_EAST_ASIA = {
194
  }
195
 
196
  COUNTRY_LOCATIONS_EAST_ASIA = {
 
197
  "Taiwan": "Taiwan",
198
  "Japan": "Japan",
199
  "China": "China",
@@ -720,7 +725,6 @@ def search_global(query, region, articles_state_global):
720
  css = """
721
  /* ์ „์—ญ ์Šคํƒ€์ผ */
722
  footer {visibility: hidden;}
723
-
724
  /* ๋ ˆ์ด์•„์›ƒ ์ปจํ…Œ์ด๋„ˆ */
725
  #status_area {
726
  background: rgba(255, 255, 255, 0.9);
@@ -729,40 +733,33 @@ footer {visibility: hidden;}
729
  margin-bottom: 20px;
730
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
731
  }
732
-
733
  #results_area {
734
  padding: 10px;
735
  margin-top: 10px;
736
  }
737
-
738
  /* ํƒญ ์Šคํƒ€์ผ */
739
  .tabs {
740
  border-bottom: 2px solid #ddd !important;
741
  margin-bottom: 20px !important;
742
  }
743
-
744
  .tab-nav {
745
  border-bottom: none !important;
746
  margin-bottom: 0 !important;
747
  }
748
-
749
  .tab-nav button {
750
  font-weight: bold !important;
751
  padding: 10px 20px !important;
752
  }
753
-
754
  .tab-nav button.selected {
755
  border-bottom: 2px solid #1f77b4 !important;
756
  color: #1f77b4 !important;
757
  }
758
-
759
  /* ์ƒํƒœ ๋ฉ”์‹œ์ง€ */
760
  #status_area .markdown-text {
761
  font-size: 1.1em;
762
  color: #2c3e50;
763
  padding: 10px 0;
764
  }
765
-
766
  /* ๊ธฐ๋ณธ ์ปจํ…Œ์ด๋„ˆ */
767
  .group {
768
  border: 1px solid #eee;
@@ -771,19 +768,16 @@ footer {visibility: hidden;}
771
  border-radius: 5px;
772
  background: white;
773
  }
774
-
775
  /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
776
  .primary-btn {
777
  background: #1f77b4 !important;
778
  border: none !important;
779
  }
780
-
781
  /* ์ž…๋ ฅ ํ•„๋“œ */
782
  .textbox {
783
  border: 1px solid #ddd !important;
784
  border-radius: 4px !important;
785
  }
786
-
787
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์ปจํ…Œ์ด๋„ˆ */
788
  .progress-container {
789
  position: fixed;
@@ -794,7 +788,6 @@ footer {visibility: hidden;}
794
  background: #e0e0e0;
795
  z-index: 1000;
796
  }
797
-
798
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” */
799
  .progress-bar {
800
  height: 100%;
@@ -803,7 +796,6 @@ footer {visibility: hidden;}
803
  transition: width 0.3s ease;
804
  animation: progress-glow 1.5s ease-in-out infinite;
805
  }
806
-
807
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค ํ…์ŠคํŠธ */
808
  .progress-text {
809
  position: fixed;
@@ -818,7 +810,6 @@ footer {visibility: hidden;}
818
  z-index: 1001;
819
  box-shadow: 0 2px 5px rgba(0,0,0,0.2);
820
  }
821
-
822
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์• ๋‹ˆ๋ฉ”์ด์…˜ */
823
  @keyframes progress-glow {
824
  0% {
@@ -831,7 +822,6 @@ footer {visibility: hidden;}
831
  box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
832
  }
833
  }
834
-
835
  /* ๋ฐ˜์‘ํ˜• ๋””์ž์ธ */
836
  @media (max-width: 768px) {
837
  .group {
@@ -844,32 +834,27 @@ footer {visibility: hidden;}
844
  padding: 3px 10px;
845
  }
846
  }
847
-
848
  /* ๋กœ๋”ฉ ์ƒํƒœ ํ‘œ์‹œ ๊ฐœ์„  */
849
  .loading {
850
  opacity: 0.7;
851
  pointer-events: none;
852
  transition: opacity 0.3s ease;
853
  }
854
-
855
  /* ๊ฒฐ๊ณผ ์ปจํ…Œ์ด๋„ˆ ์• ๋‹ˆ๋ฉ”์ด์…˜ */
856
  .group {
857
  transition: all 0.3s ease;
858
  opacity: 0;
859
  transform: translateY(20px);
860
  }
861
-
862
  .group.visible {
863
  opacity: 1;
864
  transform: translateY(0);
865
  }
866
-
867
  /* Examples ์Šคํƒ€์ผ๋ง */
868
  .examples-table {
869
  margin-top: 10px !important;
870
  margin-bottom: 20px !important;
871
  }
872
-
873
  .examples-table button {
874
  background-color: #f0f0f0 !important;
875
  border: 1px solid #ddd !important;
@@ -878,13 +863,11 @@ footer {visibility: hidden;}
878
  margin: 2px !important;
879
  transition: all 0.3s ease !important;
880
  }
881
-
882
  .examples-table button:hover {
883
  background-color: #e0e0e0 !important;
884
  transform: translateY(-1px) !important;
885
  box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
886
  }
887
-
888
  .examples-table .label {
889
  font-weight: bold !important;
890
  color: #444 !important;
@@ -898,85 +881,100 @@ def get_article_content(url):
898
  headers = {
899
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
900
  }
901
- response = requests.get(url, headers=headers)
 
 
 
 
 
902
  soup = BeautifulSoup(response.content, 'html.parser')
903
 
 
 
 
 
 
 
 
 
 
 
904
  # 일반적인 기사 본문 컨테이너 검색
905
- article_body = None
906
- possible_content_elements = [
907
- soup.find('article'),
908
- soup.find('div', class_='article-body'),
909
- soup.find('div', class_='content'),
910
- soup.find('div', {'id': 'article-body'})
911
  ]
912
 
913
- for element in possible_content_elements:
914
- if element:
915
- article_body = element
916
- break
 
 
 
 
 
 
 
 
917
 
918
- if article_body:
919
- # 불필요한 요소 제거
920
- for tag in article_body.find_all(['script', 'style', 'nav', 'header', 'footer']):
921
- tag.decompose()
922
-
923
- content = ' '.join([p.get_text().strip() for p in article_body.find_all('p') if p.get_text().strip()])
924
- else:
925
- content = ' '.join([p.get_text().strip() for p in soup.find_all('p') if p.get_text().strip()])
 
 
 
 
 
926
 
927
- return content
928
  except Exception as e:
 
929
  return f"Error crawling content: {str(e)}"
930
 
931
- def respond(
932
- url,
933
- history: list[tuple[str, str]],
934
- system_message,
935
- max_tokens,
936
- temperature,
937
- top_p,
938
- ):
939
  if not url.startswith('http'):
940
  history.append((url, "์˜ฌ๋ฐ”๋ฅธ URL์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."))
941
  return history
942
 
943
  try:
944
- # 기사 내용 추출
945
  article_content = get_article_content(url)
946
 
947
- # 2단계 프로세스를 위한 프롬프트 구성
948
- translation_prompt = f"""๋‹ค์Œ ์ž‘์—…์„ ์ˆœ์ฐจ์ ์œผ๋กœ ์ˆ˜ํ–‰ํ•˜์„ธ์š”:
949
-
950
- 1๋‹จ๊ณ„: ๋ฒˆ์—ญ
951
- ์•„๋ž˜ ์˜๋ฌธ ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ๋ฒˆ์—ญํ•˜์„ธ์š”.
952
- ๊ตฌ๋ถ„์„ : ===๋ฒˆ์—ญ ์‹œ์ž‘===
953
- {article_content}
954
- ๊ตฌ๋ถ„์„ : ===๋ฒˆ์—ญ ๋===
955
-
956
- 2๋‹จ๊ณ„: ๊ธฐ์‚ฌ ์ž‘์„ฑ
957
- ์œ„์˜ ๋ฒˆ์—ญ๋œ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ์ƒˆ๋กœ์šด ํ•œ๊ตญ์–ด ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•˜์„ธ์š”.
958
- ๋‹ค์Œ ํ˜•์‹์„ ๋ฐ˜๋“œ์‹œ ์ค€์ˆ˜ํ•˜์„ธ์š”:
959
- - ์ œ๋ชฉ: [ํ—ค๋“œ๋ผ์ธ]
960
- - ๋ถ€์ œ: [์„œ๋ธŒํ—ค๋“œ๋ผ์ธ]
961
- - ๋ณธ๋ฌธ: [๊ธฐ์‚ฌ ๋‚ด์šฉ]
962
- - ์ž‘์„ฑ ๊ทœ์น™:
963
- * ๋ฌธ์žฅ์€ '๋‹ค.'๋กœ ๋๋‚˜์•ผ ํ•จ
964
- * ์‹ ๋ฌธ ๊ธฐ์‚ฌ ํ˜•์‹ ์ค€์ˆ˜
965
- * ๋‹จ๋ฝ ๊ตฌ๋ถ„์„ ๋ช…ํ™•ํžˆ ํ•  ๊ฒƒ
966
- * ํ•ต์‹ฌ ์ •๋ณด๋ฅผ ์•ž๋ถ€๋ถ„์— ๋ฐฐ์น˜
967
- * ์ธ์šฉ๊ตฌ๋Š” ๋”ฐ์˜ดํ‘œ๋กœ ์ฒ˜๋ฆฌ
968
-
969
- ๊ฐ ๋‹จ๊ณ„๋Š” '===๋ฒˆ์—ญ===', '===๊ธฐ์‚ฌ==='๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ถœ๋ ฅํ•˜์„ธ์š”.
970
- """
 
971
 
972
  messages = [
973
  {
974
- "role": "system",
975
- "content": """๋‹น์‹ ์€ ์ „๋ฌธ ๋ฒˆ์—ญ๊ฐ€์ด์ž ๊ธฐ์ž์ž…๋‹ˆ๋‹ค.
976
- ๋ชจ๋“  ์ž‘์—…์€ ๋ฐ˜๋“œ์‹œ ๋‹ค์Œ ๋‘ ๋‹จ๊ณ„๋กœ ์ง„ํ–‰ํ•˜๊ณ , ๊ฐ ๋‹จ๊ณ„๋ฅผ ๋ช…ํ™•ํžˆ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ถœ๋ ฅํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค:
977
- 1. ์›๋ฌธ ๋ฒˆ์—ญ: ===๋ฒˆ์—ญ=== ํ‘œ์‹œ ํ›„ ์ •ํ™•ํ•œ ํ•œ๊ตญ์–ด ๋ฒˆ์—ญ ์ œ๊ณต
978
- 2. ๊ธฐ์‚ฌ ์ž‘์„ฑ: ===๊ธฐ์‚ฌ=== ํ‘œ์‹œ ํ›„ ๋ฒˆ์—ญ๋ณธ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•œ๊ตญ์–ด ๋‰ด์Šค ๊ธฐ์‚ฌ ์ž‘์„ฑ
979
- ๋‘ ๋‹จ๊ณ„๋ฅผ ๊ฑด๋„ˆ๋›ฐ๊ฑฐ๋‚˜ ํ†ตํ•ฉํ•˜์ง€ ๋ง๊ณ  ๋ฐ˜๋“œ์‹œ ์ˆœ์ฐจ์ ์œผ๋กœ ์ง„ํ–‰ํ•˜์„ธ์š”."""
980
  },
981
  {"role": "user", "content": translation_prompt}
982
  ]
@@ -984,8 +982,6 @@ def respond(
984
  history.append((url, "๋ฒˆ์—ญ ๋ฐ ๊ธฐ์‚ฌ ์ž‘์„ฑ์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค..."))
985
 
986
  full_response = ""
987
- current_section = ""
988
-
989
  for message in client.chat.completions.create(
990
  model="CohereForAI/c4ai-command-r-plus-08-2024",
991
  max_tokens=max_tokens,
@@ -998,11 +994,6 @@ def respond(
998
  token = message.choices[0].delta.content
999
  if token:
1000
  full_response += token
1001
- # 섹션 구분자 확인 및 포맷팅
1002
- if "===๋ฒˆ์—ญ===" in token or "===๊ธฐ์‚ฌ===" in token:
1003
- current_section = token.strip()
1004
- full_response += "\n\n"
1005
-
1006
  history[-1] = (url, full_response)
1007
  yield history
1008
 
@@ -1120,7 +1111,8 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์„œ๋น„
1120
  # AI 번역 탭 추가
1121
  with gr.Tab("AI ๊ธฐ์‚ฌ ์ƒ์„ฑ"):
1122
  gr.Markdown("๋‰ด์Šค URL์„ ์ž…๋ ฅํ•˜๋ฉด AI๊ฐ€ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜์—ฌ ๊ธฐ์‚ฌ ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค.")
1123
-
 
1124
  with gr.Column():
1125
  chatbot = gr.Chatbot(height=600)
1126
 
@@ -1129,32 +1121,44 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์„œ๋น„
1129
  label="๋‰ด์Šค URL",
1130
  placeholder="https://..."
1131
  )
1132
-
1133
  with gr.Accordion("๊ณ ๊ธ‰ ์„ค์ •", open=False):
 
1134
  system_message = gr.Textbox(
1135
- value="""You are a professional translator and journalist. Follow these steps strictly:
1136
- 1. TRANSLATION
1137
- - Start with ===๋ฒˆ์—ญ=== marker
1138
- - Provide accurate Korean translation
1139
- - Maintain original meaning and context
1140
-
1141
- 2. ARTICLE WRITING
1142
- - Start with ===๊ธฐ์‚ฌ=== marker
1143
- - Write a new Korean news article based on the translation
1144
- - Follow newspaper article format
1145
- - Use formal news writing style
1146
- - End sentences with '๋‹ค.'
1147
- - Include headline and subheadline
1148
- - Organize paragraphs clearly
1149
- - Put key information first
1150
- - Use quotes appropriately
1151
-
1152
- IMPORTANT:
1153
- - Must complete both steps in order
1154
- - Clearly separate each section with markers
1155
- - Never skip or combine steps""",
1156
- label="System message"
1157
- )
 
 
 
 
 
 
 
 
 
 
 
1158
 
1159
  max_tokens = gr.Slider(
1160
  minimum=1,
@@ -1234,7 +1238,7 @@ iface.launch(
1234
  server_name="0.0.0.0",
1235
  server_port=7860,
1236
  share=True,
1237
- auth=("ai","news"),
1238
  ssl_verify=False,
1239
  show_error=True
1240
- )
 
9
  from requests.packages.urllib3.util.retry import Retry
10
  from openai import OpenAI
11
  from bs4 import BeautifulSoup
12
+ import re # re ๋ชจ๋“ˆ ์ถ”๊ฐ€
13
 
14
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
15
  if not ACCESS_TOKEN:
 
47
  # 국가별 언어 코드 매핑
48
  COUNTRY_LANGUAGES = {
49
  "United States": "en",
50
+ "KOREA": "ko",
51
  "United Kingdom": "en",
52
  "Taiwan": "zh-TW",
53
  "Canada": "en",
 
118
 
119
  COUNTRY_LOCATIONS = {
120
  "United States": "United States",
121
+ "KOREA": "kr",
122
  "United Kingdom": "United Kingdom",
123
  "Taiwan": "Taiwan",
124
  "Canada": "Canada",
 
190
  # 지역 정의
191
  # 동아시아 지역
192
  COUNTRY_LANGUAGES_EAST_ASIA = {
193
+ "KOREA": "ko",
194
  "Taiwan": "zh-TW",
195
  "Japan": "ja",
196
  "China": "zh",
 
198
  }
199
 
200
  COUNTRY_LOCATIONS_EAST_ASIA = {
201
+ "KOREA": "KOREA",
202
  "Taiwan": "Taiwan",
203
  "Japan": "Japan",
204
  "China": "China",
 
725
  css = """
726
  /* ์ „์—ญ ์Šคํƒ€์ผ */
727
  footer {visibility: hidden;}
 
728
  /* ๋ ˆ์ด์•„์›ƒ ์ปจํ…Œ์ด๋„ˆ */
729
  #status_area {
730
  background: rgba(255, 255, 255, 0.9);
 
733
  margin-bottom: 20px;
734
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
735
  }
 
736
  #results_area {
737
  padding: 10px;
738
  margin-top: 10px;
739
  }
 
740
  /* ํƒญ ์Šคํƒ€์ผ */
741
  .tabs {
742
  border-bottom: 2px solid #ddd !important;
743
  margin-bottom: 20px !important;
744
  }
 
745
  .tab-nav {
746
  border-bottom: none !important;
747
  margin-bottom: 0 !important;
748
  }
 
749
  .tab-nav button {
750
  font-weight: bold !important;
751
  padding: 10px 20px !important;
752
  }
 
753
  .tab-nav button.selected {
754
  border-bottom: 2px solid #1f77b4 !important;
755
  color: #1f77b4 !important;
756
  }
 
757
  /* ์ƒํƒœ ๋ฉ”์‹œ์ง€ */
758
  #status_area .markdown-text {
759
  font-size: 1.1em;
760
  color: #2c3e50;
761
  padding: 10px 0;
762
  }
 
763
  /* ๊ธฐ๋ณธ ์ปจํ…Œ์ด๋„ˆ */
764
  .group {
765
  border: 1px solid #eee;
 
768
  border-radius: 5px;
769
  background: white;
770
  }
 
771
  /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
772
  .primary-btn {
773
  background: #1f77b4 !important;
774
  border: none !important;
775
  }
 
776
  /* ์ž…๋ ฅ ํ•„๋“œ */
777
  .textbox {
778
  border: 1px solid #ddd !important;
779
  border-radius: 4px !important;
780
  }
 
781
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์ปจํ…Œ์ด๋„ˆ */
782
  .progress-container {
783
  position: fixed;
 
788
  background: #e0e0e0;
789
  z-index: 1000;
790
  }
 
791
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” */
792
  .progress-bar {
793
  height: 100%;
 
796
  transition: width 0.3s ease;
797
  animation: progress-glow 1.5s ease-in-out infinite;
798
  }
 
799
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค ํ…์ŠคํŠธ */
800
  .progress-text {
801
  position: fixed;
 
810
  z-index: 1001;
811
  box-shadow: 0 2px 5px rgba(0,0,0,0.2);
812
  }
 
813
  /* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์• ๋‹ˆ๋ฉ”์ด์…˜ */
814
  @keyframes progress-glow {
815
  0% {
 
822
  box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
823
  }
824
  }
 
825
  /* ๋ฐ˜์‘ํ˜• ๋””์ž์ธ */
826
  @media (max-width: 768px) {
827
  .group {
 
834
  padding: 3px 10px;
835
  }
836
  }
 
837
  /* ๋กœ๋”ฉ ์ƒํƒœ ํ‘œ์‹œ ๊ฐœ์„  */
838
  .loading {
839
  opacity: 0.7;
840
  pointer-events: none;
841
  transition: opacity 0.3s ease;
842
  }
 
843
  /* ๊ฒฐ๊ณผ ์ปจํ…Œ์ด๋„ˆ ์• ๋‹ˆ๋ฉ”์ด์…˜ */
844
  .group {
845
  transition: all 0.3s ease;
846
  opacity: 0;
847
  transform: translateY(20px);
848
  }
 
849
  .group.visible {
850
  opacity: 1;
851
  transform: translateY(0);
852
  }
 
853
  /* Examples ์Šคํƒ€์ผ๋ง */
854
  .examples-table {
855
  margin-top: 10px !important;
856
  margin-bottom: 20px !important;
857
  }
 
858
  .examples-table button {
859
  background-color: #f0f0f0 !important;
860
  border: 1px solid #ddd !important;
 
863
  margin: 2px !important;
864
  transition: all 0.3s ease !important;
865
  }
 
866
  .examples-table button:hover {
867
  background-color: #e0e0e0 !important;
868
  transform: translateY(-1px) !important;
869
  box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
870
  }
 
871
  .examples-table .label {
872
  font-weight: bold !important;
873
  color: #444 !important;
 
881
  headers = {
882
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
883
  }
884
+ session = requests.Session()
885
+ retries = Retry(total=3, backoff_factor=0.5)
886
+ session.mount('https://', HTTPAdapter(max_retries=retries))
887
+
888
+ response = session.get(url, headers=headers, timeout=30)
889
+ response.raise_for_status()
890
  soup = BeautifulSoup(response.content, 'html.parser')
891
 
892
+ # 메타 데이터 추출
893
+ title = soup.find('meta', property='og:title') or soup.find('title')
894
+ title = title.get('content', '') if hasattr(title, 'get') else title.string if title else ''
895
+
896
+ description = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
897
+ description = description.get('content', '') if description else ''
898
+
899
+ # 본문 추출 개선
900
+ article_content = ''
901
+
902
  # 일반적인 기사 본문 컨테이너 검색
903
+ content_selectors = [
904
+ 'article', '.article-body', '.article-content', '#article-body',
905
+ '.story-body', '.post-content', '.entry-content', '.content-body',
906
+ '[itemprop="articleBody"]', '.story-content'
 
 
907
  ]
908
 
909
+ for selector in content_selectors:
910
+ content = soup.select_one(selector)
911
+ if content:
912
+ # 불필요한 요소 제거
913
+ for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
914
+ tag.decompose()
915
+
916
+ # 단락 추출
917
+ paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
918
+ if paragraphs:
919
+ article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if p.get_text().strip()])
920
+ break
921
 
922
+ # 백업 방법: 모든 단락 추출
923
+ if not article_content:
924
+ paragraphs = soup.find_all('p')
925
+ article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 50])
926
+
927
+ # 최종 콘텐츠 구성
928
+ full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"
929
+
930
+ # 텍스트 정제
931
+ full_content = re.sub(r'\s+', ' ', full_content) # ์—ฐ์†๋œ ๊ณต๋ฐฑ ์ œ๊ฑฐ
932
+ full_content = re.sub(r'\n\s*\n', '\n\n', full_content) # ์—ฐ์†๋œ ๋นˆ ์ค„ ์ œ๊ฑฐ
933
+
934
+ return full_content.strip()
935
 
 
936
  except Exception as e:
937
+ print(f"Crawling error details: {str(e)}") # ๋””๋ฒ„๊น…์„ ์œ„ํ•œ ์ƒ์„ธ ์—๋Ÿฌ ์ถœ๋ ฅ
938
  return f"Error crawling content: {str(e)}"
939
 
940
+ def respond(url, history, system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
941
  if not url.startswith('http'):
942
  history.append((url, "์˜ฌ๋ฐ”๋ฅธ URL์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."))
943
  return history
944
 
945
  try:
 
946
  article_content = get_article_content(url)
947
 
948
+ translation_prompt = f"""๋‹ค์Œ ์˜๋ฌธ ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜๊ณ  ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
949
+ 1๋‹จ๊ณ„: ์ „๋ฌธ ๋ฒˆ์—ญ
950
+ ===๋ฒˆ์—ญ ์‹œ์ž‘===
951
+ {article_content}
952
+ ===๋ฒˆ์—ญ ๋===
953
+ 2๋‹จ๊ณ„: ๊ธฐ์‚ฌ ์ž‘์„ฑ ๊ฐ€์ด๋“œ๋ผ์ธ
954
+ ๋‹ค์Œ ์š”๊ตฌ์‚ฌํ•ญ์— ๋”ฐ๋ผ ํ•œ๊ตญ์–ด ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•˜์„ธ์š”:
955
+ 1. ๊ตฌ์กฐ
956
+ - ํ—ค๋“œ๋ผ์ธ: ํ•ต์‹ฌ ๋‚ด์šฉ์„ ๋‹ด์€ ๊ฐ•๋ ฅํ•œ ์ œ๋ชฉ
957
+ - ๋ถ€์ œ๋ชฉ: ํ—ค๋“œ๋ผ์ธ ๋ณด์™„ ์„ค๋ช…
958
+ - ๋ฆฌ๋“œ๋ฌธ: ๊ธฐ์‚ฌ์˜ ํ•ต์‹ฌ์„ ์š”์•ฝํ•œ ์ฒซ ๋ฌธ๋‹จ
959
+ - ๋ณธ๋ฌธ: ์ƒ์„ธ ๋‚ด์šฉ ์ „๊ฐœ
960
+ 2. ์ž‘์„ฑ ๊ทœ์น™
961
+ - ๊ฐ๊ด€์ ์ด๊ณ  ์ •ํ™•ํ•œ ์‚ฌ์‹ค ์ „๋‹ฌ
962
+ - ๋ฌธ์žฅ์€ '๋‹ค.'๋กœ ์ข…๊ฒฐ
963
+ - ๋‹จ๋ฝ ๊ฐ„ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ๋ฆ„
964
+ - ์ธ์šฉ๊ตฌ๋Š” ๋”ฐ์˜ดํ‘œ ์ฒ˜๋ฆฌ
965
+ - ํ•ต์‹ฌ ์ •๋ณด๋ฅผ ์•ž๋ถ€๋ถ„์— ๋ฐฐ์น˜
966
+ - ์ „๋ฌธ ์šฉ์–ด๋Š” ์ ์ ˆํ•œ ์„ค๋ช… ์ถ”๊ฐ€
967
+ 3. ํ˜•์‹
968
+ - ์ ์ ˆํ•œ ๋‹จ๋ฝ ๊ตฌ๋ถ„
969
+ - ์ฝ๊ธฐ ์‰ฌ์šด ๋ฌธ์žฅ ๊ธธ์ด
970
+ - ๋…ผ๋ฆฌ์ ์ธ ์ •๋ณด ๊ตฌ์„ฑ
971
+ ๊ฐ ๋‹จ๊ณ„๋Š” '===๋ฒˆ์—ญ===', '===๊ธฐ์‚ฌ==='๋กœ ๋ช…ํ™•ํžˆ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ถœ๋ ฅํ•˜์„ธ์š”.
972
+ """
973
 
974
  messages = [
975
  {
976
+ "role": "system",
977
+ "content": system_message
 
 
 
 
978
  },
979
  {"role": "user", "content": translation_prompt}
980
  ]
 
982
  history.append((url, "๋ฒˆ์—ญ ๋ฐ ๊ธฐ์‚ฌ ์ž‘์„ฑ์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค..."))
983
 
984
  full_response = ""
 
 
985
  for message in client.chat.completions.create(
986
  model="CohereForAI/c4ai-command-r-plus-08-2024",
987
  max_tokens=max_tokens,
 
994
  token = message.choices[0].delta.content
995
  if token:
996
  full_response += token
 
 
 
 
 
997
  history[-1] = (url, full_response)
998
  yield history
999
 
 
1111
  # AI 번역 탭 추가
1112
  with gr.Tab("AI ๊ธฐ์‚ฌ ์ƒ์„ฑ"):
1113
  gr.Markdown("๋‰ด์Šค URL์„ ์ž…๋ ฅํ•˜๋ฉด AI๊ฐ€ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜์—ฌ ๊ธฐ์‚ฌ ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค.")
1114
+ gr.Markdown("์ด๋ฏธ์ง€ ์ƒ์„ฑ: https://huggingface.co/spaces/ginipick/FLUXllama ")
1115
+
1116
  with gr.Column():
1117
  chatbot = gr.Chatbot(height=600)
1118
 
 
1121
  label="๋‰ด์Šค URL",
1122
  placeholder="https://..."
1123
  )
1124
+
1125
  with gr.Accordion("๊ณ ๊ธ‰ ์„ค์ •", open=False):
1126
+
1127
  system_message = gr.Textbox(
1128
+ value="""You are a professional translator and journalist. Follow these steps strictly:
1129
+ 1. TRANSLATION
1130
+ - Start with ===๋ฒˆ์—ญ=== marker
1131
+ - Provide accurate Korean translation
1132
+ - Maintain original meaning and context
1133
+ 2. ARTICLE WRITING
1134
+ - Start with ===๊ธฐ์‚ฌ=== marker
1135
+ - Write a new Korean news article based on the translation
1136
+ - Follow newspaper article format
1137
+ - Use formal news writing style
1138
+ - End sentences with '๋‹ค.'
1139
+ - Include headline and subheadline
1140
+ - Organize paragraphs clearly
1141
+ - Put key information first
1142
+ - Use quotes appropriately
1143
+
1144
+ 3. IMAGE PROMPT GENERATION
1145
+ - Start with ===์ด๋ฏธ์ง€ ํ”„๋กฌํ”„ํŠธ=== marker
1146
+ - Create detailed Korean prompts for image generation
1147
+ - Prompts should reflect the article's main theme and content
1148
+ - Include key visual elements mentioned in the article
1149
+ - Specify style, mood, and composition
1150
+ - Format: "์ด๋ฏธ์ง€ ์„ค๋ช…: [์ƒ์„ธ ์„ค๋ช…]"
1151
+ - Add style keywords: "์Šคํƒ€์ผ: [๊ด€๋ จ ํ‚ค์›Œ๋“œ๋“ค]"
1152
+ - Add mood keywords: "๋ถ„์œ„๊ธฐ: [๊ด€๋ จ ํ‚ค์›Œ๋“œ๋“ค]"
1153
+ IMPORTANT:
1154
+ - Must complete all three steps in order
1155
+ - Clearly separate each section with markers
1156
+ - Never skip or combine steps
1157
+ - Ensure image prompts align with article content""",
1158
+ label="System message"
1159
+ )
1160
+
1161
+
1162
 
1163
  max_tokens = gr.Slider(
1164
  minimum=1,
 
1238
  server_name="0.0.0.0",
1239
  server_port=7860,
1240
  share=True,
1241
+ auth=("gini","pick"),
1242
  ssl_verify=False,
1243
  show_error=True
1244
+ )