openfree committed on
Commit
b86c8bd
·
verified ·
1 Parent(s): b499730

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -37
app.py CHANGED
@@ -368,23 +368,60 @@ def clean_text(text):
368
 
369
  def get_article_content(url):
370
  """URL에서 기사 내용 스크래핑"""
371
- if not url or 'github.com' in url or 'twitter.com' in url:
 
 
 
 
 
372
  return None
373
 
374
  try:
375
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
376
- response = requests.get(url, headers=headers, timeout=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  soup = BeautifulSoup(response.text, 'html.parser')
378
 
379
  # 불필요한 요소 제거
380
- for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
381
  tag.decompose()
382
 
383
- paragraphs = soup.find_all('p')
384
- text = ' '.join(p.get_text() for p in paragraphs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  text = clean_text(text)
386
 
 
 
 
387
  return text[:4000] # 텍스트 길이 제한
 
388
  except Exception as e:
389
  print(f"Scraping error for {url}: {str(e)}")
390
  return None
@@ -592,23 +629,32 @@ def generate_report(title, info, progress=gr.Progress()):
592
 
593
  progress(0.5, desc="AI 모델 처리 중...")
594
 
595
- response = hf_client.text_generation(
596
- prompt,
597
- max_new_tokens=4000,
598
- temperature=0.3,
599
- repetition_penalty=1.2
600
- )
601
-
602
- progress(1.0, desc="완료!")
603
-
604
- if response:
605
- return [
606
- gr.update(value=response, visible=True), # report_content
607
- gr.update(value="접기", visible=True) # show_report
608
- ]
609
- else:
 
 
 
 
 
 
 
 
 
610
  return [
611
- gr.update(value="리포팅 생성에 실패했습니다.", visible=True),
612
  gr.update(value="접기", visible=True)
613
  ]
614
 
@@ -618,6 +664,8 @@ def generate_report(title, info, progress=gr.Progress()):
618
  gr.update(value="리포팅 생성 중 오류가 발생했습니다.", visible=True),
619
  gr.update(value="접기", visible=True)
620
  ]
 
 
621
 
622
  def toggle_report(report_content, show_report):
623
  """리포트 표시/숨김 토글"""
@@ -627,42 +675,52 @@ def toggle_report(report_content, show_report):
627
  ]
628
 
629
  css = """
 
630
  footer {visibility: hidden;}
 
 
631
  #status_area {
632
- background: rgba(255, 255, 255, 0.9); /* 약간 투명한 흰색 배경 */
633
  padding: 15px;
634
  border-bottom: 1px solid #ddd;
635
  margin-bottom: 20px;
636
- box-shadow: 0 2px 5px rgba(0,0,0,0.1); /* 부드러운 그림자 효과 */
637
  }
 
638
  #results_area {
639
  padding: 10px;
640
  margin-top: 10px;
641
  }
642
- /* 탭 스타일 개선 */
 
643
  .tabs {
644
  border-bottom: 2px solid #ddd !important;
645
  margin-bottom: 20px !important;
646
  }
 
647
  .tab-nav {
648
  border-bottom: none !important;
649
  margin-bottom: 0 !important;
650
  }
 
651
  .tab-nav button {
652
  font-weight: bold !important;
653
  padding: 10px 20px !important;
654
  }
 
655
  .tab-nav button.selected {
656
- border-bottom: 2px solid #1f77b4 !important; /* 선택된 탭 강조 */
657
  color: #1f77b4 !important;
658
  }
659
- /* 검색 상태 메시지 스타일 */
 
660
  #status_area .markdown-text {
661
  font-size: 1.1em;
662
  color: #2c3e50;
663
  padding: 10px 0;
664
  }
665
- /* 검색 결과 컨테이너 스타일 */
 
666
  .group {
667
  border: 1px solid #eee;
668
  padding: 15px;
@@ -670,23 +728,20 @@ footer {visibility: hidden;}
670
  border-radius: 5px;
671
  background: white;
672
  }
673
- /* 검색 버튼 스타일 */
 
674
  .primary-btn {
675
  background: #1f77b4 !important;
676
  border: none !important;
677
  }
678
- /* 검색어 입력창 스타일 */
 
679
  .textbox {
680
  border: 1px solid #ddd !important;
681
  border-radius: 4px !important;
682
  }
683
- .report-section {
684
- margin-top: 15px;
685
- padding: 15px;
686
- border-top: 1px solid #eee;
687
- background: #f9f9f9;
688
- }
689
 
 
690
  .hn-article-group {
691
  height: auto !important;
692
  min-height: 250px;
@@ -695,33 +750,75 @@ footer {visibility: hidden;}
695
  border: 1px solid #eee;
696
  border-radius: 5px;
697
  background: white;
 
698
  }
 
 
 
 
 
 
 
 
 
 
699
  .report-content {
700
  margin-top: 15px;
701
  padding: 15px;
702
  border-top: 1px solid #eee;
703
  background: #f9f9f9;
 
 
 
704
  }
705
 
706
- .progress-bar {
 
707
  position: fixed;
708
  top: 0;
709
  left: 0;
710
  width: 100%;
711
  height: 4px;
 
 
 
 
 
 
712
  background: #1f77b4;
 
 
 
 
 
713
  z-index: 1000;
714
  }
 
 
715
  .hn-article-group .report-content {
716
  display: none;
717
  margin-top: 15px;
718
  padding: 15px;
719
  border-top: 1px solid #eee;
720
  background: #f9f9f9;
 
721
  }
 
722
  .hn-article-group .report-content.visible {
723
  display: block;
724
  }
 
 
 
 
 
 
 
 
 
 
 
 
725
  """
726
 
727
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface:
 
368
 
369
  def get_article_content(url):
370
  """URL에서 기사 내용 스크래핑"""
371
+ if not url:
372
+ return None
373
+
374
+ # 스킵할 도메인 목록
375
+ skip_domains = ['github.com', 'twitter.com', 'linkedin.com', 'facebook.com']
376
+ if any(domain in url.lower() for domain in skip_domains):
377
  return None
378
 
379
  try:
380
+ headers = {
381
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
382
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
383
+ 'Accept-Language': 'en-US,en;q=0.5',
384
+ 'Connection': 'keep-alive',
385
+ }
386
+
387
+ # 타임아웃 증가 및 재시도 설정
388
+ session = requests.Session()
389
+ retries = requests.adapters.Retry(total=3, backoff_factor=1)
390
+ session.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries))
391
+
392
+ response = session.get(url, headers=headers, timeout=15)
393
+ response.raise_for_status()
394
+
395
  soup = BeautifulSoup(response.text, 'html.parser')
396
 
397
  # 불필요한 요소 제거
398
+ for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'iframe']):
399
  tag.decompose()
400
 
401
+ # 본문 내용 추출
402
+ article_text = ""
403
+
404
+ # article 태그 확인
405
+ article = soup.find('article')
406
+ if article:
407
+ paragraphs = article.find_all('p')
408
+ else:
409
+ # main 태그 확인
410
+ main = soup.find('main')
411
+ if main:
412
+ paragraphs = main.find_all('p')
413
+ else:
414
+ # body에서 직접 검색
415
+ paragraphs = soup.find_all('p')
416
+
417
+ text = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
418
  text = clean_text(text)
419
 
420
+ if not text:
421
+ return None
422
+
423
  return text[:4000] # 텍스트 길이 제한
424
+
425
  except Exception as e:
426
  print(f"Scraping error for {url}: {str(e)}")
427
  return None
 
629
 
630
  progress(0.5, desc="AI 모델 처리 중...")
631
 
632
+ try:
633
+ response = hf_client.text_generation(
634
+ prompt,
635
+ max_new_tokens=4000,
636
+ temperature=0.3,
637
+ repetition_penalty=1.2
638
+ )
639
+
640
+ progress(0.8, desc="결과 처리 중...")
641
+
642
+ if response:
643
+ formatted_response = f"""### AI 리포팅 결과\n\n{response}"""
644
+ return [
645
+ gr.update(value=formatted_response, visible=True),
646
+ gr.update(value="접기", visible=True)
647
+ ]
648
+ else:
649
+ return [
650
+ gr.update(value="리포팅 생성에 실패했습니다.", visible=True),
651
+ gr.update(value="접기", visible=True)
652
+ ]
653
+
654
+ except Exception as e:
655
+ print(f"Model error: {str(e)}")
656
  return [
657
+ gr.update(value="AI 모델 처리 중 오류가 발생했습니다.", visible=True),
658
  gr.update(value="접기", visible=True)
659
  ]
660
 
 
664
  gr.update(value="리포팅 생성 중 오류가 발생했습니다.", visible=True),
665
  gr.update(value="접기", visible=True)
666
  ]
667
+ finally:
668
+ progress(1.0, desc="완료!")
669
 
670
  def toggle_report(report_content, show_report):
671
  """리포트 표시/숨김 토글"""
 
675
  ]
676
 
677
  css = """
678
+ /* 전역 스타일 */
679
  footer {visibility: hidden;}
680
+
681
+ /* 레이아웃 컨테이너 */
682
  #status_area {
683
+ background: rgba(255, 255, 255, 0.9);
684
  padding: 15px;
685
  border-bottom: 1px solid #ddd;
686
  margin-bottom: 20px;
687
+ box-shadow: 0 2px 5px rgba(0,0,0,0.1);
688
  }
689
+
690
  #results_area {
691
  padding: 10px;
692
  margin-top: 10px;
693
  }
694
+
695
+ /* 탭 스타일 */
696
  .tabs {
697
  border-bottom: 2px solid #ddd !important;
698
  margin-bottom: 20px !important;
699
  }
700
+
701
  .tab-nav {
702
  border-bottom: none !important;
703
  margin-bottom: 0 !important;
704
  }
705
+
706
  .tab-nav button {
707
  font-weight: bold !important;
708
  padding: 10px 20px !important;
709
  }
710
+
711
  .tab-nav button.selected {
712
+ border-bottom: 2px solid #1f77b4 !important;
713
  color: #1f77b4 !important;
714
  }
715
+
716
+ /* 상태 메시지 */
717
  #status_area .markdown-text {
718
  font-size: 1.1em;
719
  color: #2c3e50;
720
  padding: 10px 0;
721
  }
722
+
723
+ /* 기본 컨테이너 */
724
  .group {
725
  border: 1px solid #eee;
726
  padding: 15px;
 
728
  border-radius: 5px;
729
  background: white;
730
  }
731
+
732
+ /* 버튼 스타일 */
733
  .primary-btn {
734
  background: #1f77b4 !important;
735
  border: none !important;
736
  }
737
+
738
+ /* 입력 필드 */
739
  .textbox {
740
  border: 1px solid #ddd !important;
741
  border-radius: 4px !important;
742
  }
 
 
 
 
 
 
743
 
744
+ /* Hacker News 아티클 스타일 */
745
  .hn-article-group {
746
  height: auto !important;
747
  min-height: 250px;
 
750
  border: 1px solid #eee;
751
  border-radius: 5px;
752
  background: white;
753
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
754
  }
755
+
756
+ /* 리포트 섹션 스타일 */
757
+ .report-section {
758
+ margin-top: 15px;
759
+ padding: 15px;
760
+ border-top: 1px solid #eee;
761
+ background: #f9f9f9;
762
+ border-radius: 4px;
763
+ }
764
+
765
  .report-content {
766
  margin-top: 15px;
767
  padding: 15px;
768
  border-top: 1px solid #eee;
769
  background: #f9f9f9;
770
+ border-radius: 4px;
771
+ font-size: 0.95em;
772
+ line-height: 1.6;
773
  }
774
 
775
+ /* 프로그레스 바 */
776
+ .progress {
777
  position: fixed;
778
  top: 0;
779
  left: 0;
780
  width: 100%;
781
  height: 4px;
782
+ background: #f0f0f0;
783
+ z-index: 1000;
784
+ }
785
+
786
+ .progress-bar {
787
+ height: 100%;
788
  background: #1f77b4;
789
+ transition: width 0.3s ease;
790
+ position: fixed;
791
+ top: 0;
792
+ left: 0;
793
+ width: 100%;
794
  z-index: 1000;
795
  }
796
+
797
+ /* 리포트 콘텐츠 토글 */
798
  .hn-article-group .report-content {
799
  display: none;
800
  margin-top: 15px;
801
  padding: 15px;
802
  border-top: 1px solid #eee;
803
  background: #f9f9f9;
804
+ transition: all 0.3s ease;
805
  }
806
+
807
  .hn-article-group .report-content.visible {
808
  display: block;
809
  }
810
+
811
+ /* 반응형 디자인 */
812
+ @media (max-width: 768px) {
813
+ .hn-article-group {
814
+ padding: 10px;
815
+ margin-bottom: 15px;
816
+ }
817
+
818
+ .report-content {
819
+ padding: 10px;
820
+ }
821
+ }
822
  """
823
 
824
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface: