Spaces:
Building
Building
Update app.py
Browse files
app.py
CHANGED
@@ -368,23 +368,60 @@ def clean_text(text):
|
|
368 |
|
369 |
def get_article_content(url):
|
370 |
"""URL에서 기사 내용 스크래핑"""
|
371 |
-
if not url
|
|
|
|
|
|
|
|
|
|
|
372 |
return None
|
373 |
|
374 |
try:
|
375 |
-
headers = {
|
376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
377 |
soup = BeautifulSoup(response.text, 'html.parser')
|
378 |
|
379 |
# 불필요한 요소 제거
|
380 |
-
for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
|
381 |
tag.decompose()
|
382 |
|
383 |
-
|
384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
text = clean_text(text)
|
386 |
|
|
|
|
|
|
|
387 |
return text[:4000] # 텍스트 길이 제한
|
|
|
388 |
except Exception as e:
|
389 |
print(f"Scraping error for {url}: {str(e)}")
|
390 |
return None
|
@@ -592,23 +629,32 @@ def generate_report(title, info, progress=gr.Progress()):
|
|
592 |
|
593 |
progress(0.5, desc="AI 모델 처리 중...")
|
594 |
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
610 |
return [
|
611 |
-
gr.update(value="
|
612 |
gr.update(value="접기", visible=True)
|
613 |
]
|
614 |
|
@@ -618,6 +664,8 @@ def generate_report(title, info, progress=gr.Progress()):
|
|
618 |
gr.update(value="리포팅 생성 중 오류가 발생했습니다.", visible=True),
|
619 |
gr.update(value="접기", visible=True)
|
620 |
]
|
|
|
|
|
621 |
|
622 |
def toggle_report(report_content, show_report):
|
623 |
"""리포트 표시/숨김 토글"""
|
@@ -627,42 +675,52 @@ def toggle_report(report_content, show_report):
|
|
627 |
]
|
628 |
|
629 |
css = """
|
|
|
630 |
footer {visibility: hidden;}
|
|
|
|
|
631 |
#status_area {
|
632 |
-
background: rgba(255, 255, 255, 0.9);
|
633 |
padding: 15px;
|
634 |
border-bottom: 1px solid #ddd;
|
635 |
margin-bottom: 20px;
|
636 |
-
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
637 |
}
|
|
|
638 |
#results_area {
|
639 |
padding: 10px;
|
640 |
margin-top: 10px;
|
641 |
}
|
642 |
-
|
|
|
643 |
.tabs {
|
644 |
border-bottom: 2px solid #ddd !important;
|
645 |
margin-bottom: 20px !important;
|
646 |
}
|
|
|
647 |
.tab-nav {
|
648 |
border-bottom: none !important;
|
649 |
margin-bottom: 0 !important;
|
650 |
}
|
|
|
651 |
.tab-nav button {
|
652 |
font-weight: bold !important;
|
653 |
padding: 10px 20px !important;
|
654 |
}
|
|
|
655 |
.tab-nav button.selected {
|
656 |
-
border-bottom: 2px solid #1f77b4 !important;
|
657 |
color: #1f77b4 !important;
|
658 |
}
|
659 |
-
|
|
|
660 |
#status_area .markdown-text {
|
661 |
font-size: 1.1em;
|
662 |
color: #2c3e50;
|
663 |
padding: 10px 0;
|
664 |
}
|
665 |
-
|
|
|
666 |
.group {
|
667 |
border: 1px solid #eee;
|
668 |
padding: 15px;
|
@@ -670,23 +728,20 @@ footer {visibility: hidden;}
|
|
670 |
border-radius: 5px;
|
671 |
background: white;
|
672 |
}
|
673 |
-
|
|
|
674 |
.primary-btn {
|
675 |
background: #1f77b4 !important;
|
676 |
border: none !important;
|
677 |
}
|
678 |
-
|
|
|
679 |
.textbox {
|
680 |
border: 1px solid #ddd !important;
|
681 |
border-radius: 4px !important;
|
682 |
}
|
683 |
-
.report-section {
|
684 |
-
margin-top: 15px;
|
685 |
-
padding: 15px;
|
686 |
-
border-top: 1px solid #eee;
|
687 |
-
background: #f9f9f9;
|
688 |
-
}
|
689 |
|
|
|
690 |
.hn-article-group {
|
691 |
height: auto !important;
|
692 |
min-height: 250px;
|
@@ -695,33 +750,75 @@ footer {visibility: hidden;}
|
|
695 |
border: 1px solid #eee;
|
696 |
border-radius: 5px;
|
697 |
background: white;
|
|
|
698 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
699 |
.report-content {
|
700 |
margin-top: 15px;
|
701 |
padding: 15px;
|
702 |
border-top: 1px solid #eee;
|
703 |
background: #f9f9f9;
|
|
|
|
|
|
|
704 |
}
|
705 |
|
706 |
-
|
|
|
707 |
position: fixed;
|
708 |
top: 0;
|
709 |
left: 0;
|
710 |
width: 100%;
|
711 |
height: 4px;
|
|
|
|
|
|
|
|
|
|
|
|
|
712 |
background: #1f77b4;
|
|
|
|
|
|
|
|
|
|
|
713 |
z-index: 1000;
|
714 |
}
|
|
|
|
|
715 |
.hn-article-group .report-content {
|
716 |
display: none;
|
717 |
margin-top: 15px;
|
718 |
padding: 15px;
|
719 |
border-top: 1px solid #eee;
|
720 |
background: #f9f9f9;
|
|
|
721 |
}
|
|
|
722 |
.hn-article-group .report-content.visible {
|
723 |
display: block;
|
724 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
725 |
"""
|
726 |
|
727 |
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface:
|
|
|
368 |
|
369 |
def get_article_content(url):
    """Scrape the article body text from a URL.

    Args:
        url: Address of the page to fetch. Falsy values are rejected.

    Returns:
        Up to 4000 characters of cleaned paragraph text, or None when the URL
        is empty, its host is on the skip list, no paragraph text is found,
        or any error occurs while fetching or parsing the page.
    """
    if not url:
        return None

    # Function-scope import so the block stays self-contained.
    from urllib.parse import urlparse

    # Hosts that are skipped outright.
    skip_domains = ('github.com', 'twitter.com', 'linkedin.com', 'facebook.com')

    # Compare against the parsed host instead of substring-matching the whole
    # URL, so a skipped domain appearing in a path/query string (or as part of
    # a longer host such as "notgithub.com") does not cause a false skip.
    try:
        parsed = urlparse(url if '://' in url else '//' + url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        # Malformed URL (e.g. broken IPv6 literal): nothing to scrape.
        return None
    if any(host == domain or host.endswith('.' + domain) for domain in skip_domains):
        return None

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        }

        # Retry transient failures with backoff; use a generous timeout.
        retries = requests.adapters.Retry(total=3, backoff_factor=1)
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)

        # The context manager closes the session's connection pool even when
        # the request raises.
        with requests.Session() as session:
            # Mount for both schemes so plain-http URLs are retried too.
            session.mount('https://', adapter)
            session.mount('http://', adapter)

            response = session.get(url, headers=headers, timeout=15)
            response.raise_for_status()
            html = response.text

        soup = BeautifulSoup(html, 'html.parser')

        # Drop elements that do not carry article text.
        for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'iframe']):
            tag.decompose()

        # Prefer <article>, then <main>, then fall back to the whole document.
        container = soup.find('article') or soup.find('main') or soup
        pieces = (p.get_text().strip() for p in container.find_all('p'))
        text = clean_text(' '.join(piece for piece in pieces if piece))

        if not text:
            return None

        return text[:4000]  # cap the text length

    except Exception as e:
        # Best-effort scraping: report the failure and let the caller continue.
        print(f"Scraping error for {url}: {str(e)}")
        return None
|
|
|
629 |
|
630 |
progress(0.5, desc="AI 모델 처리 중...")
|
631 |
|
632 |
+
try:
|
633 |
+
response = hf_client.text_generation(
|
634 |
+
prompt,
|
635 |
+
max_new_tokens=4000,
|
636 |
+
temperature=0.3,
|
637 |
+
repetition_penalty=1.2
|
638 |
+
)
|
639 |
+
|
640 |
+
progress(0.8, desc="결과 처리 중...")
|
641 |
+
|
642 |
+
if response:
|
643 |
+
formatted_response = f"""### AI 리포팅 결과\n\n{response}"""
|
644 |
+
return [
|
645 |
+
gr.update(value=formatted_response, visible=True),
|
646 |
+
gr.update(value="접기", visible=True)
|
647 |
+
]
|
648 |
+
else:
|
649 |
+
return [
|
650 |
+
gr.update(value="리포팅 생성에 실패했습니다.", visible=True),
|
651 |
+
gr.update(value="접기", visible=True)
|
652 |
+
]
|
653 |
+
|
654 |
+
except Exception as e:
|
655 |
+
print(f"Model error: {str(e)}")
|
656 |
return [
|
657 |
+
gr.update(value="AI 모델 처리 중 오류가 발생했습니다.", visible=True),
|
658 |
gr.update(value="접기", visible=True)
|
659 |
]
|
660 |
|
|
|
664 |
gr.update(value="리포팅 생성 중 오류가 발생했습니다.", visible=True),
|
665 |
gr.update(value="접기", visible=True)
|
666 |
]
|
667 |
+
finally:
|
668 |
+
progress(1.0, desc="완료!")
|
669 |
|
670 |
def toggle_report(report_content, show_report):
|
671 |
"""리포트 표시/숨김 토글"""
|
|
|
675 |
]
|
676 |
|
677 |
css = """
|
678 |
+
/* 전역 스타일 */
|
679 |
footer {visibility: hidden;}
|
680 |
+
|
681 |
+
/* 레이아웃 컨테이너 */
|
682 |
#status_area {
|
683 |
+
background: rgba(255, 255, 255, 0.9);
|
684 |
padding: 15px;
|
685 |
border-bottom: 1px solid #ddd;
|
686 |
margin-bottom: 20px;
|
687 |
+
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
688 |
}
|
689 |
+
|
690 |
#results_area {
|
691 |
padding: 10px;
|
692 |
margin-top: 10px;
|
693 |
}
|
694 |
+
|
695 |
+
/* 탭 스타일 */
|
696 |
.tabs {
|
697 |
border-bottom: 2px solid #ddd !important;
|
698 |
margin-bottom: 20px !important;
|
699 |
}
|
700 |
+
|
701 |
.tab-nav {
|
702 |
border-bottom: none !important;
|
703 |
margin-bottom: 0 !important;
|
704 |
}
|
705 |
+
|
706 |
.tab-nav button {
|
707 |
font-weight: bold !important;
|
708 |
padding: 10px 20px !important;
|
709 |
}
|
710 |
+
|
711 |
.tab-nav button.selected {
|
712 |
+
border-bottom: 2px solid #1f77b4 !important;
|
713 |
color: #1f77b4 !important;
|
714 |
}
|
715 |
+
|
716 |
+
/* 상태 메시지 */
|
717 |
#status_area .markdown-text {
|
718 |
font-size: 1.1em;
|
719 |
color: #2c3e50;
|
720 |
padding: 10px 0;
|
721 |
}
|
722 |
+
|
723 |
+
/* 기본 컨테이너 */
|
724 |
.group {
|
725 |
border: 1px solid #eee;
|
726 |
padding: 15px;
|
|
|
728 |
border-radius: 5px;
|
729 |
background: white;
|
730 |
}
|
731 |
+
|
732 |
+
/* 버튼 스타일 */
|
733 |
.primary-btn {
|
734 |
background: #1f77b4 !important;
|
735 |
border: none !important;
|
736 |
}
|
737 |
+
|
738 |
+
/* 입력 필드 */
|
739 |
.textbox {
|
740 |
border: 1px solid #ddd !important;
|
741 |
border-radius: 4px !important;
|
742 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
743 |
|
744 |
+
/* Hacker News 아티클 스타일 */
|
745 |
.hn-article-group {
|
746 |
height: auto !important;
|
747 |
min-height: 250px;
|
|
|
750 |
border: 1px solid #eee;
|
751 |
border-radius: 5px;
|
752 |
background: white;
|
753 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
754 |
}
|
755 |
+
|
756 |
+
/* 리포트 섹션 스타일 */
|
757 |
+
.report-section {
|
758 |
+
margin-top: 15px;
|
759 |
+
padding: 15px;
|
760 |
+
border-top: 1px solid #eee;
|
761 |
+
background: #f9f9f9;
|
762 |
+
border-radius: 4px;
|
763 |
+
}
|
764 |
+
|
765 |
.report-content {
|
766 |
margin-top: 15px;
|
767 |
padding: 15px;
|
768 |
border-top: 1px solid #eee;
|
769 |
background: #f9f9f9;
|
770 |
+
border-radius: 4px;
|
771 |
+
font-size: 0.95em;
|
772 |
+
line-height: 1.6;
|
773 |
}
|
774 |
|
775 |
+
/* 프로그레스 바 */
|
776 |
+
.progress {
|
777 |
position: fixed;
|
778 |
top: 0;
|
779 |
left: 0;
|
780 |
width: 100%;
|
781 |
height: 4px;
|
782 |
+
background: #f0f0f0;
|
783 |
+
z-index: 1000;
|
784 |
+
}
|
785 |
+
|
786 |
+
.progress-bar {
|
787 |
+
height: 100%;
|
788 |
background: #1f77b4;
|
789 |
+
transition: width 0.3s ease;
|
790 |
+
position: fixed;
|
791 |
+
top: 0;
|
792 |
+
left: 0;
|
793 |
+
width: 100%;
|
794 |
z-index: 1000;
|
795 |
}
|
796 |
+
|
797 |
+
/* 리포트 콘텐츠 토글 */
|
798 |
.hn-article-group .report-content {
|
799 |
display: none;
|
800 |
margin-top: 15px;
|
801 |
padding: 15px;
|
802 |
border-top: 1px solid #eee;
|
803 |
background: #f9f9f9;
|
804 |
+
transition: all 0.3s ease;
|
805 |
}
|
806 |
+
|
807 |
.hn-article-group .report-content.visible {
|
808 |
display: block;
|
809 |
}
|
810 |
+
|
811 |
+
/* 반응형 디자인 */
|
812 |
+
@media (max-width: 768px) {
|
813 |
+
.hn-article-group {
|
814 |
+
padding: 10px;
|
815 |
+
margin-bottom: 15px;
|
816 |
+
}
|
817 |
+
|
818 |
+
.report-content {
|
819 |
+
padding: 10px;
|
820 |
+
}
|
821 |
+
}
|
822 |
"""
|
823 |
|
824 |
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface:
|