File size: 5,684 Bytes
ab36536
 
 
 
 
f6d39c8
ab36536
 
 
f6d39c8
 
 
99d7b92
 
f6d39c8
 
 
 
 
 
 
 
 
 
 
 
 
ab36536
 
99d7b92
f6d39c8
 
 
 
 
 
ab36536
f6d39c8
 
 
 
 
 
 
99d7b92
 
f6d39c8
 
 
 
 
 
 
 
 
 
 
 
 
ab36536
f6d39c8
ab36536
 
99d7b92
f6d39c8
 
 
 
 
 
 
fb24c70
f6d39c8
ab36536
26b9192
f6d39c8
99d7b92
ab36536
f6d39c8
99d7b92
ab36536
99d7b92
ab36536
99d7b92
 
f6d39c8
ab36536
 
 
99d7b92
 
f6d39c8
ab36536
 
 
99d7b92
 
f6d39c8
ab36536
 
 
f6d39c8
99d7b92
 
f6d39c8
99d7b92
ab36536
 
f6d39c8
ab36536
 
 
99d7b92
ab36536
fb24c70
f6d39c8
 
 
 
 
99d7b92
 
 
f6d39c8
99d7b92
 
ab36536
 
f6d39c8
 
99d7b92
ab36536
f6d39c8
 
 
99d7b92
f6d39c8
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import pandas as pd
import os
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

def comprehensive_scroll(driver, pause=3, max_scrolls=None):
    """Scroll to the bottom of the page until no more new content is loaded.

    After every scroll the function waits ``pause`` seconds and compares
    ``document.body.scrollHeight`` with the previous value; it stops as soon
    as the height no longer changes.

    Args:
        driver: Object exposing ``execute_script(script)`` (a Selenium
            WebDriver in this module).
        pause: Seconds to wait after each scroll so new content can load.
            Defaults to 3, the previously hard-coded value.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps scrolling until the height is stable, which
            may never happen on a page that grows without end.

    Returns:
        None. Any exception raised while scrolling is logged and swallowed,
        so scrolling is best-effort.
    """
    try:
        last_height = driver.execute_script("return document.body.scrollHeight")
        scrolls_done = 0
        while max_scrolls is None or scrolls_done < max_scrolls:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            scrolls_done += 1
            time.sleep(pause)

            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                # Height did not grow after the pause: nothing more to load.
                break
            last_height = new_height
    except Exception as e:
        logger.error(f"Scroll sırasında hata: {str(e)}")

# Chromedriver locations tried in order; the bare name is resolved via PATH.
_CHROMEDRIVER_PATHS = (
    '/usr/local/bin/chromedriver',
    '/usr/bin/chromedriver',
    'chromedriver',
)

# Absolute XPath that matches every review container on the target page.
_REVIEW_XPATH = '/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div'


def _start_chrome(chrome_options):
    """Return a Chrome driver built from the first chromedriver path that works.

    Re-raises the error of the last candidate when none of them can be started.
    """
    last_error = None
    for path in _CHROMEDRIVER_PATHS:
        try:
            return webdriver.Chrome(service=Service(path), options=chrome_options)
        except Exception as exc:  # not a bare except: Ctrl-C / SystemExit must propagate
            last_error = exc
            logger.debug("ChromeDriver başlatılamadı (%s): %s", path, exc)
    raise last_error


def _text_or_na(driver, xpath):
    """Return the text of the element at ``xpath``, or "N/A" when it is missing."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        return "N/A"


def scrape_reviews(url):
    """Scrape the reviews found at ``url`` into a DataFrame.

    Opens the page in headless Chrome, accepts the cookie banner when it
    shows up, scrolls until no more content loads and then reads user name,
    comment text, date and number of full stars for every review container.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``pandas.DataFrame`` with the columns "Kullanıcı_id",
        "Kullanıcı Adı", "Yorum", "Tarih" and "Yıldız Sayısı" (one row per
        review). An empty DataFrame is returned when any step fails; the
        error is logged instead of raised.

    Side effects:
        Creates the ``data`` directory if needed and writes the result to
        ``data/temp_comments.csv``; that file is removed again before the
        function returns.
    """
    data_directory = "data"
    temp_file = os.path.join(data_directory, 'temp_comments.csv')
    driver = None
    try:
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(data_directory, exist_ok=True)

        chrome_options = webdriver.ChromeOptions()
        for argument in (
            '--headless',
            '--disable-gpu',
            '--no-sandbox',
            '--disable-dev-shm-usage',
            "--window-size=1920,1080",
        ):
            chrome_options.add_argument(argument)

        driver = _start_chrome(chrome_options)

        logger.info(f"URL'ye erişiliyor: {url}")
        driver.get(url)

        # Accept the cookie popup; carry on without it if it never appears.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
            ).click()
            logger.info("Çerez popup'ı kabul edildi")
        except TimeoutException:
            logger.warning("Çerez popup'ı bulunamadı veya tıklanamadı")

        logger.info("Sayfa kaydırılıyor...")
        comprehensive_scroll(driver)

        logger.info("Yorumlar toplanıyor...")
        total_comments = len(driver.find_elements(By.XPATH, _REVIEW_XPATH))
        logger.info(f"Toplam {total_comments} yorum bulundu")

        data = []
        # XPath positions are 1-based, hence the range starting at 1.
        for i in range(1, total_comments + 1):
            review_xpath = f'{_REVIEW_XPATH}[{i}]'

            username = _text_or_na(driver, f'{review_xpath}/div[1]/div[2]/div[1]')
            comment = _text_or_na(driver, f'{review_xpath}/div[2]/p')
            date = _text_or_na(driver, f'{review_xpath}/div[1]/div[2]/div[2]')

            # find_elements returns an empty list when nothing matches, so no
            # NoSuchElementException handling is needed for the star count.
            star_xpath_base = f'{review_xpath}/div[1]/div[1]/div'
            full_stars = driver.find_elements(
                By.XPATH,
                f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
            )

            data.append({
                "Kullanıcı_id": i,
                "Kullanıcı Adı": username,
                "Yorum": comment,
                "Tarih": date,
                "Yıldız Sayısı": len(full_stars),
            })

            if i % 10 == 0:
                logger.info(f"{i}/{total_comments} yorum toplandı")

        df = pd.DataFrame(data)

        # Save as a temporary file.
        # NOTE(review): the finally block below deletes this file again before
        # the caller sees the result — confirm the round trip is still wanted.
        df.to_csv(temp_file, index=False, encoding='utf-8-sig')
        logger.info(f"Veriler {temp_file} dosyasına kaydedildi")

        return df

    except Exception as e:
        logger.error(f"Veri çekme sırasında hata: {str(e)}")
        return pd.DataFrame()

    finally:
        try:
            if driver is not None:
                driver.quit()
                logger.info("Chrome driver kapatıldı")
        finally:
            # Runs even when quit() raises, so the temporary file never lingers.
            if os.path.exists(temp_file):
                os.remove(temp_file)
                logger.info("Geçici dosya silindi")