import streamlit as st
import pandas as pd
import requests
import base64
import threading
from queue import Queue
from io import StringIO
import csv
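# Streamlit app: bulk-exports backlink data for a list of target URLs from the
# DataForSEO Backlinks API, fetching several URLs concurrently via worker threads.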
def get_backlinks(api_login, api_key, target_url, filters):
    """Fetch backlinks for a single target URL from the DataForSEO Backlinks API.

    Returns a flat DataFrame of backlink items, or None if the request fails or
    the response contains no usable data.
    """
    try:
        # DataForSEO uses HTTP Basic auth built from the login/key pair.
        encoded_credentials = base64.b64encode(f"{api_login}:{api_key}".encode()).decode()
        headers = {'Authorization': f'Basic {encoded_credentials}'}
        post_data = {0: {"target": target_url, "limit": 1000, "mode": "as_is", "filters": filters}}
        response = requests.post("https://api.dataforseo.com/v3/backlinks/backlinks/live", json=post_data, headers=headers)
        response.raise_for_status()  # Raise an exception for HTTP errors
        response_data = response.json()
        if 'tasks' in response_data:
            task_result = response_data['tasks'][0]['result']
            if task_result and 'items' in task_result[0]:
                items = task_result[0]['items']
                df = pd.json_normalize(items)
                return df
            else:
                st.error("Received empty 'result' from API or missing 'items'.")
                return None
        else:
            st.error(f"No 'tasks' key in response JSON. Full response: {response_data}")
            return None
    except requests.RequestException as e:
        st.error(f"Request error: {e}")
        return None
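# Illustrative call (placeholder credentials and filter, not part of the app's UI flow):
#   df = get_backlinks("login@example.com", "secret", "https://example.com",
#                      [["dofollow", "=", True]])
#   if df is not None:
#       print(df.columns)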
def convert_df_to_csv(df):
    """Encode a DataFrame as UTF-8 CSV bytes for Streamlit download buttons."""
    return df.to_csv(index=False).encode('utf-8')
class BacklinkWorker(threading.Thread):
    """Worker thread that pulls (url_id, target_url, filters) jobs from a queue
    and pushes (url_id, DataFrame) results back; a None job signals shutdown."""

    def __init__(self, jobs, results, api_login, api_key):
        super().__init__()
        self.jobs = jobs
        self.results = results
        self.api_login = api_login
        self.api_key = api_key

    def run(self):
        while True:
            job = self.jobs.get()
            if job is None:  # Sentinel: no more work for this worker.
                break
            url_id, target_url, filters = job
            df = get_backlinks(self.api_login, self.api_key, target_url, filters)
            self.results.put((url_id, df))
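# Usage sketch (simplified version of the "Generate All" handler further down):
#   jobs, results = Queue(), Queue()
#   workers = [BacklinkWorker(jobs, results, api_login, api_key) for _ in range(5)]
#   for w in workers: w.start()
#   jobs.put((0, "https://example.com", []))   # enqueue one job
#   jobs.put(None)                             # one sentinel per worker to stop it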
def process_pasted_data(data):
    """Split a pasted block of text into one URL per line."""
    data_io = StringIO(data.strip())
    reader = csv.reader(data_io, delimiter='\n', quotechar='"')
    # Skip blank rows so empty lines in the pasted text don't raise IndexError.
    return [row[0] for row in reader if row]
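# Example (illustrative): "https://a.com\nhttps://b.com" -> ["https://a.com", "https://b.com"]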
st.sidebar.title("DataForSEO API Parameters")
api_login = st.sidebar.text_input("API Login", value="[email protected]")
api_key = st.sidebar.text_input("API Key", type="password")
st.sidebar.title("Filters")
st.sidebar.info("A maximum of 8 filters is allowed through the API: one for each comma-separated value below, one for each non-zero number box, one for the language, and one for each checkbox ticked. Adding more than 8 will cause the request to fail.")
url_from_not_contain = st.sidebar.text_input("URL from does not contain (comma-separated)", value="keyword,blogspot,/search,/tag")
backlink_spam_score = st.sidebar.number_input("Backlink Spam Score ≤", value=20)
page_from_rank_value = st.sidebar.number_input("Page From Rank Value ≥", value=0)
domain_from_rank_value = st.sidebar.number_input("Domain From Rank Value ≥", value=100)
page_from_language = st.sidebar.selectbox("Page From Language", ['en', 'other'])
is_lost = st.sidebar.checkbox("Is Lost", value=False)
dofollow = st.sidebar.checkbox("Dofollow", value=False)
is_broken = st.sidebar.checkbox("Is Broken", value=False)
# Build the DataForSEO "filters" array: each condition is followed by an "and"
# connector, and the trailing connector is removed at the end.
filters = []
if url_from_not_contain:
    for url in url_from_not_contain.split(','):
        filters.append(["url_from", "not_like", f"%{url.strip()}%"])
        filters.append("and")
if is_lost:
    filters.append(["is_lost", "=", is_lost])
    filters.append("and")
if dofollow:
    filters.append(["dofollow", "=", dofollow])
    filters.append("and")
if is_broken:
    filters.append(["is_broken", "=", is_broken])
    filters.append("and")
if page_from_rank_value != 0:
    filters.append(["page_from_rank", ">=", page_from_rank_value])
    filters.append("and")
if domain_from_rank_value != 0:
    filters.append(["domain_from_rank", ">=", domain_from_rank_value])
    filters.append("and")
filters.append(["backlink_spam_score", "<=", backlink_spam_score])
filters.append("and")
filters.append(["page_from_language", "=", page_from_language])
filters.append("and")
filters.append(["anchor", "not_like", ""])
filters.append("and")
if filters and filters[-1] == "and":
    filters.pop()
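# Illustrative shape of the resulting list for the defaults above (assumed to match the
# filter syntax passed to the API call in get_backlinks):
#   [["url_from", "not_like", "%keyword%"], "and", ..., "and", ["anchor", "not_like", ""]]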
num_concurrent_calls = st.sidebar.number_input("Concurrent Calls:", min_value=1, max_value=10, value=5, step=1)
data_section = st.sidebar.text_area("Paste List of URLs:")
paste_data = st.sidebar.button("Paste Data")
add_row = st.sidebar.button("Add Row")
reset = st.sidebar.button("Reset")
if paste_data:
    pasted_urls = process_pasted_data(data_section)
    for i, url in enumerate(pasted_urls):
        st.session_state[f"target_url_{i}"] = url
    st.session_state["row_count"] = len(pasted_urls)
if add_row:
    row_count = st.session_state.get("row_count", 0) + 1
    st.session_state["row_count"] = row_count
if reset:
    st.session_state.clear()
row_count = st.session_state.get("row_count", 1)
generate_button = st.sidebar.button("Generate All")
if generate_button:
    jobs = Queue()
    results = Queue()
    # Start a small pool of worker threads so multiple URLs are fetched concurrently.
    workers = [BacklinkWorker(jobs, results, api_login, api_key) for _ in range(num_concurrent_calls)]
    for worker in workers:
        worker.start()
    for i in range(row_count):
        target_url = st.session_state.get(f"target_url_{i}", "")
        if target_url:
            jobs.put((i, target_url, filters))
    # One None sentinel per worker tells each thread to exit once the queue is drained.
    for _ in workers:
        jobs.put(None)
    for worker in workers:
        worker.join()
    # Keep results in session state so Streamlit reruns can re-render them.
    while not results.empty():
        url_id, df = results.get()
        st.session_state[f"df_{url_id}"] = df
for i in range(row_count):
    cols = st.columns(2)
    target_url_key = f"target_url_{i}"
    target_url = cols[0].text_input(f"Enter the target URL {i + 1}", key=target_url_key)
    df_key = f"df_{i}"
    df = st.session_state.get(df_key)
    if df is not None:
        csv_data = convert_df_to_csv(df)  # CSV payload for this URL's download button.
        cols[1].download_button(
            label=f"Download data as CSV for URL {i + 1}",
            data=csv_data,
            file_name=f'backlinks_{i + 1}.csv',
            mime='text/csv',
        )
# Concatenate all fetched DataFrames and offer a single combined CSV download.
def concatenate_dfs_and_download():
    dfs = [st.session_state[key] for key in st.session_state.keys() if key.startswith('df_') and st.session_state[key] is not None]
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        csv_data = combined_df.to_csv(index=False).encode('utf-8')
        st.sidebar.download_button(
            label="Download All as CSV",
            data=csv_data,
            file_name='all_backlinks.csv',
            mime='text/csv',
        )
    else:
        st.sidebar.write("No data available to download.")

concatenate_dfs_and_download()