import streamlit as st
import pandas as pd
import requests
import base64
import threading
from queue import Queue
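
# Streamlit app: fetch backlink data for a list of target URLs from the
# DataForSEO Backlinks API, run several requests concurrently on worker
# threads, and offer per-URL and combined CSV downloads.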


def get_backlinks(api_login, api_key, target_url, filters):
    """Query the DataForSEO Backlinks API for a single target URL and return
    the matching backlinks as a DataFrame, or None if the request fails or
    the response contains no items."""
    try:
        # Basic auth header built from the API login and key.
        encoded_credentials = base64.b64encode(f"{api_login}:{api_key}".encode()).decode()
        headers = {'Authorization': f'Basic {encoded_credentials}'}
        post_data = {0: {"target": target_url, "limit": 1000, "mode": "as_is", "filters": filters}}
        response = requests.post(
            "https://api.dataforseo.com/v3/backlinks/backlinks/live",
            json=post_data,
            headers=headers,
        )
        response.raise_for_status()

        response_data = response.json()
        if 'tasks' in response_data:
            task_result = response_data['tasks'][0]['result']
            if task_result and task_result[0] and 'items' in task_result[0]:
                items = task_result[0]['items']
                return pd.json_normalize(items)
            else:
                st.error("Received empty 'result' from API or missing 'items'.")
                return None
        else:
            st.error(f"No 'tasks' key in response JSON. Full response: {response_data}")
            return None
    except requests.RequestException as e:
        st.error(f"Request error: {e}")
        return None


def convert_df_to_csv(df):
    return df.to_csv(index=False).encode('utf-8')


class BacklinkWorker(threading.Thread):
    """Worker thread that takes (url_id, target_url, filters) jobs from a queue,
    fetches the backlinks for each target, and puts (url_id, DataFrame) results
    onto the results queue."""

    def __init__(self, jobs, results, api_login, api_key):
        super().__init__()
        self.jobs = jobs
        self.results = results
        self.api_login = api_login
        self.api_key = api_key

    def run(self):
        while True:
            job = self.jobs.get()
            if job is None:
                # Sentinel value: no more work, so let the thread finish.
                break
            url_id, target_url, filters = job
            df = get_backlinks(self.api_login, self.api_key, target_url, filters)
            self.results.put((url_id, df))


def process_pasted_data(data):
    # Treat the pasted block as one URL per line, ignoring blank lines.
    return [line.strip() for line in data.strip().splitlines() if line.strip()]
st.sidebar.title("DataForSEO API Parameters") |
|
api_login = st.sidebar.text_input("API Login", value="[email protected]") |
|
api_key = st.sidebar.text_input("API Key", type="password") |
|
|
|
st.sidebar.title("Filters") |
|
st.sidebar.info("A maximum of 8 filters are allowed through the API. That includes one for every comma-separated value below, one for every value not equal to 0 in the boxes, one for language, and one for each button ticked. If you try to add more, it will fail.") |
|
url_from_not_contain = st.sidebar.text_input("URL from does not contain (comma-separated)", value="keyword,blogspot,/search,/tag") |
|
backlink_spam_score = st.sidebar.number_input("Backlink Spam Score ≤", value=20) |
|
page_from_rank_value = st.sidebar.number_input("Page From Rank Value ≥", value=0) |
|
domain_from_rank_value = st.sidebar.number_input("Domain From Rank Value ≥", value=100) |
|
page_from_language = st.sidebar.selectbox("Page From Language", ['en', 'other']) |
|
is_lost = st.sidebar.checkbox("Is Lost", value=False) |
|
dofollow = st.sidebar.checkbox("Dofollow", value=False) |
|
is_broken = st.sidebar.checkbox("Is Broken", value=False) |
|
|
|
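
# The DataForSEO filter expression is built as a flat list of
# [field, operator, value] conditions joined by "and" connectors, e.g.
#   [["dofollow", "=", True], "and", ["backlink_spam_score", "<=", 20]]
# The trailing connector is stripped at the end before the list is sent.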
filters = []

if url_from_not_contain:
    for url in url_from_not_contain.split(','):
        filters.append(["url_from", "not_like", f"%{url.strip()}%"])
        filters.append("and")

if is_lost:
    filters.append(["is_lost", "=", is_lost])
    filters.append("and")

if dofollow:
    filters.append(["dofollow", "=", dofollow])
    filters.append("and")

if is_broken:
    filters.append(["is_broken", "=", is_broken])
    filters.append("and")

if page_from_rank_value != 0:
    filters.append(["page_from_rank", ">=", page_from_rank_value])
    filters.append("and")

if domain_from_rank_value != 0:
    filters.append(["domain_from_rank", ">=", domain_from_rank_value])
    filters.append("and")

filters.append(["backlink_spam_score", "<=", backlink_spam_score])
filters.append("and")

filters.append(["page_from_language", "=", page_from_language])
filters.append("and")

filters.append(["anchor", "not_like", ""])
filters.append("and")

if filters and filters[-1] == "and":
    filters.pop()

num_concurrent_calls = st.sidebar.number_input("Concurrent Calls:", min_value=1, max_value=10, value=5, step=1)
data_section = st.sidebar.text_area("Paste List of URLs:")
paste_data = st.sidebar.button("Paste Data")
add_row = st.sidebar.button("Add Row")
reset = st.sidebar.button("Reset")
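
# "Paste Data" seeds one target_url_{i} session-state key per pasted line,
# "Add Row" grows the visible row count, and "Reset" clears all state.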
if paste_data:
    pasted_urls = process_pasted_data(data_section)
    for i, url in enumerate(pasted_urls):
        st.session_state[f"target_url_{i}"] = url
    st.session_state["row_count"] = len(pasted_urls)

if add_row:
    # The page shows one row by default, so start counting from 1.
    row_count = st.session_state.get("row_count", 1) + 1
    st.session_state["row_count"] = row_count

if reset:
    st.session_state.clear()

row_count = st.session_state.get("row_count", 1)

generate_button = st.sidebar.button("Generate All")
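
# "Generate All": fan the entered URLs out to a pool of worker threads via a
# job queue, then collect each (row index, DataFrame) result into session state.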
if generate_button:
    jobs = Queue()
    results = Queue()
    workers = [BacklinkWorker(jobs, results, api_login, api_key) for _ in range(num_concurrent_calls)]

    for worker in workers:
        worker.start()

    for i in range(row_count):
        target_url = st.session_state.get(f"target_url_{i}", "")
        if target_url:
            jobs.put((i, target_url, filters))

    # One sentinel per worker so every thread can drain the queue and exit.
    for _ in workers:
        jobs.put(None)

    for worker in workers:
        worker.join()

    while not results.empty():
        url_id, df = results.get()
        st.session_state[f"df_{url_id}"] = df
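
# Render one input row per target URL, with a per-row CSV download button once
# results are available for that row.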
for i in range(row_count):
    cols = st.columns(2)
    target_url_key = f"target_url_{i}"
    target_url = cols[0].text_input(f"Enter the target URL {i + 1}", key=target_url_key)
    df_key = f"df_{i}"
    df = st.session_state.get(df_key)
    if df is not None:
        csv_data = convert_df_to_csv(df)
        cols[1].download_button(
            label=f"Download data as CSV for URL {i + 1}",
            data=csv_data,
            file_name=f'backlinks_{i + 1}.csv',
            mime='text/csv',
        )
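

# Offer every fetched result as one combined CSV from the sidebar.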
def concatenate_dfs_and_download():
    dfs = [st.session_state[key] for key in st.session_state.keys() if key.startswith('df_') and st.session_state[key] is not None]
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        csv_data = combined_df.to_csv(index=False).encode('utf-8')
        st.sidebar.download_button(
            label="Download All as CSV",
            data=csv_data,
            file_name='all_backlinks.csv',
            mime='text/csv',
        )
    else:
        st.sidebar.write("No data available to download.")


concatenate_dfs_and_download()