import streamlit as st
import pandas as pd
import requests
import base64
import threading
from queue import Queue
from io import StringIO
import csv
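
# Fetch up to 1,000 backlinks for a single target URL from DataForSEO's
# /v3/backlinks/backlinks/live endpoint and return them as a DataFrame,
# or None on any error (errors are reported via st.error).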
def get_backlinks(api_login, api_key, target_url, filters):
    try:
        # DataForSEO authenticates with HTTP Basic auth built from the login/key pair.
        encoded_credentials = base64.b64encode(f"{api_login}:{api_key}".encode()).decode()
        headers = {'Authorization': f'Basic {encoded_credentials}'}
        post_data = {0: {"target": target_url, "limit": 1000, "mode": "as_is", "filters": filters}}
        response = requests.post("https://api.dataforseo.com/v3/backlinks/backlinks/live", json=post_data, headers=headers)
        response.raise_for_status()  # Raise an exception for HTTP errors
        response_data = response.json()
        if 'tasks' in response_data:
            task_result = response_data['tasks'][0]['result']
            if task_result and 'items' in task_result[0]:
                items = task_result[0]['items']
                df = pd.json_normalize(items)
                return df
            else:
                st.error("Received empty 'result' from API or missing 'items'.")
                return None
        else:
            st.error(f"No 'tasks' key in response JSON. Full response: {response_data}")
            return None
    except requests.RequestException as e:
        st.error(f"Request error: {e}")
        return None
def convert_df_to_csv(df):
    return df.to_csv(index=False).encode('utf-8')
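
# Worker threads pull (url_id, target_url, filters) jobs off a shared queue,
# call get_backlinks, and push (url_id, DataFrame) tuples onto the results
# queue; a None job acts as the shutdown sentinel.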
class BacklinkWorker(threading.Thread):
    def __init__(self, jobs, results, api_login, api_key):
        super().__init__()
        self.jobs = jobs
        self.results = results
        self.api_login = api_login
        self.api_key = api_key

    def run(self):
        while True:
            job = self.jobs.get()
            if job is None:
                break
            url_id, target_url, filters = job
            df = get_backlinks(self.api_login, self.api_key, target_url, filters)
            self.results.put((url_id, df))
def process_pasted_data(data):
    data_io = StringIO(data.strip())
    reader = csv.reader(data_io, delimiter='\n', quotechar='"')
    # Each pasted line is one target URL; skip any blank rows.
    return [row[0] for row in reader if row]
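
# Sidebar inputs: DataForSEO credentials plus the filter controls used to build
# the API filter expression below.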
st.sidebar.title("DataForSEO API Parameters")
api_login = st.sidebar.text_input("API Login", value="[email protected]")
api_key = st.sidebar.text_input("API Key", type="password")
st.sidebar.title("Filters")
st.sidebar.info("The API allows a maximum of 8 filters. That includes one for each comma-separated value below, one for each non-zero number box, one for the language, and one for each checkbox ticked. Adding more than 8 will cause the request to fail.")
url_from_not_contain = st.sidebar.text_input("URL from does not contain (comma-separated)", value="keyword,blogspot,/search,/tag")
backlink_spam_score = st.sidebar.number_input("Backlink Spam Score ≤", value=20)
page_from_rank_value = st.sidebar.number_input("Page From Rank Value ≥", value=0)
domain_from_rank_value = st.sidebar.number_input("Domain From Rank Value ≥", value=100)
page_from_language = st.sidebar.selectbox("Page From Language", ['en', 'other'])
is_lost = st.sidebar.checkbox("Is Lost", value=False)
dofollow = st.sidebar.checkbox("Dofollow", value=False)
is_broken = st.sidebar.checkbox("Is Broken", value=False)
filters = []
if url_from_not_contain:
    for url in url_from_not_contain.split(','):
        filters.append(["url_from", "not_like", f"%{url.strip()}%"])
        filters.append("and")
if is_lost:
    filters.append(["is_lost", "=", is_lost])
    filters.append("and")
if dofollow:
    filters.append(["dofollow", "=", dofollow])
    filters.append("and")
if is_broken:
    filters.append(["is_broken", "=", is_broken])
    filters.append("and")
if page_from_rank_value != 0:
    filters.append(["page_from_rank", ">=", page_from_rank_value])
    filters.append("and")
if domain_from_rank_value != 0:
    filters.append(["domain_from_rank", ">=", domain_from_rank_value])
    filters.append("and")
filters.append(["backlink_spam_score", "<=", backlink_spam_score])
filters.append("and")
filters.append(["page_from_language", "=", page_from_language])
filters.append("and")
filters.append(["anchor", "not_like", ""])
filters.append("and")
# Filters are joined with "and" connectors; drop the trailing one so the expression is well-formed.
if filters and filters[-1] == "and":
    filters.pop()
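
# Sidebar inputs for concurrency and the list of target URLs. URLs are stored in
# st.session_state under target_url_{i}, and row_count tracks how many rows to render.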
num_concurrent_calls = st.sidebar.number_input("Concurrent Calls:", min_value=1, max_value=10, value=5, step=1)
data_section = st.sidebar.text_area("Paste List of URLs:")
paste_data = st.sidebar.button("Paste Data")
add_row = st.sidebar.button("Add Row")
reset = st.sidebar.button("Reset")
if paste_data:
    pasted_urls = process_pasted_data(data_section)
    for i, url in enumerate(pasted_urls):
        st.session_state[f"target_url_{i}"] = url
    st.session_state["row_count"] = len(pasted_urls)

if add_row:
    row_count = st.session_state.get("row_count", 0) + 1
    st.session_state["row_count"] = row_count

if reset:
    st.session_state.clear()
row_count = st.session_state.get("row_count", 1)
generate_button = st.sidebar.button("Generate All")
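
# "Generate All" fans the queued URLs out to a pool of worker threads; results come
# back keyed by row index so each DataFrame lands next to the URL that produced it.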
if generate_button:
    jobs = Queue()
    results = Queue()
    workers = [BacklinkWorker(jobs, results, api_login, api_key) for _ in range(num_concurrent_calls)]
    for worker in workers:
        worker.start()
    for i in range(row_count):
        target_url = st.session_state.get(f"target_url_{i}", "")
        if target_url:
            jobs.put((i, target_url, filters))
    # One None sentinel per worker so every thread exits once the queue drains.
    for _ in workers:
        jobs.put(None)
    for worker in workers:
        worker.join()
    while not results.empty():
        url_id, df = results.get()
        st.session_state[f"df_{url_id}"] = df
for i in range(row_count):
    cols = st.columns(2)
    target_url_key = f"target_url_{i}"
    target_url = cols[0].text_input(f"Enter the target URL {i + 1}", key=target_url_key)
    df_key = f"df_{i}"
    df = st.session_state.get(df_key)
    if df is not None:
        csv_data = convert_df_to_csv(df)  # renamed from `csv` so it no longer shadows the csv module
        cols[1].download_button(
            label=f"Download data as CSV for URL {i + 1}",
            data=csv_data,
            file_name=f'backlinks_{i + 1}.csv',
            mime='text/csv',
        )
# Function to concatenate all DataFrames and convert to CSV for download
def concatenate_dfs_and_download():
    dfs = [st.session_state[key] for key in st.session_state.keys() if key.startswith('df_') and st.session_state[key] is not None]
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        csv_data = combined_df.to_csv(index=False).encode('utf-8')
        st.sidebar.download_button(
            label="Download All as CSV",
            data=csv_data,
            file_name='all_backlinks.csv',
            mime='text/csv',
        )
    else:
        st.sidebar.write("No data available to download.")

concatenate_dfs_and_download()