import streamlit as st
import pandas as pd
import requests
import base64
import threading
from queue import Queue


def get_backlinks(api_login, api_key, target_url, filters):
    """Fetch live backlinks for a single target URL from the DataForSEO API.

    Runs inside worker threads, so the st.error messages below may not be
    rendered by Streamlit outside the main script thread.
    """
    try:
        encoded_credentials = base64.b64encode(f"{api_login}:{api_key}".encode()).decode()
        headers = {'Authorization': f'Basic {encoded_credentials}'}
        # Single task payload keyed by index.
        post_data = {0: {"target": target_url, "limit": 1000, "mode": "as_is", "filters": filters}}
        response = requests.post(
            "https://api.dataforseo.com/v3/backlinks/backlinks/live",
            json=post_data,
            headers=headers,
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        response_data = response.json()

        if 'tasks' in response_data:
            task_result = response_data['tasks'][0]['result']
            if task_result and 'items' in task_result[0]:
                items = task_result[0]['items']
                return pd.json_normalize(items)
            st.error("Received empty 'result' from API or missing 'items'.")
            return None

        st.error(f"No 'tasks' key in response JSON. Full response: {response_data}")
        return None
    except requests.RequestException as e:
        st.error(f"Request error: {e}")
        return None


def convert_df_to_csv(df):
    """Encode a DataFrame as UTF-8 CSV bytes for st.download_button."""
    return df.to_csv(index=False).encode('utf-8')


class BacklinkWorker(threading.Thread):
    """Worker thread that pulls (url_id, target_url, filters) jobs and queues results."""

    def __init__(self, jobs, results, api_login, api_key):
        super().__init__()
        self.jobs = jobs
        self.results = results
        self.api_login = api_login
        self.api_key = api_key

    def run(self):
        while True:
            job = self.jobs.get()
            if job is None:  # Sentinel value: no more work.
                break
            url_id, target_url, filters = job
            df = get_backlinks(self.api_login, self.api_key, target_url, filters)
            self.results.put((url_id, df))


def process_pasted_data(data):
    """Split pasted text into a list of URLs, one per non-empty line."""
    return [line.strip().strip('"') for line in data.strip().splitlines() if line.strip()]


# --- Sidebar: API credentials ---
st.sidebar.title("DataForSEO API Parameters")
api_login = st.sidebar.text_input("API Login", value="josh@expertphotography.com")
api_key = st.sidebar.text_input("API Key", type="password")

# --- Sidebar: filter inputs ---
st.sidebar.title("Filters")
st.sidebar.info(
    "A maximum of 8 filters is allowed through the API. That includes one for every "
    "comma-separated value below, one for every non-zero value in the number boxes, "
    "one for language, and one for each checkbox ticked. "
    "If you try to add more, the request will fail."
)

url_from_not_contain = st.sidebar.text_input(
    "URL from does not contain (comma-separated)", value="keyword,blogspot,/search,/tag"
)
backlink_spam_score = st.sidebar.number_input("Backlink Spam Score ≤", value=20)
page_from_rank_value = st.sidebar.number_input("Page From Rank Value ≥", value=0)
domain_from_rank_value = st.sidebar.number_input("Domain From Rank Value ≥", value=100)
page_from_language = st.sidebar.selectbox("Page From Language", ['en', 'other'])
is_lost = st.sidebar.checkbox("Is Lost", value=False)
dofollow = st.sidebar.checkbox("Dofollow", value=False)
is_broken = st.sidebar.checkbox("Is Broken", value=False)

# Build the DataForSEO filter expression: conditions joined by "and".
filters = []
if url_from_not_contain:
    for url in url_from_not_contain.split(','):
        filters.append(["url_from", "not_like", f"%{url.strip()}%"])
        filters.append("and")
if is_lost:
    filters.append(["is_lost", "=", is_lost])
    filters.append("and")
if dofollow:
    filters.append(["dofollow", "=", dofollow])
    filters.append("and")
if is_broken:
    filters.append(["is_broken", "=", is_broken])
    filters.append("and")
if page_from_rank_value != 0:
    filters.append(["page_from_rank", ">=", page_from_rank_value])
    filters.append("and")
if domain_from_rank_value != 0:
    filters.append(["domain_from_rank", ">=", domain_from_rank_value])
    filters.append("and")
filters.append(["backlink_spam_score", "<=", backlink_spam_score])
filters.append("and")
filters.append(["page_from_language", "=", page_from_language])
filters.append("and")
filters.append(["anchor", "not_like", ""])
filters.append("and")

# Drop the trailing "and" connector.
if filters and filters[-1] == "and":
    filters.pop()

num_concurrent_calls = st.sidebar.number_input(
    "Concurrent Calls:", min_value=1, max_value=10, value=5, step=1
)

# --- Sidebar: URL list management ---
data_section = st.sidebar.text_area("Paste List of URLs:")
paste_data = st.sidebar.button("Paste Data")
add_row = st.sidebar.button("Add Row")
reset = st.sidebar.button("Reset")

if paste_data:
    pasted_urls = process_pasted_data(data_section)
    for i, url in enumerate(pasted_urls):
        st.session_state[f"target_url_{i}"] = url
    st.session_state["row_count"] = len(pasted_urls)

if add_row:
    row_count = st.session_state.get("row_count", 0) + 1
    st.session_state["row_count"] = row_count

if reset:
    st.session_state.clear()

row_count = st.session_state.get("row_count", 1)

generate_button = st.sidebar.button("Generate All")

if generate_button:
    # Fan the API calls out over a pool of worker threads.
    jobs = Queue()
    results = Queue()
    workers = [BacklinkWorker(jobs, results, api_login, api_key) for _ in range(num_concurrent_calls)]
    for worker in workers:
        worker.start()

    for i in range(row_count):
        target_url = st.session_state.get(f"target_url_{i}", "")
        if target_url:
            jobs.put((i, target_url, filters))

    # One sentinel per worker so every thread exits its loop.
    for _ in workers:
        jobs.put(None)
    for worker in workers:
        worker.join()

    while not results.empty():
        url_id, df = results.get()
        st.session_state[f"df_{url_id}"] = df

# --- Main area: one row per target URL with its download button ---
for i in range(row_count):
    cols = st.columns(2)
    target_url_key = f"target_url_{i}"
    target_url = cols[0].text_input(f"Enter the target URL {i + 1}", key=target_url_key)

    df = st.session_state.get(f"df_{i}")
    if df is not None:
        csv_data = convert_df_to_csv(df)
        cols[1].download_button(
            label=f"Download data as CSV for URL {i + 1}",
            data=csv_data,
            file_name=f'backlinks_{i + 1}.csv',
            mime='text/csv',
        )


# Concatenate all fetched DataFrames and offer them as a single CSV download.
def concatenate_dfs_and_download():
    dfs = [
        st.session_state[key]
        for key in st.session_state.keys()
        if key.startswith('df_') and st.session_state[key] is not None
    ]
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        csv_data = combined_df.to_csv(index=False).encode('utf-8')
        st.sidebar.download_button(
            label="Download All as CSV",
            data=csv_data,
            file_name='all_backlinks.csv',
            mime='text/csv',
        )
    else:
        st.sidebar.write("No data available to download.")


concatenate_dfs_and_download()
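

# Usage sketch (assumption: the script is saved as backlinks_app.py; the
# streamlit, pandas, and requests packages must be installed):
#
#     pip install streamlit pandas requests
#     streamlit run backlinks_app.py
#
# Enter the DataForSEO login and API key in the sidebar, paste one target URL
# per line, then click "Generate All" to fetch backlinks and download the CSVs.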