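"""Streamlit app: pull backlinks for a target URL from the DataForSEO
Backlinks API and offer the results as a downloadable CSV.

Run with: streamlit run <this_file>.py
"""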
import streamlit as st
import pandas as pd
import requests
import base64

def get_backlinks(api_login, api_key, target_url, filters, include_subdomains):
    # Encoding credentials
    encoded_credentials = base64.b64encode(f"{api_login}:{api_key}".encode()).decode()

    # Setting headers with Basic Authentication
    headers = {
        'Authorization': f'Basic {encoded_credentials}'
    }
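    # e.g. {"Authorization": "Basic bG9naW46a2V5"} for login "login" and key "key"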

    # Prepare the POST payload; the endpoint expects an array of task objects
    post_data = [
        {
            "target": target_url,
            "limit": 1000,
            "mode": "as_is",
            "filters": filters,
            "include_subdomains": include_subdomains  # whether subdomain backlinks count toward the target
        }
    ]
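    # Serialized body, e.g.:
    #   [{"target": "example.com", "limit": 1000, "mode": "as_is",
    #     "filters": [...], "include_subdomains": true}]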

    # Making the API request
    response = requests.post("https://api.dataforseo.com/v3/backlinks/backlinks/live", json=post_data, headers=headers)

    # Log the full response for debugging
    st.text("API Response:")
    st.text(f"Response Status Code: {response.status_code}")
    st.text(f"Response Headers: {response.headers}")
    try:
        response_json = response.json()
        st.text(f"Response Body: {response_json}")
    except ValueError as e:
        st.text(f"Response Body: <Not a JSON response>\nError: {e}")

    # On success, walk the 'tasks' -> 'result' -> 'items' structure of the response
    if response.status_code == 200:
        response_data = response.json()
        
        # Debugging: Print out the keys of the response_data
        st.text(f"Keys in response JSON: {list(response_data.keys())}")

        if 'tasks' in response_data:
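            # Expected shape (abridged): {"tasks": [{"result": [{"items": [...]}]}]}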
            # Assuming there is only one task and one result within each task
            task_result = response_data['tasks'][0]['result']
            if task_result and task_result[0] and 'items' in task_result[0]:
                # The actual backlink items are nested within 'items'
                items = task_result[0]['items']
                st.text(f"First few items: {items[:5]}")  # Debugging line to show the items structure
                
                # Convert to DataFrame
                df = pd.json_normalize(items)
                return df
            else:
                st.error("Received empty 'result' from API or missing 'items'.")
                return None
        else:
            st.error(f"No 'tasks' key in response JSON. Full response: {response_data}")
            return None
    else:
        # Surface the API's error message when the body is JSON; otherwise fall back
        try:
            error_message = response.json().get('status_message', 'No specific error message provided')
        except ValueError:
            error_message = 'No specific error message provided'
        st.error(f"Error: Code: {response.status_code} Message: {error_message}")
        return None

def convert_df_to_csv(df):
    # Serialize the DataFrame to CSV bytes, as st.download_button expects bytes or a string
    return df.to_csv(index=False).encode('utf-8')

# Streamlit layout
st.sidebar.title("DataForSEO API Parameters")
api_login = st.sidebar.text_input("API Login")
api_key = st.sidebar.text_input("API Key", type="password")

# Filters input
url_from_not_contain = st.sidebar.text_input("URL from does not contain (comma-separated)")
is_lost = st.sidebar.checkbox("Is Lost", value=False)
dofollow = st.sidebar.checkbox("Dofollow", value=True)
backlink_spam_score = st.sidebar.slider("Backlink Spam Score ≤", 0, 100, 10)
page_from_language = st.sidebar.selectbox("Page From Language", ['en', 'other'])
include_subdomains = st.sidebar.checkbox("Include Subdomains", value=True)  # New filter

# Prepare filters for API call
filters = []

if url_from_not_contain:
    for url in url_from_not_contain.split(','):
        filters.append(["url_from", "not_like", url.strip()])
        filters.append("and")

if is_lost:
    filters.append(["is_lost", "=", is_lost])
    filters.append("and")

if dofollow:
    filters.append(["dofollow", "=", dofollow])
    filters.append("and")

filters.append(["backlink_spam_score", "<=", backlink_spam_score])
filters.append("and")
filters.append(["page_from_language", "=", page_from_language])

# Remove the last "and" if it's the last element
if filters and filters[-1] == "and":
    filters.pop()
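
# Example of the resulting expression (with "spam.com" excluded and the defaults above):
#   [["url_from", "not_like", "spam.com"], "and",
#    ["dofollow", "=", True], "and",
#    ["backlink_spam_score", "<=", 10], "and",
#    ["page_from_language", "=", "en"]]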

# Main app layout
col1, col2 = st.columns(2)

with col1:
    st.header("Input")
    target_url = st.text_input("Enter the target URL")

generate_button = st.sidebar.button("Generate All")
reset_button = st.sidebar.button("Reset")

df = None

# Generate CSV and download button
if generate_button and target_url:
    df = get_backlinks(api_login, api_key, target_url, filters, include_subdomains)
    if df is not None:
        csv = convert_df_to_csv(df)
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name='backlinks.csv',
            mime='text/csv',
        )
    else:
        st.error("Failed to generate CSV: No data returned from the API or data processing error.")

# Reset functionality
if reset_button:
    st.rerun()  # replaces the deprecated st.experimental_rerun()