File size: 2,669 Bytes
496ed86 f38f2ab 496ed86 f38f2ab 496ed86 f38f2ab 496ed86 f38f2ab 4f8d07a f38f2ab 496ed86 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import FileResponse
import os
import io
from fastapi.middleware.cors import CORSMiddleware
import requests
import pandas as pd
# ASGI application object that the route decorators in this module attach to.
app = FastAPI()

# Cross-origin policy: any origin, HTTP method and request header is accepted,
# and credentialed requests are allowed.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive — confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
# Parameters
# API key passed to the Apollo enrichment call. Supply it through the
# APOLLO_API_KEY environment variable; the literal is kept only as a
# backward-compatible fallback for deployments that do not set the variable.
# NOTE(review): a key committed to source control should be treated as
# leaked — rotate it and remove the fallback once the variable is configured.
API_KEY = os.environ.get("APOLLO_API_KEY", "an3vib2nh4-3R48tMWfBZg")
# Header of the spreadsheet column that holds the company website URLs.
WEBSITE_COLUMN = "Website"
def get_company_data(api_key, domain, timeout=10, session=None):
    """Fetch enrichment data for one company domain from the Apollo API.

    The lookup is best-effort: a network error, a non-JSON response or a
    response without an ``organization`` object yields a record whose fields
    are all ``"unknown"`` instead of raising, so one bad domain does not
    abort a whole batch.

    Args:
        api_key: Apollo API key, sent as the ``api_key`` query parameter.
        domain: Company domain to look up, e.g. ``"example.com"``.
        timeout: Seconds to wait for the HTTP call before giving up.
        session: Optional object with a ``requests``-compatible
            ``get(url, params=..., timeout=...)`` method (for example a
            ``requests.Session``) so repeated lookups can reuse connections.
            Defaults to the ``requests`` module itself.

    Returns:
        A dict with the keys ``domain``, ``alexa_ranking``,
        ``annual_revenue``, ``country``, ``estimated_num_employees``,
        ``industry``, ``keywords`` and ``linkedin_uid``. A field missing from
        the response is reported as ``"unknown"``.
    """
    fields = (
        "alexa_ranking",
        "annual_revenue",
        "country",
        "estimated_num_employees",
        "industry",
        "keywords",
        "linkedin_uid",
    )
    # Start from the "nothing known" record; it is returned unchanged on
    # every failure path and filled in on success.
    record = {"domain": domain}
    record.update(dict.fromkeys(fields, "unknown"))

    http = requests if session is None else session
    try:
        # Passing ``params`` lets the HTTP client URL-encode the values
        # instead of interpolating raw text into the query string.
        response = http.get(
            "https://api.apollo.io/v1/organizations/enrich",
            params={"api_key": api_key, "domain": domain},
            timeout=timeout,
        )
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        print(f"Request failed for {domain}: {exc}")
        return record

    # The payload may be a non-dict, or carry ``"organization": null``.
    org = result.get("organization") if isinstance(result, dict) else None
    if not isinstance(org, dict):
        print(f"No data for {domain}")
        return record

    for name in fields:
        record[name] = org.get(name, "unknown")
    return record
def _get_domain(url):
    """Return the host part of *url*, or ``""`` when none can be found.

    Accepts values with or without a scheme (``"https://example.com/about"``
    and ``"example.com/about"`` both give ``"example.com"``); any path, query
    string or fragment is dropped.
    """
    from urllib.parse import urlsplit

    text = str(url).strip()
    if "://" not in text and not text.startswith("//"):
        # Without a leading "//" urlsplit would treat the host as a path.
        text = "//" + text
    try:
        return urlsplit(text).netloc
    except ValueError:
        # urlsplit rejects some malformed hosts (e.g. unbalanced brackets).
        return ""


@app.post("/get_data_file")
def main(file: UploadFile = File(...)):
    """Enrich the websites listed in an uploaded Excel file.

    Reads the ``WEBSITE_COLUMN`` column of the uploaded workbook, reduces
    each entry to its domain, looks every distinct domain up with
    ``get_company_data`` and returns the results as an Excel download named
    ``CompanyData.xlsx``.

    Args:
        file: The uploaded Excel workbook.

    Returns:
        A ``FileResponse`` streaming the generated workbook.

    Raises:
        HTTPException: With status 400 when the upload cannot be parsed as
            an Excel file or lacks the ``WEBSITE_COLUMN`` column.
    """
    import tempfile
    import zipfile

    from fastapi import HTTPException
    from starlette.background import BackgroundTask

    print(file.filename)

    # Parse the upload from memory. The client-supplied filename is never
    # used as a filesystem path, so it cannot overwrite or escape anything.
    try:
        data = pd.read_excel(io.BytesIO(file.file.read()))
    except (ValueError, zipfile.BadZipFile) as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file could not be read as an Excel workbook.",
        ) from exc

    if WEBSITE_COLUMN not in data.columns:
        raise HTTPException(
            status_code=400,
            detail=f"Missing required column: {WEBSITE_COLUMN}",
        )

    # Skip blank cells, normalise to bare domains, and only then de-duplicate
    # so "http://a.com" and "https://a.com/" result in a single lookup.
    domains = data[WEBSITE_COLUMN].dropna().astype(str).map(_get_domain)
    domains = domains[domains != ""].drop_duplicates()

    # Fetch company data for each website
    company_data = [get_company_data(API_KEY, domain) for domain in domains]

    # Write to a per-request temporary file so concurrent requests do not
    # clobber each other's output.
    download_name = "CompanyData.xlsx"
    handle, output_path = tempfile.mkstemp(suffix=".xlsx")
    os.close(handle)
    try:
        pd.DataFrame(company_data).to_excel(output_path, index=False)
    except Exception:
        os.remove(output_path)
        raise
    print("Company data has been successfully fetched and saved.")

    # The background task deletes the temporary file once the response has
    # been sent.
    return FileResponse(
        output_path,
        media_type='application/octet-stream',
        filename=download_name,
        background=BackgroundTask(os.remove, output_path),
    )
|