File size: 2,669 Bytes
496ed86
f38f2ab
496ed86
 
f38f2ab
 
 
496ed86
 
 
f38f2ab
 
 
 
 
 
 
 
 
 
496ed86
 
 
f38f2ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f8d07a
f38f2ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496ed86
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import io
import os
import tempfile

import pandas as pd
import requests
from fastapi import BackgroundTasks, FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse


app = FastAPI()
# NOTE(review): wildcard origins/methods/headers combined with
# allow_credentials=True is a very permissive CORS policy — confirm this is
# intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Parameters
# API key sent to api.apollo.io. Set the APOLLO_API_KEY environment variable
# to supply it without editing the source.
# NOTE(review): the literal below is kept only as a backward-compatible
# fallback; a key committed to source control should be rotated and removed.
API_KEY = os.environ.get("APOLLO_API_KEY", "an3vib2nh4-3R48tMWfBZg")
# Name of the spreadsheet column that holds the company website URLs.
WEBSITE_COLUMN = "Website"



def get_company_data(api_key, domain, timeout=10):
    """Look up one company on the Apollo organization-enrich endpoint.

    Args:
        api_key: API key passed as the ``api_key`` query parameter.
        domain: Company domain passed as the ``domain`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the key ``"domain"`` followed by ``"alexa_ranking"``,
        ``"annual_revenue"``, ``"country"``, ``"estimated_num_employees"``,
        ``"industry"``, ``"keywords"`` and ``"linkedin_uid"``. A field that
        is absent from the response is set to ``"unknown"``. When the request
        fails, the body is not JSON, or the response carries no organization
        object, every field is ``"unknown"``.
    """
    fields = (
        "alexa_ranking",
        "annual_revenue",
        "country",
        "estimated_num_employees",
        "industry",
        "keywords",
        "linkedin_uid",
    )
    record = {"domain": domain}
    record.update(dict.fromkeys(fields, "unknown"))

    try:
        # Let requests build the query string so the domain and key are
        # URL-encoded instead of being spliced into the URL verbatim.
        response = requests.get(
            "https://api.apollo.io/v1/organizations/enrich",
            params={"api_key": api_key, "domain": domain},
            timeout=timeout,
        )
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Report only the exception type: the full message can contain the
        # request URL, which includes the API key.
        print(f"Request failed for {domain}: {type(exc).__name__}")
        return record

    org = result.get("organization") if isinstance(result, dict) else None
    if not isinstance(org, dict):
        # Covers both a missing "organization" key and an explicit null.
        print(f"No data for {domain}")
        return record

    for field in fields:
        record[field] = org.get(field, "unknown")
    return record

def _get_domain(url):
    """Return the bare host part of a website URL (no scheme, path or query)."""
    text = str(url).strip()
    if "//" in text:
        text = text[text.index("//") + 2:]
    text = text.lstrip("/")
    # Cut at the first path, query or fragment delimiter so only the host
    # remains, e.g. "https://example.com/about?x=1" -> "example.com".
    for delimiter in ("/", "?", "#"):
        text = text.split(delimiter, 1)[0]
    return text


@app.post("/get_data_file")
def main(file: UploadFile = File(...)):
    """Enrich every website in an uploaded Excel sheet and return the result.

    The upload must contain a column named by ``WEBSITE_COLUMN``. Each
    distinct host found in that column is passed to ``get_company_data`` and
    the collected records are returned as an Excel download named
    ``CompanyData.xlsx``.

    Args:
        file: The uploaded Excel workbook.

    Returns:
        A ``FileResponse`` streaming the generated workbook.

    Raises:
        HTTPException: 400 when the workbook lacks the website column.
    """
    print(file.filename)

    # Parse the upload from memory: the client-supplied filename is untrusted
    # and must not be used as a path on the server's filesystem.
    data = pd.read_excel(io.BytesIO(file.file.read()))
    if WEBSITE_COLUMN not in data.columns:
        raise HTTPException(
            status_code=400,
            detail=f"Uploaded file has no '{WEBSITE_COLUMN}' column.",
        )

    # Drop blank cells, normalise to hosts, then de-duplicate so that
    # "http://a.com" and "https://a.com/" trigger a single lookup.
    websites = data[WEBSITE_COLUMN].dropna().astype(str).apply(_get_domain)
    websites = websites[websites != ""].drop_duplicates()

    # Fetch company data for each website
    company_data = [get_company_data(API_KEY, website) for website in websites]
    df = pd.DataFrame(company_data)

    # Write to a unique temporary file so concurrent requests cannot
    # overwrite each other's output.
    fd, output_path = tempfile.mkstemp(suffix=".xlsx")
    os.close(fd)
    try:
        df.to_excel(output_path, index=False)
    except Exception:
        # Do not leave the temp file behind when the export fails.
        os.remove(output_path)
        raise

    print("Company data has been successfully fetched and saved.")

    # Remove the temporary file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, output_path)
    return FileResponse(
        output_path,
        media_type='application/octet-stream',
        filename="CompanyData.xlsx",
        background=cleanup,
    )