import csv
import os

import requests
from scholarly import ProxyGenerator, scholarly

# Scopus API key; must be set in the environment before using search_elsevier
api_key = os.getenv('ELSEVIER_API_KEY')
# Track whether the proxy setup has already been done in this session
proxy_setup_done = False

def setup_proxy():
    global proxy_setup_done
    # Check if the proxy setup has already been done
    if not proxy_setup_done:
        # Set up a ProxyGenerator object to use free proxies
        pg = ProxyGenerator()
        pg.FreeProxies()
        scholarly.use_proxy(pg)
        
        # Mark the setup as done
        proxy_setup_done = True
        print("Proxy setup completed.")
    else:
        print("Proxy setup was already completed earlier in this session.")

# Example usage
setup_proxy()
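# Note: free proxies are often slow or blocked; if Scholar lookups keep failing,
# scholarly's ProxyGenerator also supports other backends (e.g. pg.ScraperAPI(...)
# with a paid API key) as a more reliable alternative.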


def fetch_papers(search_string, min_results=8):
    """Query Google Scholar via scholarly; min_results is effectively an upper bound."""
    search_query = scholarly.search_pubs(search_string)
    papers_details = []
    for _ in range(min_results):
        try:
            paper = next(search_query)
            paper_details = {
                'title': paper['bib'].get('title', 'Not Available'),
                'author': paper['bib'].get('author', 'Not Available'),
                'pub_year': paper['bib'].get('pub_year', 'Not Available'),
                'publication_url': paper.get('pub_url', 'Not Available'),
                'journal_name': paper['bib'].get('journal', 'Not Available'),
                # Attempting to extract DOI, publication date, and making an educated guess on paper type
                'doi': paper.get('doi', 'Not Available'),
                'publication_date': paper['bib'].get('pub_year', 'Not Available'), # Simplified to publication year
                'paper_type': 'Journal' if 'journal' in paper['bib'] else 'Conference' if 'conference' in paper['bib'] else 'Primary Study' # Simplistic categorization
            }
            papers_details.append(paper_details)
        except StopIteration:
            break  # Exit if there are no more results
    return papers_details


def save_papers_to_csv(papers_details, filename='papers.csv'):
    """Write the collected paper records to a CSV file with a fixed column order."""
    fieldnames = ['title', 'author', 'pub_year', 'publication_url', 'journal_name', 'doi', 'publication_date', 'paper_type']
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for paper in papers_details:
            writer.writerow(paper)

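# A minimal usage sketch for the two helpers above (the query string is a
# hypothetical placeholder; results depend on the free-proxy session working):
#
#     papers = fetch_papers("machine learning for code review", min_results=10)
#     save_papers_to_csv(papers, filename='papers.csv')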


def search_elsevier(search_string, start_year, end_year, limit):
    """Search the Scopus API for papers published between start_year and end_year."""
    url = "https://api.elsevier.com/content/search/scopus"
    headers = {
        "X-ELS-APIKey": api_key,
        "Accept": "application/json"
    }

    # Restrict results to the requested publication-year range (inclusive);
    # Scopus PUBYEAR comparisons are strict, hence the +/- 1 adjustment
    query = f"TITLE-ABS-KEY({search_string}) AND PUBYEAR > {start_year - 1} AND PUBYEAR < {end_year + 1}"
    params = {
        "query": query,
        "count": limit,  # Scopus caps the page size (25 for the standard view)
    }

    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        response_data = response.json()
        papers = response_data.get('search-results', {}).get('entry', [])
        parsed_papers = []
        for paper in papers:
            parsed_paper = {
                # Take the first listed affiliation, if any
                "affiliation-country": next((affil.get("affiliation-country", "Not Available") for affil in paper.get("affiliation", [])), "Not Available"),
                "affilname": next((affil.get("affilname", "Not Available") for affil in paper.get("affiliation", [])), "Not Available"),
                "creator": paper.get("dc:creator", "Not Available"),
                "identifier": paper.get("dc:identifier", "Not Available"),
                "title": paper.get("dc:title", "Not Available"),
                "link": next((link["@href"] for link in paper.get("link", []) if link["@ref"] == "scopus"), "Not Available"),
                "year": paper.get("prism:coverDate", "Not Available").split("-")[0],
                "openaccess": paper.get("openaccess", "0") == "1",
                "publicationName": paper.get("prism:publicationName", "Not Available"),
                "aggregationType": paper.get("prism:aggregationType", "Not Available"),
                "volume": paper.get("prism:volume", "Not Available"),
                "doi": paper.get("prism:doi", "Not Available")
            }
            parsed_papers.append(parsed_paper)
        return parsed_papers
    else:
        print(f"Failed to fetch papers: {response.status_code} {response.text}")
        return {"error": "Failed to fetch papers from Elsevier", "status_code": response.status_code, "message": response.text}