# es_gpt / crawl_index.py
# hunkim's picture
# Create crawl_index.py
# 4ffd5f1
# raw
# history blame contribute delete
# 575 Bytes
from scholarly import scholarly
import json
from es_gpt import ESGPT
# Crawl one author's publication list with `scholarly` and index every
# publication into the "papers" index through ESGPT.

# Create an instance of the ESGPT class
esgpt = ESGPT(index_name="papers")

# Search for papers by author ID
author = scholarly.search_author_id("JE_m2UgAAAAJ")
# NOTE: despite its name, `papers` is the author record returned by
# scholarly.fill(); the publication entries live under its 'publications' key.
papers = scholarly.fill(author, sections=['publications'])

# Index each paper in Elasticsearch
for paper in papers['publications']:
    # Fetch the publication's details before indexing.
    # NOTE(review): an empty `sections` list presumably means "fill
    # everything" -- confirm against the scholarly documentation.
    paper = scholarly.fill(paper, sections=[])
    print(paper)

    paper_dict = paper['bib']
    # Named `paper_id` rather than `id` so the builtin id() is not shadowed.
    paper_id = paper['author_pub_id']

    # Index the paper in Elasticsearch
    esgpt.index(doc_id=paper_id, doc=paper_dict)