#!/bin/bash
#
# Download open-access article PDFs listed by the NCBI PMC OA web service and
# record a small metadata entry for each one.
#
# Usage:    run with no arguments.
# Outputs:  each PDF is saved in the current directory under its remote file
#           name (curl -O); one entry per successful download is appended to
#           metadata.txt; progress messages go to stdout, errors to stderr.
# Requires: bash and curl.
# Exit:     0 when the record list was fetched, non-zero when that fetch
#           failed. Individual download failures are reported and skipped.

# Errors are checked explicitly below instead of relying on `set -e`; the
# script's exit status is simply that of the final `main` call.
set -u

# Specify the number of articles to download
limit=10

# File that receives the metadata entries.
metadata_file="metadata.txt"

# Field separator used between parse_oa_records and main. It is a
# non-whitespace control character so that `read` keeps empty fields
# (e.g. a record without a citation) instead of collapsing them.
field_sep=$'\x1f'

#######################################
# Extract article records from an OA service XML response.
#
# The input is scanned tag by tag (split on '>'), so it works whether the
# XML is pretty-printed or delivered on a single line. Only <record>
# elements are considered; other elements that carry an id attribute are
# ignored. For each record the first <link> with format="pdf" is used.
#
# Globals:   field_sep (read)
# Arguments: none; XML is read from stdin
# Outputs:   one line per record on stdout:
#              <PMC id><sep><citation><sep><pdf href, empty if none>
# Returns:   0
#######################################
parse_oa_records() {
  local chunk tag
  local rec_id='' rec_citation='' rec_pdf=''
  local in_record=0

  local record_re='^record[[:space:]]'
  local link_re='^link[[:space:]]'
  local close_re='^/record[[:space:]]*$'
  local id_re='[[:space:]]id="(PMC[0-9]+)"'
  local citation_re='[[:space:]]citation="([^"]*)"'
  local pdf_format_re='[[:space:]]format="pdf"'
  local href_re='[[:space:]]href="([^"]*)"'

  while IFS= read -r -d '>' chunk; do
    # Skip chunks that contain no tag at all.
    [[ $chunk == *'<'* ]] || continue
    # Keep only the tag itself; drop any text that precedes it.
    tag=${chunk##*<}

    if [[ $tag =~ $record_re ]]; then
      in_record=0
      rec_id='' rec_citation='' rec_pdf=''
      if [[ $tag =~ $id_re ]]; then
        rec_id=${BASH_REMATCH[1]}
        in_record=1
        if [[ $tag =~ $citation_re ]]; then
          # Flatten line breaks so each record stays on one output line.
          rec_citation=${BASH_REMATCH[1]//$'\n'/ }
        fi
        # A self-closing <record .../> has no links; emit it right away.
        if [[ $tag == */ ]]; then
          printf '%s\n' "${rec_id}${field_sep}${rec_citation}${field_sep}"
          in_record=0
        fi
      fi
    elif (( in_record )) && [[ $tag =~ $link_re ]]; then
      if [[ -z $rec_pdf && $tag =~ $pdf_format_re && $tag =~ $href_re ]]; then
        rec_pdf=${BASH_REMATCH[1]}
      fi
    elif (( in_record )) && [[ $tag =~ $close_re ]]; then
      printf '%s\n' "${rec_id}${field_sep}${rec_citation}${field_sep}${rec_pdf}"
      in_record=0
    fi
  done

  return 0
}

#######################################
# Fetch the record list, then download each PDF and log its metadata.
#
# Globals:   limit, metadata_file, field_sep (read)
# Arguments: none
# Outputs:   progress on stdout, errors on stderr, PDFs and metadata on disk
# Returns:   0 on success, 1 if the record list could not be fetched
#######################################
main() {
  local response
  local pmc_id title pdf_link
  local processed=0
  local url_re='^(https?|ftp)://'

  # Fetch the list of articles with metadata in XML format.
  # NOTE(review): the `limit` query parameter is kept from the original
  # script; whether the service honours it should be confirmed against the
  # OA service documentation. The cap is enforced client-side below as well.
  if ! response=$(curl -fsS --connect-timeout 10 \
    "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?format=pdf&limit=$limit"); then
    echo "Failed to fetch the article list from the PMC OA service" >&2
    return 1
  fi

  # Read records on fd 3 so commands inside the loop cannot consume them.
  while IFS=$field_sep read -r -u 3 pmc_id title pdf_link; do
    if (( processed >= limit )); then
      break
    fi
    processed=$(( processed + 1 ))

    echo "Processing article ID: $pmc_id"

    # Check if we found a PDF link
    if [[ -z $pdf_link ]]; then
      echo "No PDF link found for article ID: $pmc_id"
      continue
    fi

    # The link comes from the network: only hand real URLs to curl so a
    # malformed value can never be interpreted as a command-line option.
    if [[ ! $pdf_link =~ $url_re ]]; then
      echo "Skipping unexpected PDF link for article ID: $pmc_id" >&2
      continue
    fi

    # Print metadata
    echo "Title: $title"
    echo "Downloading PDF from: $pdf_link"

    # Download the PDF
    if ! curl -f --connect-timeout 10 -O "$pdf_link"; then
      echo "Download failed for article ID: $pmc_id" >&2
      continue
    fi

    # Save metadata to a file (only for PDFs that were actually downloaded)
    {
      printf 'Title: %s\n' "$title"
      printf 'PDF Link: %s\n' "$pdf_link"
      printf '%s\n' "---------------------"
    } >> "$metadata_file"
  done 3< <(parse_oa_records <<<"$response")

  return 0
}

main "$@"