---
# Daily pipeline:
#   1. run the scraper and commit/push whatever it changed;
#   2. run the knowledge-graph update and commit/push whatever that changed.
name: Scrape Data

on:
  schedule:
    # Once a day at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 0 * * *'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

# Two steps push commits back to the repository, so the job token must be
# allowed to write repository contents. Nothing else is required.
permissions:
  contents: write

jobs:
  scrape:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps it a string instead of the float 3.1.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run scraping script
        run: python ./scrape_data_indeed/scrape_data.py

      - name: Commit and push changes
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          git add .
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the job and skip the knowledge-graph update on days where
          # the scraper produced no changes. Only commit when the index
          # actually differs from HEAD.
          git diff --cached --quiet || git commit -m "Update scraped data"
          git push
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Pull latest changes
        run: git pull origin main

      - name: Create .env file to store API key
        # Secrets are handed to the shell as environment variables rather
        # than being spliced into the script text, so values containing
        # quotes, `$`, backticks or backslashes are written verbatim and
        # cannot be interpreted as shell syntax.
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          NEO4J_URI: ${{ secrets.NEO4J_URI }}
          NEO4J_USERNAME: ${{ secrets.NEO4J_USERNAME }}
          NEO4J_PASSWORD: ${{ secrets.NEO4J_PASSWORD }}
        run: |
          {
            printf 'GEMINI_API_KEY=%s\n' "$GEMINI_API_KEY"
            printf 'NEO4J_URI=%s\n' "$NEO4J_URI"
            printf 'NEO4J_USERNAME=%s\n' "$NEO4J_USERNAME"
            printf 'NEO4J_PASSWORD=%s\n' "$NEO4J_PASSWORD"
          } >> ./.env

      # NOTE(review): presumably this script reads its credentials from the
      # ./.env file created above - confirm against the script.
      - name: Run Python scripts to update KG
        run: python ./Knowledge_Graph/update_knowledge_graph.py

      # Delete the credentials file before staging, so `git add -A` in the
      # next step cannot pick it up.
      - name: Remove file .env
        run: rm ./.env

      - name: Commit files
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          git add -A
          # Commit only when the staged tree differs from HEAD.
          git diff-index --quiet HEAD || (git commit -a -m "updated logs" --allow-empty)

      - name: Push changes
        uses: ad-m/github-push-action@v0.6.0
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: main