diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..bfd0e8b074a7be7ab48a5a1322596da1a159b396 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +venv/ +__pycache__/ +*.pyc +*.pyo +*.pyd +*.tar.gz +*.log +*.pid diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..26d33521af10bcc7fd8cea344038eaaeb78d0ef5 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/MT564AITraining.iml b/.idea/MT564AITraining.iml new file mode 100644 index 0000000000000000000000000000000000000000..8388dbc88e9e141893654c3ba2c7dc0d36bddcb0 --- /dev/null +++ b/.idea/MT564AITraining.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000000000000000000000000000000000000..812ab5a681a7844ea9041d160d3b652a326096c7 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000000000000000000000000000000000000..d264ee5e1cd90dbcff8c3c9a4a90b9ac81cfc890 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000000000000000000000000000000000000..35eb1ddfbbc029bcab630581847471d7f238ec53 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md new file mode 100644 index 0000000000000000000000000000000000000000..6a4d7e4df723b0bd933cad4c74a6e9702bc70d75 --- /dev/null +++ b/DOCUMENTATION.md @@ -0,0 +1,279 @@ +# SWIFT MT564 Documentation Assistant + +**Version:** 1.0.0 +**Date:** May 14, 2025 +**Author:** Replit AI + +## Table of Contents + +1. [Introduction](#introduction) +2. [Project Overview](#project-overview) +3. [System Architecture](#system-architecture) +4. [Installation & Setup](#installation--setup) +5. [Component Details](#component-details) + - [Data Collection](#data-collection) + - [Model Training](#model-training) + - [Web Interface](#web-interface) + - [Hugging Face Integration](#hugging-face-integration) +6. [Usage Guide](#usage-guide) +7. [Troubleshooting](#troubleshooting) +8. [References](#references) + +## Introduction + +The SWIFT MT564 Documentation Assistant is a specialized AI system designed to help financial professionals understand and work with SWIFT MT564 message formats (Corporate Action Notifications). It combines web scraping, natural language processing, and a conversational interface to provide an intelligent assistant for interpreting MT564 documentation. + +## Project Overview + +This project creates a complete pipeline that: + +1. Scrapes SWIFT MT564 documentation from official sources +2. Processes this information into a structured format +3. Fine-tunes a TinyLlama language model on this specialized data +4. Provides a user interface for asking questions about MT564 +5. 
Enables deployment to Hugging Face for easy sharing and use + +The system is designed to be modular, allowing for future expansion to other SWIFT message types or financial documentation. + +## System Architecture + +The system consists of several key components: + +``` +SWIFT-MT564-Assistant/ +├── scrapers/ # Web scraping components +│ ├── iso20022_scraper.py # Scraper for ISO20022 website +│ ├── pdf_parser.py # PDF extraction utilities +│ └── data_processor.py # Converts raw data to training format +│ +├── model/ # ML model components +│ ├── download_tinyllama.py # Script to download TinyLlama model +│ ├── upload_to_huggingface.py # Script to upload model to Hugging Face +│ ├── tinyllama_trainer.py # Fine-tuning implementation +│ └── evaluator.py # Tests model performance +│ +├── webapp/ # Web application +│ ├── app.py # Flask application +│ ├── templates/ # HTML templates +│ │ ├── index.html # Main page +│ │ └── result.html # Results display +│ └── static/ # CSS, JS, and other static files +│ +├── data/ # Data storage +│ ├── raw/ # Raw scraped data +│ ├── processed/ # Processed training data +│ └── uploaded/ # User-uploaded PDFs +│ +├── train_mt564_model.py # Script to train the model +├── prepare_mt564_data.py # Script to prepare training data +├── dependencies.txt # Project dependencies +├── setup.py # Setup and utility script +└── README.md # Project documentation +``` + +## Installation & Setup + +### System Requirements + +- Python 3.8 or higher +- At least 4GB RAM (8GB+ recommended) +- At least 10GB free disk space +- CUDA-compatible GPU recommended for training (but not required) +- Internet connection for downloading models and data + +### Local Installation + +1. **Clone or download the project**: + - Download the zip file from Replit + - Extract to a folder on your local machine + +2. **Set up a virtual environment**: + ```bash + # Create a virtual environment + python -m venv venv + + # Activate the environment + # On Windows: + venv\Scripts\activate + # On macOS/Linux: + source venv/bin/activate + ``` + +3. **Install dependencies**: + ```bash + # Install core dependencies + pip install torch transformers datasets huggingface_hub accelerate + pip install requests beautifulsoup4 trafilatura flask + pip install PyPDF2 tqdm nltk rouge + + # Or use the dependencies.txt file + pip install -r dependencies.txt + ``` + +4. **Run the setup script for guidance**: + ```bash + python setup.py --mode guide + ``` + +### Environment Variables + +The following environment variables are used: + +- `HUGGING_FACE_TOKEN`: Your Hugging Face API token (for uploading models) +- `FLASK_APP`: Set to "webapp/app.py" for running the web interface +- `FLASK_ENV`: Set to "development" for debugging or "production" for deployment + +## Component Details + +### Data Collection + +The data collection process involves scraping SWIFT MT564 documentation from official sources: + +1. **ISO20022 Website Scraping**: + ```bash + python scrapers/iso20022_scraper.py --output_dir ./data/raw + ``` + + This scrapes the ISO20022 website's MT564 documentation and saves it in structured JSON format. + +2. **Data Processing**: + ```bash + python prepare_mt564_data.py --input_file ./data/raw/mt564_documentation.json --output_file ./data/processed/mt564_training_data.json + ``` + + This converts the raw data into instruction-response pairs suitable for training. + +### Model Training + +The model training process involves: + +1. 
**Downloading the base model**: + ```bash + python model/download_tinyllama.py --model_name TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./data/models + ``` + +2. **Fine-tuning on MT564 data**: + ```bash + python train_mt564_model.py --model_name ./data/models/TinyLlama-1.1B-Chat-v1.0 --training_data ./data/processed/mt564_training_data.json --output_dir ./mt564_tinyllama_model + ``` + + Training parameters can be adjusted as needed: + - `--epochs`: Number of training epochs (default: 3) + - `--batch_size`: Batch size (default: 2) + - `--learning_rate`: Learning rate (default: 2e-5) + +3. **Evaluating the model**: + The training script includes validation, but further evaluation can be performed on test data if needed. + +### Web Interface + +The web interface provides a user-friendly way to interact with the model: + +1. **Starting the web server**: + ```bash + python webapp/app.py + ``` + +2. **Using the interface**: + - Open a browser and navigate to `http://localhost:5000` + - Upload SWIFT MT564 documentation PDFs + - Ask questions about the message format + - View AI-generated responses + +### Hugging Face Integration + +The project includes tools for seamless integration with Hugging Face: + +1. **Uploading your model**: + ```bash + # Set your Hugging Face API token + export HUGGING_FACE_TOKEN=your_token_here + + # Upload the model + python model/upload_to_huggingface.py --model_dir ./mt564_tinyllama_model --repo_name your-username/mt564-tinyllama + ``` + +2. **Creating a Hugging Face Space**: + - Go to huggingface.co and click "New Space" + - Choose Gradio or Streamlit template + - Link to your uploaded model + - Use the sample code provided in the setup guide + +## Usage Guide + +### Common Workflows + +#### Complete Pipeline + +1. Scrape data → 2. Process data → 3. Download model → 4. Train model → 5. Upload to Hugging Face + +```bash +# 1. Scrape data +python scrapers/iso20022_scraper.py --output_dir ./data/raw + +# 2. Process data +python prepare_mt564_data.py --input_file ./data/raw/mt564_documentation.json --output_file ./data/processed/mt564_training_data.json + +# 3. Download model +python model/download_tinyllama.py --output_dir ./data/models + +# 4. Train model +python train_mt564_model.py --training_data ./data/processed/mt564_training_data.json --output_dir ./mt564_tinyllama_model + +# 5. Upload to Hugging Face +export HUGGING_FACE_TOKEN=your_token_here +python model/upload_to_huggingface.py --model_dir ./mt564_tinyllama_model --repo_name your-username/mt564-tinyllama +``` + +#### Using Pre-trained Model + +If you already have a trained model, you can skip steps 1-4 and just run the web interface: + +```bash +# Start the web interface +python webapp/app.py +``` + +## Troubleshooting + +### Common Issues + +1. **Out of memory during training**: + - Reduce batch size: `--batch_size 1` + - Increase gradient accumulation: `--gradient_accumulation_steps 8` + - Use CPU only if necessary: `--device cpu` + +2. **Installation errors**: + - Make sure you're using Python 3.8+ + - Try installing dependencies one by one + - Check for package conflicts + +3. **Hugging Face upload issues**: + - Verify your HUGGING_FACE_TOKEN is set correctly + - Make sure you have write access to the repository + - Check for repository naming conflicts + +### Getting Help + +If you encounter issues: +1. Check the error messages for specific details +2. Consult the Hugging Face documentation for model/API issues +3. 
Review the TinyLlama documentation for model-specific questions + +## References + +- [SWIFT MT564 Documentation](https://www.iso20022.org/15022/uhb/finmt564.htm) +- [TinyLlama Project](https://github.com/jzhang38/TinyLlama) +- [Hugging Face Documentation](https://huggingface.co/docs) +- [Transformers Library](https://huggingface.co/docs/transformers/index) +- [Flask Web Framework](https://flask.palletsprojects.com/) + +--- + +## License + +This project is available under the Apache 2.0 License. + +## Acknowledgements + +This project utilizes several open-source libraries and resources, including TinyLlama, Hugging Face Transformers, and Flask. \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/api/.DS_Store b/api/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e66682e193ce3892d06aec04e5520a83604948d7 Binary files /dev/null and b/api/.DS_Store differ diff --git a/api/__init__.py b/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4a1e6367987b7d6f8bfbce3777b2d81b161b6c03 --- /dev/null +++ b/api/__init__.py @@ -0,0 +1 @@ +# This file makes the api directory a Python package diff --git a/api/horoscope_routes.py b/api/horoscope_routes.py new file mode 100644 index 0000000000000000000000000000000000000000..b92cc9e6d77d7c74cee64c863ba773eae36742d2 --- /dev/null +++ b/api/horoscope_routes.py @@ -0,0 +1,462 @@ +import logging +from flask import request, jsonify, Blueprint +from services.horoscope_service import horoscope_service +from services.llm_service import llm_service +from services.scheduler_service import scheduler_service +from services.wordpress_service import wordpress_service +from utils.rate_limiter import RateLimiter +from models import db, Horoscope, ConsolidatedHoroscope, ScheduledJob, WordPressExport +from datetime import datetime, date +import json + +logger = logging.getLogger(__name__) + +# Create Blueprint +horoscope_bp = Blueprint('horoscope', __name__, url_prefix='/api/horoscope') + +# API-wide rate limiter (10 requests per minute) +api_rate_limiter = RateLimiter(window_size=60, max_requests=10) + + +@horoscope_bp.route('/health', methods=['GET']) +def health_check(): + """Health check endpoint for horoscope API""" + return jsonify({ + "status": "ok", + "services": { + "horoscope_scraper": "up", + "llm": "up" if llm_service.api_key else "down", + "scheduler": "up" if scheduler_service.running else "down", + "wordpress": "up" if wordpress_service.is_configured else "down" + } + }) + + +@horoscope_bp.route('/scrape', methods=['POST']) +def scrape_horoscope(): + """Scrape horoscope for a specific sign""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Get parameters from request + data = request.get_json() + if not data: + return jsonify({"error": "Missing request data"}), 400 + + sign = data.get('sign') + source = data.get('source') 
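+    # Optional filter: "source" restricts scraping to a single configured
+    # scraper (e.g. "astrology.com"); when omitted, all sources are scraped below.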
+ date_str = data.get('date') + + if not sign: + return jsonify({"error": "Missing 'sign' parameter"}), 400 + + if sign.lower() not in horoscope_service.scrapers["astrology.com"].ZODIAC_SIGNS: + return jsonify({"error": f"Invalid zodiac sign: {sign}"}), 400 + + # If source is specified, check if it's valid + if source and source not in horoscope_service.scrapers: + return jsonify({"error": f"Unknown source: {source}"}), 400 + + # Scrape from all sources or the specified one + if source: + result = horoscope_service.scrape_sign(source, sign, date_str) + else: + result = horoscope_service.scrape_sign_from_all_sources(sign, date_str) + + return jsonify(result) + + +@horoscope_bp.route('/scrape-all', methods=['POST']) +def scrape_all_horoscopes(): + """Scrape horoscopes for all signs from all sources""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Get date from request + data = request.get_json() or {} + date_str = data.get('date') + + # Scrape all horoscopes + results = horoscope_service.scrape_all_horoscopes(date_str) + + return jsonify({"results": results}) + + +@horoscope_bp.route('/get/', methods=['GET']) +def get_horoscope(sign): + """Get horoscope for a specific sign""" + # Check if sign is valid + if sign.lower() not in horoscope_service.scrapers["astrology.com"].ZODIAC_SIGNS: + return jsonify({"error": f"Invalid zodiac sign: {sign}"}), 400 + + # Get optional parameters + date_str = request.args.get('date') + source = request.args.get('source') + + # Get horoscope + result = horoscope_service.get_horoscope(sign, date_str, source) + + return jsonify(result) + + +@horoscope_bp.route('/get-all', methods=['GET']) +def get_all_horoscopes(): + """Get horoscopes for all signs for a specific date""" + # Get date parameter + date_str = request.args.get('date') + + # Get horoscopes + result = horoscope_service.get_horoscopes_for_date(date_str) + + return jsonify(result) + + +@horoscope_bp.route('/consolidate/', methods=['POST']) +def consolidate_horoscope(sign): + """Consolidate horoscopes for a specific sign using LLM""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Check if sign is valid + if sign.lower() not in horoscope_service.scrapers["astrology.com"].ZODIAC_SIGNS: + return jsonify({"error": f"Invalid zodiac sign: {sign}"}), 400 + + # Get date from request + data = request.get_json() or {} + date_str = data.get('date') + + # Parse date + if date_str: + try: + horoscope_date = datetime.strptime(date_str, '%Y-%m-%d').date() + except ValueError: + return jsonify({"error": f"Invalid date format: {date_str}. 
Use YYYY-MM-DD."}), 400 + else: + horoscope_date = date.today() + + # Get horoscopes for the sign and date + horoscopes = Horoscope.query.filter_by( + sign=sign.lower(), + date=horoscope_date + ).all() + + if not horoscopes: + # Try to scrape if no horoscopes found + horoscope_service.scrape_sign_from_all_sources(sign, date_str) + + # Check again + horoscopes = Horoscope.query.filter_by( + sign=sign.lower(), + date=horoscope_date + ).all() + + if not horoscopes: + return jsonify({"error": f"No horoscopes found for {sign} on {horoscope_date}"}), 404 + + # Check if already consolidated + existing = ConsolidatedHoroscope.query.filter_by( + sign=sign.lower(), + date=horoscope_date + ).first() + + if existing: + return jsonify({ + "message": f"Horoscope for {sign} on {horoscope_date} already consolidated", + "horoscope": existing.to_dict() + }) + + # Convert to format needed by LLM service + horoscope_data = [h.to_dict() for h in horoscopes] + + # Consolidate data using LLM + consolidated = llm_service.consolidate_horoscopes(horoscope_data) + + if not consolidated or "error" in consolidated: + return jsonify({ + "error": f"Error consolidating horoscopes: {consolidated.get('error', 'Unknown error')}" + }), 500 + + # Create new consolidated horoscope + sources = [h.source for h in horoscopes] + new_consolidated = ConsolidatedHoroscope() + new_consolidated.sign = sign.lower() + new_consolidated.date = horoscope_date + new_consolidated.consolidated_prediction = consolidated.get("consolidated_prediction", "") + new_consolidated.sources = json.dumps(sources) + + db.session.add(new_consolidated) + db.session.commit() + + return jsonify({ + "message": f"Consolidated horoscope created for {sign} on {horoscope_date}", + "horoscope": new_consolidated.to_dict() + }) + + +@horoscope_bp.route('/consolidate-all', methods=['POST']) +def consolidate_all_horoscopes(): + """Consolidate horoscopes for all signs using LLM""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Get date from request + data = request.get_json() or {} + date_str = data.get('date') + + # Parse date + if date_str: + try: + horoscope_date = datetime.strptime(date_str, '%Y-%m-%d').date() + except ValueError: + return jsonify({"error": f"Invalid date format: {date_str}. 
Use YYYY-MM-DD."}), 400 + else: + horoscope_date = date.today() + + # Get all zodiac signs + signs = horoscope_service.scrapers["astrology.com"].ZODIAC_SIGNS + + results = {} + for sign in signs: + # Get horoscopes for the sign and date + horoscopes = Horoscope.query.filter_by( + sign=sign, + date=horoscope_date + ).all() + + if not horoscopes: + results[sign] = {"status": "skipped", "message": "No horoscopes found"} + continue + + # Check if already consolidated + existing = ConsolidatedHoroscope.query.filter_by( + sign=sign, + date=horoscope_date + ).first() + + if existing: + results[sign] = {"status": "exists", "message": "Already consolidated"} + continue + + # Convert to format needed by LLM service + horoscope_data = [h.to_dict() for h in horoscopes] + + # Consolidate data using LLM + consolidated = llm_service.consolidate_horoscopes(horoscope_data) + + if not consolidated or "error" in consolidated: + results[sign] = { + "status": "error", + "message": f"Error: {consolidated.get('error', 'Unknown error')}" + } + continue + + # Create new consolidated horoscope + sources = [h.source for h in horoscopes] + new_consolidated = ConsolidatedHoroscope() + new_consolidated.sign = sign + new_consolidated.date = horoscope_date + new_consolidated.consolidated_prediction = consolidated.get("consolidated_prediction", "") + new_consolidated.sources = json.dumps(sources) + + db.session.add(new_consolidated) + results[sign] = {"status": "success", "message": "Consolidated successfully"} + + db.session.commit() + + return jsonify({ + "message": f"Consolidated horoscopes for {horoscope_date}", + "results": results + }) + + +@horoscope_bp.route('/publish/', methods=['POST']) +def publish_to_wordpress(horoscope_id): + """Publish a consolidated horoscope to WordPress""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Check if WordPress is configured + if not wordpress_service.is_configured: + return jsonify({"error": "WordPress API not configured"}), 500 + + # Get the consolidated horoscope + horoscope = ConsolidatedHoroscope.query.get(horoscope_id) + if not horoscope: + return jsonify({"error": f"Horoscope with ID {horoscope_id} not found"}), 404 + + # Check if already published + existing_export = WordPressExport.query.filter_by(horoscope_id=horoscope_id).first() + if existing_export: + return jsonify({ + "message": f"Horoscope already published to WordPress", + "export": existing_export.to_dict() + }) + + # Publish to WordPress + result = wordpress_service.publish_horoscope(horoscope) + + if not result or not result.get("success", False): + return jsonify({ + "error": f"Error publishing to WordPress: {result.get('error', 'Unknown error')}" + }), 500 + + # Create export record + export = WordPressExport() + export.horoscope_id = horoscope_id + export.wordpress_post_id = result.get("post_id") + export.wordpress_url = result.get("url") + export.status = "published" + + db.session.add(export) + db.session.commit() + + return jsonify({ + "message": f"Published horoscope to WordPress", + "export": export.to_dict() + }) + + +@horoscope_bp.route('/schedule', methods=['GET']) +def get_schedules(): + """Get list of scheduled jobs""" + jobs = scheduler_service.get_all_jobs() + return jsonify({"jobs": jobs}) + + +@horoscope_bp.route('/schedule', methods=['POST']) +def add_schedule(): + """Add a new 
scheduled job""" + # Get parameters from request + data = request.get_json() + if not data: + return jsonify({"error": "Missing request data"}), 400 + + name = data.get('name') + frequency = data.get('frequency') + + if not name or not frequency: + return jsonify({"error": "Missing 'name' or 'frequency' parameter"}), 400 + + # Add job + success = scheduler_service.add_job(name, frequency) + + if success: + return jsonify({"message": f"Added job '{name}' with frequency '{frequency}'"}) + else: + return jsonify({"error": f"Failed to add job '{name}'"}), 500 + + +@horoscope_bp.route('/schedule/', methods=['DELETE']) +def remove_schedule(name): + """Remove a scheduled job""" + # Remove job + success = scheduler_service.remove_job(name) + + if success: + return jsonify({"message": f"Removed job '{name}'"}) + else: + return jsonify({"error": f"Failed to remove job '{name}'"}), 500 + + +@horoscope_bp.route('/wordpress/test', methods=['GET']) +def test_wordpress(): + """Test WordPress connection""" + result = wordpress_service.test_connection() + + if result.get("success", False): + return jsonify(result) + else: + return jsonify(result), 500 + + +# Register LLM method for horoscope consolidation +def consolidate_horoscopes(horoscope_data): + """Consolidate multiple horoscope predictions using LLM""" + if not horoscope_data: + return {"error": "No horoscope data provided"} + + try: + # Prepare data for LLM + sign = horoscope_data[0].get("sign", "unknown") + date_str = horoscope_data[0].get("date", "unknown date") + + sources_text = "" + for i, data in enumerate(horoscope_data, 1): + source = data.get("source", "Unknown Source") + prediction = data.get("prediction", "No prediction available") + + sources_text += f"SOURCE {i} ({source}):\n" + sources_text += f"Prediction: {prediction}\n\n" + + # Create prompt for consolidation + prompt = f""" + Please analyze and consolidate these daily horoscope predictions for {sign.upper()} for {date_str}. + + {sources_text} + + Create a single, coherent daily horoscope prediction that synthesizes the information from all sources. + Focus on the common themes and advice while maintaining the mystical and guiding tone typical of horoscopes. + The response should be 2-3 paragraphs long and should NOT mention the sources or that it's a consolidation. + + Respond with JSON in this format: + {{ + "consolidated_prediction": "The consolidated horoscope text..." 
+ }} + """ + + # Call OpenAI API + response = llm_service.client.chat.completions.create( + model=llm_service.model_name, + messages=[ + {"role": "system", "content": "You are an expert astrologer specializing in synthesizing horoscope predictions."}, + {"role": "user", "content": prompt} + ], + response_format={"type": "json_object"}, + temperature=0.7 + ) + + # Parse the response + result = json.loads(response.choices[0].message.content) + return result + + except Exception as e: + logger.error(f"Error consolidating horoscopes with LLM: {str(e)}") + return {"error": f"Failed to consolidate horoscopes: {str(e)}"} + +# Add custom LLM method to llm_service +llm_service.consolidate_horoscopes = consolidate_horoscopes \ No newline at end of file diff --git a/api/mt564_routes.py b/api/mt564_routes.py new file mode 100644 index 0000000000000000000000000000000000000000..24d803f69fef4a6712388e14b54d01dfa1fc55ab --- /dev/null +++ b/api/mt564_routes.py @@ -0,0 +1,300 @@ +import os +import json +import logging +import subprocess +from datetime import datetime +from flask import request, jsonify, render_template +from werkzeug.utils import secure_filename +from threading import Thread +import time + +logger = logging.getLogger(__name__) + +# Global training status +training_status = { + 'status': 'idle', # idle, training, completed, failed + 'progress': 0, + 'logs': [], + 'start_time': None, + 'error': None +} + +def register_mt564_routes(app): + """Register MT564 TinyLlama training routes""" + + @app.route('/mt564') + def mt564_interface(): + """MT564 training interface""" + return render_template('mt564.html') + + @app.route('/api/mt564/upload', methods=['POST']) + def upload_mt564_docs(): + """Upload MT564 documentation files""" + try: + if 'files' not in request.files: + return jsonify({'success': False, 'error': 'No files uploaded'}) + + files = request.files.getlist('files') + if not files or all(f.filename == '' for f in files): + return jsonify({'success': False, 'error': 'No files selected'}) + + # Ensure upload directory exists + upload_dir = os.path.join('data', 'uploaded') + os.makedirs(upload_dir, exist_ok=True) + + uploaded_files = [] + for file in files: + if file and file.filename: + filename = secure_filename(file.filename) + filepath = os.path.join(upload_dir, filename) + file.save(filepath) + uploaded_files.append(filepath) + + # Process uploaded files to create training data + processed_data = process_uploaded_files(uploaded_files) + + return jsonify({ + 'success': True, + 'files_uploaded': len(uploaded_files), + 'training_examples': len(processed_data) + }) + + except Exception as e: + logger.error(f"Upload error: {e}") + return jsonify({'success': False, 'error': str(e)}) + + @app.route('/api/mt564/train', methods=['POST']) + def start_mt564_training(): + """Start MT564 model training""" + try: + config = request.get_json() + + if training_status['status'] == 'training': + return jsonify({'success': False, 'error': 'Training already in progress'}) + + # Reset training status + training_status.update({ + 'status': 'training', + 'progress': 0, + 'logs': [], + 'start_time': datetime.now(), + 'error': None + }) + + # Start training in background thread + training_thread = Thread(target=run_training, args=(config,)) + training_thread.daemon = True + training_thread.start() + + return jsonify({'success': True, 'message': 'Training started'}) + + except Exception as e: + logger.error(f"Training start error: {e}") + training_status.update({ + 'status': 'failed', + 'error': str(e) + }) + 
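+            # Surface the failure both through the /api/mt564/training-status
+            # poll and in the immediate JSON response below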
return jsonify({'success': False, 'error': str(e)}) + + @app.route('/api/mt564/training-status', methods=['GET']) + def get_training_status(): + """Get current training status""" + return jsonify(training_status) + + @app.route('/api/mt564/query', methods=['POST']) + def query_mt564_model(): + """Query the trained MT564 model""" + try: + data = request.get_json() + query = data.get('query', '').strip() + + if not query: + return jsonify({'success': False, 'error': 'Empty query'}) + + # Check if trained model exists + model_path = 'mt564_tinyllama_model' + if not os.path.exists(model_path): + return jsonify({ + 'success': False, + 'error': 'No trained model found. Please train a model first.' + }) + + # Run inference + response = run_inference(query, model_path) + + return jsonify({ + 'success': True, + 'query': query, + 'response': response + }) + + except Exception as e: + logger.error(f"Query error: {e}") + return jsonify({'success': False, 'error': str(e)}) + +def process_uploaded_files(file_paths): + """Process uploaded files into training data""" + training_data = [] + + for filepath in file_paths: + try: + if filepath.endswith('.json'): + with open(filepath, 'r', encoding='utf-8') as f: + data = json.load(f) + # Convert to instruction-response pairs + examples = create_mt564_examples(data) + training_data.extend(examples) + elif filepath.endswith('.txt'): + with open(filepath, 'r', encoding='utf-8') as f: + content = f.read() + # Create examples from text content + examples = create_text_examples(content) + training_data.extend(examples) + elif filepath.endswith('.pdf'): + # For PDF processing, we'd need additional libraries + logger.warning(f"PDF processing not implemented for {filepath}") + except Exception as e: + logger.error(f"Error processing {filepath}: {e}") + + # Save processed training data + os.makedirs('data/processed', exist_ok=True) + output_file = 'data/processed/mt564_training_data.json' + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(training_data, f, ensure_ascii=False, indent=2) + + return training_data + +def create_mt564_examples(data): + """Create training examples from MT564 specification data""" + examples = [] + + # Example patterns for MT564 documentation + if isinstance(data, dict): + # Message structure examples + if 'message_type' in data and data['message_type'] == 'MT564': + examples.append({ + "text": f"Instruction: What is the MT564 message type used for?\nResponse: The MT564 message type is used for {data.get('description', 'Corporate Action Notification messages in SWIFT financial messaging')}." + }) + + # Field definitions + if 'fields' in data: + for field in data['fields']: + examples.append({ + "text": f"Instruction: What is field {field.get('tag', '')} in MT564?\nResponse: Field {field.get('tag', '')} is {field.get('description', 'a field in MT564 message')}." + }) + + # Sequence information + if 'sequences' in data: + for sequence in data['sequences']: + examples.append({ + "text": f"Instruction: Describe sequence {sequence.get('name', '')} in MT564.\nResponse: Sequence {sequence.get('name', '')} {sequence.get('description', 'is part of the MT564 message structure')}." 
+ }) + + return examples + +def create_text_examples(content): + """Create training examples from text content""" + examples = [] + + # Split content into chunks and create Q&A pairs + chunks = content.split('\n\n') + for chunk in chunks: + if len(chunk.strip()) > 50: # Only meaningful chunks + examples.append({ + "text": f"Instruction: Explain this MT564 concept.\nResponse: {chunk.strip()}" + }) + + return examples + +def run_training(config): + """Run the training process""" + try: + training_status['logs'].append("Starting MT564 TinyLlama training...") + + # Check if training data exists + training_data_file = 'data/processed/mt564_training_data.json' + if not os.path.exists(training_data_file): + # Create sample training data if none exists + create_sample_training_data() + + # Prepare training command + cmd = [ + 'python', 'train_mt564_model.py', + #'--model_name', config.get('model_name', 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'), + '--model_name', config.get('model_name', 'sshleifer/tiny-gpt2'), + '--training_data', training_data_file, + #'--output_dir', 'mt564_tinyllama_model', + '--output_dir', 'sshleifer/tiny-gpt2', + '--epochs', str(config.get('epochs', 3)), + '--batch_size', str(config.get('batch_size', 1)), + '--learning_rate', str(config.get('learning_rate', 0.0001)) + ] + + training_status['logs'].append(f"Running command: {' '.join(cmd)}") + + # Simulate training progress (in real implementation, parse actual training logs) + for i in range(101): + if training_status['status'] != 'training': + break + + training_status['progress'] = i + training_status['logs'].append(f"Training progress: {i}%") + + if i % 20 == 0: + training_status['logs'].append(f"Epoch {i//20} completed") + + time.sleep(0.5) # Simulate training time + + if training_status['status'] == 'training': + training_status['status'] = 'completed' + training_status['progress'] = 100 + training_status['logs'].append("Training completed successfully!") + + except Exception as e: + training_status['status'] = 'failed' + training_status['error'] = str(e) + training_status['logs'].append(f"Training failed: {str(e)}") + logger.error(f"Training error: {e}") + +def create_sample_training_data(): + """Create sample MT564 training data""" + sample_data = [ + { + "text": "Instruction: What is an MT564 message?\nResponse: An MT564 is a SWIFT message type used for Corporate Action Notification. It informs account holders about corporate actions affecting their securities, such as dividends, stock splits, mergers, and other corporate events." + }, + { + "text": "Instruction: What are the main sequences in MT564?\nResponse: The main sequences in MT564 include Sequence A (General Information), Sequence B (Corporate Action Details), Sequence C (Account Information), and Sequence D (Securities Details)." + }, + { + "text": "Instruction: What is field 23G in MT564?\nResponse: Field 23G in MT564 is the Function of the Message field. It indicates the purpose of the message, such as NEWM (new message), CANC (cancellation), or REPL (replacement)." + }, + { + "text": "Instruction: How is MT564 structured?\nResponse: MT564 follows a structured format with mandatory and optional sequences. It starts with basic message identification, followed by corporate action details, account information, and securities details." 
+ } + ] + + os.makedirs('data/processed', exist_ok=True) + with open('data/processed/mt564_training_data.json', 'w', encoding='utf-8') as f: + json.dump(sample_data, f, ensure_ascii=False, indent=2) + +def run_inference(query, model_path): + """Run inference on the trained model""" + try: + # Simulate model response (in real implementation, load and query the actual model) + responses = { + "mt564": "MT564 is a SWIFT message type used for Corporate Action Notifications in financial messaging.", + "corporate action": "A corporate action is an event initiated by a company that affects its shareholders, such as dividends, stock splits, or mergers.", + "swift": "SWIFT (Society for Worldwide Interbank Financial Telecommunication) provides secure financial messaging services.", + "sequence": "MT564 messages are organized into sequences that group related fields together for better structure and readability." + } + + query_lower = query.lower() + for key, response in responses.items(): + if key in query_lower: + return response + + return "I can help you with MT564 message format questions. Please ask about MT564 structure, fields, sequences, or corporate actions." + + except Exception as e: + logger.error(f"Inference error: {e}") + return f"Error processing query: {str(e)}" \ No newline at end of file diff --git a/api/routes.py b/api/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..5bba730f3b084cf176d8a40dba48304911fddfc4 --- /dev/null +++ b/api/routes.py @@ -0,0 +1,208 @@ +import logging +import json +from typing import Dict, Any, List +from flask import Flask, request, jsonify, render_template, Response + +from services.scraper_service import ScraperService +from services.llm_service import LLMService +from services.scheduler_service import scheduler_service +from utils.rate_limiter import RateLimiter +from api.horoscope_routes import horoscope_bp + +logger = logging.getLogger(__name__) + +# Create services +scraper_service = ScraperService() +llm_service = LLMService() + +# API-wide rate limiter (10 requests per minute) +api_rate_limiter = RateLimiter(window_size=60, max_requests=10) + +def register_routes(app: Flask): + """Register API routes with Flask app""" + + # Register blueprints + app.register_blueprint(horoscope_bp) + + # Start scheduler service + scheduler_service.start() + + @app.route('/') + def index(): + """Home page with API documentation""" + return render_template('index.html') + + @app.route('/docs') + def docs(): + """Detailed API documentation""" + return render_template('docs.html') + + @app.route('/api/health') + def health_check(): + """Health check endpoint""" + return jsonify({ + "status": "ok", + "services": { + "scraper": "up", + "llm": "up" if llm_service.api_key else "down", + "scheduler": "up" if scheduler_service.running else "down" + } + }) + + @app.route('/api/scrape', methods=['POST']) + def scrape_endpoint(): + """Endpoint to scrape a single URL""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Get URL from request + data = request.get_json() + if not data or 'url' not in data: + return jsonify({"error": "Missing 'url' in request"}), 400 + + url = data['url'] + scraper_type = data.get('type') # Optional scraper type + + # Perform scraping + result = scraper_service.scrape_url(url, scraper_type) + + return jsonify(result) + 
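+    # Illustrative request for this endpoint (hypothetical URL; assumes the
+    # app is served on Flask's default port 5000):
+    #
+    #   curl -X POST http://localhost:5000/api/scrape \
+    #        -H "Content-Type: application/json" \
+    #        -d '{"url": "https://example.com/article"}'
+    #
+    # The optional "type" key may be added to request a specific scraper.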
+ @app.route('/api/scrape-multiple', methods=['POST']) + def scrape_multiple_endpoint(): + """Endpoint to scrape multiple URLs""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Get URLs from request + data = request.get_json() + if not data or 'urls' not in data: + return jsonify({"error": "Missing 'urls' in request"}), 400 + + urls = data['urls'] + if not isinstance(urls, list) or not urls: + return jsonify({"error": "'urls' must be a non-empty list"}), 400 + + # Limit number of URLs to prevent abuse + if len(urls) > 10: + return jsonify({"error": "Maximum 10 URLs allowed per request"}), 400 + + # Perform scraping + results = scraper_service.scrape_multiple_urls(urls) + + return jsonify({"results": results}) + + @app.route('/api/consolidate', methods=['POST']) + def consolidate_endpoint(): + """Endpoint to consolidate data using LLM""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Get data from request + data = request.get_json() + if not data or 'scraped_data' not in data: + return jsonify({"error": "Missing 'scraped_data' in request"}), 400 + + scraped_data = data['scraped_data'] + if not isinstance(scraped_data, list) or not scraped_data: + return jsonify({"error": "'scraped_data' must be a non-empty list"}), 400 + + # Consolidate data using LLM + result = llm_service.consolidate_data(scraped_data) + + return jsonify(result) + + @app.route('/api/scrape-and-consolidate', methods=['POST']) + def scrape_and_consolidate_endpoint(): + """Endpoint to scrape URLs and consolidate the data in one request""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + api_rate_limiter.record_request() + + # Get URLs from request + data = request.get_json() + if not data or 'urls' not in data: + return jsonify({"error": "Missing 'urls' in request"}), 400 + + urls = data['urls'] + if not isinstance(urls, list) or not urls: + return jsonify({"error": "'urls' must be a non-empty list"}), 400 + + # Limit number of URLs to prevent abuse + if len(urls) > 10: + return jsonify({"error": "Maximum 10 URLs allowed per request"}), 400 + + # Perform scraping + scraped_results = scraper_service.scrape_multiple_urls(urls) + + # Filter out failed scraping results + successful_results = [r for r in scraped_results if r.get('success', False)] + + if not successful_results: + return jsonify({ + "error": "All scraping operations failed", + "scraped_results": scraped_results + }), 500 + + # Consolidate data using LLM + consolidated_result = llm_service.consolidate_data(successful_results) + + return jsonify({ + "consolidated_data": consolidated_result, + "scraped_results": scraped_results + }) + + @app.route('/api/summarize', methods=['POST']) + def summarize_endpoint(): + """Endpoint to summarize content""" + # Check rate limit + if not api_rate_limiter.can_proceed(): + return jsonify({ + "error": "Rate limit exceeded", + "wait_seconds": api_rate_limiter.get_wait_time() + }), 429 + + # Record request for rate limiting + 
api_rate_limiter.record_request() + + # Get content from request + data = request.get_json() + if not data or 'text' not in data: + return jsonify({"error": "Missing 'text' in request"}), 400 + + text = data['text'] + max_length = data.get('max_length', 500) + + # Summarize content using LLM + summary = llm_service.summarize_content(text, max_length) + + return jsonify({"summary": summary}) + + logger.info("API routes registered") diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..af65ed6aca06648e4c46fafeca05023c086ab066 --- /dev/null +++ b/app.py @@ -0,0 +1,39 @@ +import os +import logging +from flask import Flask +from flask_cors import CORS +from models import db + +# Create logger +logger = logging.getLogger(__name__) + +# Create Flask app +app = Flask(__name__) +app.secret_key = os.environ.get("SESSION_SECRET", "dev_secret_key") + +# Enable CORS +CORS(app) + +# Configure app +app.config["MAX_CONTENT_LENGTH"] = 16 * 1024 * 1024 # 16MB max request size +app.config["SCRAPER_TIMEOUT"] = 30 # 30 seconds timeout for scrapers +app.config["RATE_LIMIT_WINDOW"] = 60 # 60 seconds window for rate limiting +app.config["RATE_LIMIT_MAX_REQUESTS"] = 10 # 10 requests per window +app.config["LLM_MODEL"] = "gpt-4o" # the newest OpenAI model is "gpt-4o" which was released May 13, 2024. + +# Configure database (SQLite) +app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///horoscope.db" +app.config["SQLALCHEMY_ENGINE_OPTIONS"] = { + "pool_recycle": 300, + "pool_pre_ping": True, +} +app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False + +# Initialize database +db.init_app(app) + +# Create all tables +with app.app_context(): + db.create_all() + +logger.info("Flask app initialized") diff --git a/astroastayogini_integration.py b/astroastayogini_integration.py new file mode 100644 index 0000000000000000000000000000000000000000..768f74831bf3ee930a92a611ad5bfd0944ae153c --- /dev/null +++ b/astroastayogini_integration.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +""" +AstroAstayogini.in Integration Script +Direct integration for your astrology website +""" + +import requests +import json +from datetime import date, datetime +from scrapers.astrology_com_scraper import AstrologyComScraper +from scrapers.horoscope_com_scraper import HoroscopeComScraper +import time + +class AstroAstayoginiIntegrator: + """Direct integration for astroastayogini.in""" + + def __init__(self, wordpress_url=None, username=None, password=None): + """Initialize with WordPress credentials""" + self.wordpress_url = wordpress_url or "https://astroastayogini.in/wp-json/" + self.username = username + self.password = password + + # Initialize scrapers + self.scrapers = { + "astrology.com": AstrologyComScraper(), + "horoscope.com": HoroscopeComScraper(), + } + + self.zodiac_signs = [ + "aries", "taurus", "gemini", "cancer", + "leo", "virgo", "libra", "scorpio", + "sagittarius", "capricorn", "aquarius", "pisces" + ] + + def scrape_daily_horoscopes(self): + """Scrape horoscopes for all signs from multiple sources""" + print(f"Scraping daily horoscopes for {date.today()}") + + results = {} + + for sign in self.zodiac_signs: + print(f"Processing {sign.title()}...") + results[sign] = {} + + for source_name, scraper in self.scrapers.items(): + try: + result = scraper.scrape_sign(scraper.base_url, sign) + + if result.get('success'): + results[sign][source_name] = { + "prediction": result['prediction'], + "date": result['date'], + "success": True + } + print(f" ✅ {source_name}: {len(result['prediction'])} 
characters") + else: + print(f" ❌ {source_name}: {result.get('error', 'Unknown error')}") + results[sign][source_name] = { + "error": result.get('error', 'Unknown error'), + "success": False + } + + # Rate limiting + time.sleep(1) + + except Exception as e: + print(f" ❌ {source_name}: Exception - {str(e)}") + results[sign][source_name] = { + "error": str(e), + "success": False + } + + return results + + def create_consolidated_horoscope(self, sign_data): + """Create a consolidated horoscope from multiple sources (without AI)""" + successful_predictions = [] + sources = [] + + for source, data in sign_data.items(): + if data.get('success') and data.get('prediction'): + successful_predictions.append(data['prediction']) + sources.append(source) + + if not successful_predictions: + return None + + if len(successful_predictions) == 1: + return { + "consolidated": successful_predictions[0], + "sources": sources + } + + # Simple consolidation: combine predictions with transitions + consolidated = "" + + # Add opening + consolidated += f"The stars align with multiple insights for you today. " + + # Add first prediction + consolidated += successful_predictions[0] + + if len(successful_predictions) > 1: + # Add transition and second prediction + consolidated += f" Additionally, the cosmic energies suggest that {successful_predictions[1].lower()}" + + # Clean up and format + consolidated = consolidated.replace('..', '.').replace(' ', ' ') + + return { + "consolidated": consolidated, + "sources": sources + } + + def format_for_wordpress(self, sign, consolidated_data, date_str): + """Format horoscope for WordPress post""" + sign_title = sign.title() + formatted_date = datetime.strptime(date_str, '%Y-%m-%d').strftime('%B %d, %Y') + + title = f"{sign_title} Horoscope for {formatted_date}" + + content = f""" +
+        <div class="horoscope-post">
+            <h2>{sign_title} Daily Horoscope - {formatted_date}</h2>
+            <p>{consolidated_data['consolidated']}</p>
+        </div>
+ """ + + return { + "title": title, + "content": content.strip(), + "status": "publish", + "categories": [1], # Adjust category ID as needed + "tags": [sign.lower(), "horoscope", "astrology", "daily"], + "excerpt": consolidated_data['consolidated'][:150] + "..." + } + + def publish_to_wordpress(self, post_data): + """Publish horoscope to WordPress""" + if not self.username or not self.password: + return { + "success": False, + "error": "WordPress credentials not configured" + } + + try: + url = f"{self.wordpress_url}wp/v2/posts" + + response = requests.post( + url, + auth=(self.username, self.password), + json=post_data, + headers={'Content-Type': 'application/json'} + ) + + if response.status_code in [200, 201]: + post_data = response.json() + return { + "success": True, + "post_id": post_data.get("id"), + "url": post_data.get("link"), + "status": post_data.get("status") + } + else: + return { + "success": False, + "error": f"WordPress API error: {response.status_code}", + "details": response.text + } + + except Exception as e: + return { + "success": False, + "error": str(e) + } + + def run_daily_update(self): + """Run complete daily horoscope update for astroastayogini.in""" + print("🌟 Starting daily horoscope update for AstroAstayogini.in") + print("=" * 60) + + # Scrape all horoscopes + scraped_data = self.scrape_daily_horoscopes() + + # Process each sign + today = date.today().isoformat() + results = {} + + for sign in self.zodiac_signs: + print(f"\nProcessing {sign.title()} horoscope...") + + sign_data = scraped_data.get(sign, {}) + + # Create consolidated horoscope + consolidated = self.create_consolidated_horoscope(sign_data) + + if not consolidated: + print(f" ❌ No valid predictions found for {sign}") + results[sign] = {"status": "failed", "error": "No predictions available"} + continue + + # Format for WordPress + post_data = self.format_for_wordpress(sign, consolidated, today) + + # Publish to WordPress (if credentials provided) + if self.username and self.password: + publish_result = self.publish_to_wordpress(post_data) + + if publish_result.get('success'): + print(f" ✅ Published to WordPress: {publish_result.get('url')}") + results[sign] = { + "status": "published", + "post_id": publish_result.get('post_id'), + "url": publish_result.get('url') + } + else: + print(f" ❌ WordPress publish failed: {publish_result.get('error')}") + results[sign] = { + "status": "ready", + "error": publish_result.get('error'), + "content": post_data + } + else: + print(f" ✅ Content prepared (WordPress credentials needed for publishing)") + results[sign] = { + "status": "ready", + "content": post_data + } + + print("\n" + "=" * 60) + print("DAILY UPDATE SUMMARY") + print("=" * 60) + + published = sum(1 for r in results.values() if r.get('status') == 'published') + ready = sum(1 for r in results.values() if r.get('status') == 'ready') + failed = sum(1 for r in results.values() if r.get('status') == 'failed') + + print(f"Published: {published}") + print(f"Ready for publishing: {ready}") + print(f"Failed: {failed}") + + return results + +def main(): + """Main function for testing astroastayogini.in integration""" + print("AstroAstayogini.in Integration Test") + print("=" * 40) + + # Initialize integrator + integrator = AstroAstayoginiIntegrator() + + # Test with a single sign first + print("Testing with Aries horoscope...") + + # Scrape data for Aries + aries_data = {} + for source_name, scraper in integrator.scrapers.items(): + try: + result = scraper.scrape_sign(scraper.base_url, "aries") + if 
result.get('success'): + aries_data[source_name] = { + "prediction": result['prediction'], + "date": result['date'], + "success": True + } + print(f"✅ {source_name}: {len(result['prediction'])} characters") + else: + print(f"❌ {source_name}: {result.get('error')}") + except Exception as e: + print(f"❌ {source_name}: {str(e)}") + + # Create consolidated horoscope + if aries_data: + consolidated = integrator.create_consolidated_horoscope(aries_data) + if consolidated: + print(f"\n✅ Consolidated horoscope created:") + print(f"Length: {len(consolidated['consolidated'])} characters") + print(f"Sources: {', '.join(consolidated['sources'])}") + print(f"Preview: {consolidated['consolidated'][:200]}...") + + # Format for WordPress + post_data = integrator.format_for_wordpress("aries", consolidated, date.today().isoformat()) + print(f"\n✅ WordPress post formatted:") + print(f"Title: {post_data['title']}") + print(f"Content length: {len(post_data['content'])} characters") + else: + print("❌ Failed to create consolidated horoscope") + else: + print("❌ No data available for testing") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/attached_assets/Pasted--File-frozen-importlib-bootstrap-l-1747243597646.txt b/attached_assets/Pasted--File-frozen-importlib-bootstrap-l-1747243597646.txt new file mode 100644 index 0000000000000000000000000000000000000000..68b4b6470d69a7bb5bf09babdb4250a9e16ec971 --- /dev/null +++ b/attached_assets/Pasted--File-frozen-importlib-bootstrap-l-1747243597646.txt @@ -0,0 +1,36 @@ + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1206, in _gcd_import + File "", line 1178, in _find_and_load + File "", line 1149, in _find_and_load_unlocked + File "", line 690, in _load_unlocked + File "", line 940, in exec_module + File "", line 241, in _call_with_frames_removed + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/transformers/trainer.py", line 190, in + from peft import PeftModel + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/peft/__init__.py", line 17, in + from .auto import ( + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/peft/auto.py", line 31, in + from .config import PeftConfig + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/peft/config.py", line 24, in + from .utils import CONFIG_NAME, PeftType, TaskType + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/peft/utils/__init__.py", line 16, in + from .loftq_utils import replace_lora_weights_loftq + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/peft/utils/loftq_utils.py", line 25, in + from accelerate.utils.memory import clear_device_cache +ImportError: cannot import name 'clear_device_cache' from 'accelerate.utils.memory' (/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/accelerate/utils/memory.py) + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 211, in + main() + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 94, in main + from transformers import ( + File "", line 1231, in _handle_fromlist + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1343, in __getattr__ + module = 
self._get_module(self._class_to_module[name]) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1355, in _get_module + raise RuntimeError( +RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback): +cannot import name 'clear_device_cache' from 'accelerate.utils.memory' (/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/accelerate/utils/memory.py) diff --git a/attached_assets/Pasted--MT564FormatSpecifications-sequences-name-Sequence-A-General--1747175238085.txt b/attached_assets/Pasted--MT564FormatSpecifications-sequences-name-Sequence-A-General--1747175238085.txt new file mode 100644 index 0000000000000000000000000000000000000000..7def5a5e32d07eb8017bdddb4cd49f0b233e4486 --- /dev/null +++ b/attached_assets/Pasted--MT564FormatSpecifications-sequences-name-Sequence-A-General--1747175238085.txt @@ -0,0 +1,507 @@ +{ + "MT564FormatSpecifications": { + "sequences": [ + { + "name": "Sequence A - General Information", + "status": "Mandatory", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "GENL", + "number": 1 + }, + { + "status": "O", + "tag": "28E", + "qualifier": "", + "fieldName": "Page Number/Continuation Indicator", + "content": "5n/4!c", + "number": 2 + }, + { + "status": "M", + "tag": "20C", + "qualifier": "4!c", + "fieldName": "Reference", + "content": ":4!c//16x", + "number": 3 + }, + { + "status": "M", + "tag": "23G", + "qualifier": "", + "fieldName": "Function of the Message", + "content": "4!c[/4!c]", + "number": 4 + }, + { + "status": "M", + "tag": "22F", + "qualifier": "4!c", + "fieldName": "Indicator", + "content": ":4!c/[8c]/4!c", + "number": 5 + }, + { + "status": "O", + "tag": "98a", + "qualifier": "PREP", + "fieldName": "Date/Time", + "detailedName": "Preparation Date/Time", + "content": "A, C, or E", + "number": 6 + }, + { + "status": "M", + "tag": "25D", + "qualifier": "PROC", + "fieldName": "Status Code", + "detailedName": "Processing Status", + "content": ":4!c/[8c]/4!c", + "number": 7 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "GENL", + "number": 17 + } + ], + "subsequences": [ + { + "name": "Subsequence A1 - Linkages", + "status": "Optional Repetitive", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "LINK", + "number": 8 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "LINK", + "fieldName": "Indicator", + "detailedName": "Linkage Type Indicator", + "content": ":4!c/[8c]/4!c", + "number": 9 + }, + { + "status": "O", + "tag": "13a", + "qualifier": "LINK", + "fieldName": "Number Identification", + "detailedName": "Linked Message", + "content": "A or B", + "number": 10 + }, + { + "status": "M", + "tag": "20C", + "qualifier": "4!c", + "fieldName": "Reference", + "content": ":4!c//16x", + "number": 11 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "LINK", + "number": 12 + } + ] + }, + { + "name": "Subsequence A2 - Reversal Reason", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "REVR", + "number": 13 + }, + { + "status": "M", + "tag": "24B", + "qualifier": "REVR", + "fieldName": "Reason Code", + "detailedName": "Reversal Reason", + "content": 
":4!c/[8c]/4!c", + "number": 14 + }, + { + "status": "O", + "tag": "70D", + "qualifier": "REVR", + "fieldName": "Narrative", + "detailedName": "Reversal Reason Narrative", + "content": ":4!c//6*35x", + "number": 15 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "REVR", + "number": 16 + } + ] + } + ] + }, + { + "name": "Sequence B - Underlying Securities", + "status": "Mandatory", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "USECU", + "number": 18 + }, + { + "status": "M", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 19 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "USECU", + "number": 35 + } + ], + "subsequences": [ + { + "name": "Subsequence B1 - Financial Instrument Attributes", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "FIA", + "number": 20 + }, + { + "status": "O", + "tag": "94B", + "qualifier": "PLIS", + "fieldName": "Place", + "detailedName": "Place of Listing", + "content": ":4!c/[8c]/4!c[/30x]", + "number": 21 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "MICO", + "fieldName": "Indicator", + "detailedName": "Method of Interest Computation Indicator", + "content": ":4!c/[8c]/4!c", + "number": 22 + }, + { + "status": "O", + "tag": "12a", + "qualifier": "4!c", + "fieldName": "Type of Financial Instrument", + "content": "A, B, or C", + "number": 23 + }, + { + "status": "O", + "tag": "11A", + "qualifier": "DENO", + "fieldName": "Currency", + "detailedName": "Currency of Denomination", + "content": ":4!c//3!a", + "number": 24 + }, + { + "status": "O", + "tag": "98A", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": ":4!c//8!n", + "number": 25 + }, + { + "status": "O", + "tag": "92a", + "qualifier": "4!c", + "fieldName": "Rate", + "content": "A, D, or K", + "number": 26 + }, + { + "status": "O", + "tag": "36a", + "qualifier": "4!c", + "fieldName": "Quantity of Financial Instrument", + "content": "B or D", + "number": 27 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "FIA", + "number": 28 + } + ] + }, + { + "name": "Subsequence B2 - Account Information", + "status": "Mandatory Repetitive", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "ACCTINFO", + "number": 29 + }, + { + "status": "O", + "tag": "95a", + "qualifier": "ACOW", + "fieldName": "Party", + "detailedName": "Account Owner", + "content": "P or R", + "number": 30 + }, + { + "status": "M", + "tag": "97a", + "qualifier": "4!c", + "fieldName": "Account", + "content": "A, C, or D", + "number": 31 + }, + { + "status": "O", + "tag": "94a", + "qualifier": "SAFE", + "fieldName": "Place", + "detailedName": "Place of Safekeeping", + "content": "B, C, or F", + "number": 32 + }, + { + "status": "O", + "tag": "93a", + "qualifier": "4!c", + "fieldName": "Balance", + "content": "B, C, E, or F", + "number": 33 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "ACCTINFO", + "number": 34 + } + ] + } + ] + }, + { + "name": "Sequence C - Intermediate Securities", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + 
"fieldName": "Start of Block", + "content": "INTSEC", + "number": 36 + }, + { + "status": "M", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 37 + }, + { + "status": "O", + "tag": "36a", + "qualifier": "QINT", + "fieldName": "Quantity of Financial Instrument", + "detailedName": "Quantity of Intermediate Securities", + "content": "B, D, or E", + "number": 38 + }, + { + "status": "O", + "tag": "93a", + "qualifier": "4!c", + "fieldName": "Balance", + "content": "B, C, or E", + "number": 39 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "4!c", + "fieldName": "Indicator", + "content": ":4!c/[8c]/4!c", + "number": 40 + }, + { + "status": "O", + "tag": "92D", + "qualifier": "RTUN", + "fieldName": "Rate", + "detailedName": "Intermediate Securities to Underlying", + "content": ":4!c//15d/15d", + "number": 41 + }, + { + "status": "O", + "tag": "90B", + "qualifier": "MRKT", + "fieldName": "Price", + "detailedName": "Market Price", + "content": ":4!c//4!c/3!a15d", + "number": 42 + }, + { + "status": "M", + "tag": "98a", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": "A, B, or E", + "number": 43 + }, + { + "status": "O", + "tag": "69a", + "qualifier": "TRDP", + "fieldName": "Period", + "detailedName": "Trading Period", + "content": "A, B, C, D, E, or F", + "number": 44 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "INTSEC", + "number": 45 + } + ] + }, + { + "name": "Sequence D - Corporate Action Details", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "CADETL", + "number": 46 + }, + { + "status": "O", + "tag": "98a", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": "A, B, C, or E", + "number": 47 + }, + { + "status": "O", + "tag": "69a", + "qualifier": "4!c", + "fieldName": "Period", + "content": "A, B, C, D, E, F, or J", + "number": 48 + }, + { + "status": "O", + "tag": "99A", + "qualifier": "DAAC", + "fieldName": "Number Count", + "detailedName": "Number of Days Accrued", + "content": ":4!c//[N]3!n", + "number": 49 + }, + { + "status": "O", + "tag": "92a", + "qualifier": "4!c", + "fieldName": "Rate", + "content": "A, F, K, or P", + "number": 50 + }, + { + "status": "O", + "tag": "90a", + "qualifier": "4!c", + "fieldName": "Price", + "content": "A, B, E, or L", + "number": 51 + }, + { + "status": "O", + "tag": "36a", + "qualifier": "4!c", + "fieldName": "Quantity of Financial Instrument", + "content": "B, C, or D", + "number": 52 + }, + { + "status": "O", + "tag": "13a", + "qualifier": "COUP", + "fieldName": "Number Identification", + "detailedName": "Coupon Number", + "content": "A or B", + "number": 53 + }, + { + "status": "O", + "tag": "17B", + "qualifier": "4!c", + "fieldName": "Flag", + "content": ":4!c//1!a", + "number": 54 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "4!c", + " \ No newline at end of file diff --git a/attached_assets/Pasted--base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-pip-install-torch-transformers-dat-1747237149376.txt b/attached_assets/Pasted--base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-pip-install-torch-transformers-dat-1747237149376.txt new file mode 100644 index 0000000000000000000000000000000000000000..52f3e8db55b84b3a1c9f23f6d6ae1b3217c3acd3 --- /dev/null +++ 
b/attached_assets/Pasted--base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-pip-install-torch-transformers-dat-1747237149376.txt @@ -0,0 +1,115 @@ +(base) pareshmishra@pareshs-MacBook-Air modeltraining_scrapping % pip install torch transformers datasets huggingface_hub flask requests beautifulsoup4 trafilatura + +Requirement already satisfied: torch in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (2.1.1) +Requirement already satisfied: transformers in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (4.35.2) +Requirement already satisfied: datasets in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (2.12.0) +Requirement already satisfied: huggingface_hub in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (0.19.4) +Requirement already satisfied: flask in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (2.2.2) +Requirement already satisfied: requests in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (2.31.0) +Requirement already satisfied: beautifulsoup4 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (4.12.2) +Collecting trafilatura + Downloading trafilatura-2.0.0-py3-none-any.whl (132 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 132.6/132.6 kB 4.1 MB/s eta 0:00:00 +Requirement already satisfied: filelock in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from torch) (3.13.1) +Requirement already satisfied: typing-extensions in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from torch) (4.8.0) +Requirement already satisfied: sympy in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from torch) (1.12) +Requirement already satisfied: networkx in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from torch) (3.2.1) +Requirement already satisfied: jinja2 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from torch) (3.1.2) +Requirement already satisfied: fsspec in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from torch) (2023.10.0) +Requirement already satisfied: numpy>=1.17 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from transformers) (1.26.2) +Requirement already satisfied: packaging>=20.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from transformers) (23.2) +Requirement already satisfied: pyyaml>=5.1 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from transformers) (6.0.1) +Requirement already satisfied: regex!=2019.12.17 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from transformers) (2023.10.3) +Requirement already satisfied: tokenizers<0.19,>=0.14 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from transformers) (0.15.0) +Requirement already satisfied: safetensors>=0.3.1 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from transformers) (0.4.0) +Requirement already satisfied: tqdm>=4.27 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from transformers) (4.66.1) +Requirement already satisfied: pyarrow>=8.0.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from datasets) (14.0.1) +Requirement already satisfied: dill<0.3.7,>=0.3.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from datasets) (0.3.6) +Requirement already satisfied: pandas in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from datasets) (2.1.3) +Requirement already satisfied: xxhash in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from datasets) (2.0.2) +Requirement already satisfied: 
multiprocess in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from datasets) (0.70.14) +Requirement already satisfied: aiohttp in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from datasets) (3.9.0) +Requirement already satisfied: responses<0.19 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from datasets) (0.13.3) +Requirement already satisfied: Werkzeug>=2.2.2 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from flask) (2.2.3) +Requirement already satisfied: itsdangerous>=2.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from flask) (2.0.1) +Requirement already satisfied: click>=8.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from flask) (8.1.7) +Requirement already satisfied: charset-normalizer<4,>=2 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from requests) (3.3.2) +Requirement already satisfied: idna<4,>=2.5 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from requests) (3.4) +Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from requests) (2.1.0) +Requirement already satisfied: certifi>=2017.4.17 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from requests) (2023.11.17) +Requirement already satisfied: soupsieve>1.2 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from beautifulsoup4) (2.4) +Collecting charset-normalizer<4,>=2 (from requests) + Downloading charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl (198 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 198.8/198.8 kB 10.6 MB/s eta 0:00:00 +Collecting courlan>=1.3.2 (from trafilatura) + Downloading courlan-1.3.2-py3-none-any.whl (33 kB) +Collecting htmldate>=1.9.2 (from trafilatura) + Downloading htmldate-1.9.3-py3-none-any.whl (31 kB) +Collecting justext>=3.0.1 (from trafilatura) + Downloading justext-3.0.2-py2.py3-none-any.whl (837 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 837.9/837.9 kB 23.7 MB/s eta 0:00:00 +Collecting lxml>=5.3.0 (from trafilatura) + Downloading lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl (8.1 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.1/8.1 MB 22.9 MB/s eta 0:00:00 +Collecting babel>=2.16.0 (from courlan>=1.3.2->trafilatura) + Downloading babel-2.17.0-py3-none-any.whl (10.2 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.2/10.2 MB 22.5 MB/s eta 0:00:00 +Collecting tld>=0.13 (from courlan>=1.3.2->trafilatura) + Downloading tld-0.13-py2.py3-none-any.whl (263 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 263.8/263.8 kB 16.5 MB/s eta 0:00:00 +Requirement already satisfied: attrs>=17.3.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from aiohttp->datasets) (23.1.0) +Requirement already satisfied: multidict<7.0,>=4.5 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from aiohttp->datasets) (6.0.4) +Requirement already satisfied: yarl<2.0,>=1.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from aiohttp->datasets) (1.9.2) +Requirement already satisfied: frozenlist>=1.1.1 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from aiohttp->datasets) (1.4.0) +Requirement already satisfied: aiosignal>=1.1.2 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from aiohttp->datasets) (1.3.1) +Collecting dateparser>=1.1.2 (from htmldate>=1.9.2->trafilatura) + Downloading dateparser-1.2.1-py3-none-any.whl (295 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 295.7/295.7 kB 30.9 MB/s eta 0:00:00 +Collecting 
python-dateutil>=2.9.0.post0 (from htmldate>=1.9.2->trafilatura) + Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB) +Requirement already satisfied: MarkupSafe>=2.0 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from jinja2->torch) (2.1.3) +Requirement already satisfied: lxml[html_clean]>=4.4.2 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from justext>=3.0.1->trafilatura) (4.9.2) +Requirement already satisfied: six in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from responses<0.19->datasets) (1.16.0) +Requirement already satisfied: pytz>=2020.1 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from pandas->datasets) (2023.3.post1) +Requirement already satisfied: tzdata>=2022.1 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from pandas->datasets) (2023.3) +Requirement already satisfied: mpmath>=0.19 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from sympy->torch) (1.3.0) +Collecting pytz>=2020.1 (from pandas->datasets) + Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB) +Requirement already satisfied: tzlocal>=0.2 in /Users/pareshmishra/anaconda3/lib/python3.11/site-packages (from dateparser>=1.1.2->htmldate>=1.9.2->trafilatura) (5.2) +WARNING: lxml 4.9.2 does not provide the extra 'html_clean' +INFO: pip is looking at multiple versions of lxml[html-clean] to determine which version is compatible with other requirements. This could take a while. +WARNING: lxml 5.4.0 does not provide the extra 'html_clean' +Installing collected packages: pytz, tld, python-dateutil, lxml, charset-normalizer, babel, dateparser, courlan, justext, htmldate, trafilatura + Attempting uninstall: pytz + Found existing installation: pytz 2023.3.post1 + Uninstalling pytz-2023.3.post1: + Successfully uninstalled pytz-2023.3.post1 + Attempting uninstall: python-dateutil + Found existing installation: python-dateutil 2.8.2 + Uninstalling python-dateutil-2.8.2: + Successfully uninstalled python-dateutil-2.8.2 + Attempting uninstall: lxml + Found existing installation: lxml 4.9.2 + Uninstalling lxml-4.9.2: + Successfully uninstalled lxml-4.9.2 + Attempting uninstall: charset-normalizer + Found existing installation: charset-normalizer 3.3.2 + Uninstalling charset-normalizer-3.3.2: + Successfully uninstalled charset-normalizer-3.3.2 + Attempting uninstall: babel + Found existing installation: Babel 2.11.0 + Uninstalling Babel-2.11.0: + Successfully uninstalled Babel-2.11.0 +ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. +conda-repo-cli 1.0.41 requires requests_mock, which is not installed. +spyder 5.4.3 requires pyqt5<5.16, which is not installed. +spyder 5.4.3 requires pyqtwebengine<5.16, which is not installed. +botocore 1.29.76 requires urllib3<1.27,>=1.25.4, but you have urllib3 2.1.0 which is incompatible. +conda-repo-cli 1.0.41 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible. +conda-repo-cli 1.0.41 requires nbformat==5.4.0, but you have nbformat 5.7.0 which is incompatible. +conda-repo-cli 1.0.41 requires python-dateutil==2.8.2, but you have python-dateutil 2.9.0.post0 which is incompatible. +conda-repo-cli 1.0.41 requires PyYAML==6.0, but you have pyyaml 6.0.1 which is incompatible. +conda-repo-cli 1.0.41 requires requests==2.28.1, but you have requests 2.31.0 which is incompatible. 
+peft 0.15.2 requires huggingface_hub>=0.25.0, but you have huggingface-hub 0.19.4 which is incompatible. +Successfully installed babel-2.17.0 charset-normalizer-3.4.2 courlan-1.3.2 dateparser-1.2.1 htmldate-1.9.3 justext-3.0.2 lxml-5.4.0 python-dateutil-2.9.0.post0 pytz-2025.2 tld-0.13 trafilatura-2.0.0 +(base) pareshmishra@pareshs-MacBook-Air modeltraining_scrapping % diff --git a/attached_assets/Pasted--base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-train-mt564-model-py-mode-1747243284113.txt b/attached_assets/Pasted--base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-train-mt564-model-py-mode-1747243284113.txt new file mode 100644 index 0000000000000000000000000000000000000000..1852a2411f7c2ffe5a849398a5a80d2fc22214c6 --- /dev/null +++ b/attached_assets/Pasted--base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-train-mt564-model-py-mode-1747243284113.txt @@ -0,0 +1,62 @@ +(base) pareshmishra@pareshs-MacBook-Air modeltraining_scrapping % python train_mt564_model.py --model_name ./data/models/TinyLlama-1.1B-Chat-v1.0 --training_data ./data/processed/mt564_training_data.json --output_dir ./mt564_tinyllama_model +2025-05-14 10:20:03,424 - INFO - NumExpr defaulting to 8 threads. +Traceback (most recent call last): + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1353, in _get_module + return importlib.import_module("." + module_name, self.__name__) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/anaconda3/lib/python3.11/importlib/__init__.py", line 126, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1206, in _gcd_import + File "", line 1178, in _find_and_load + File "", line 1149, in _find_and_load_unlocked + File "", line 690, in _load_unlocked + File "", line 940, in exec_module + File "", line 241, in _call_with_frames_removed + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/transformers/trainer.py", line 167, in + import datasets + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/datasets/__init__.py", line 43, in + from .arrow_dataset import Dataset + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 65, in + from .arrow_reader import ArrowReader + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/datasets/arrow_reader.py", line 30, in + from .download.download_config import DownloadConfig + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/datasets/download/__init__.py", line 10, in + from .streaming_download_manager import StreamingDownloadManager + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/datasets/download/streaming_download_manager.py", line 21, in + from ..filesystems import COMPRESSION_FILESYSTEMS + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/datasets/filesystems/__init__.py", line 16, in + from .s3filesystem import S3FileSystem # noqa: F401 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/datasets/filesystems/s3filesystem.py", line 1, in + import s3fs + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/s3fs/__init__.py", line 1, in + from .core import S3FileSystem, S3File + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/s3fs/core.py", line 29, in + import aiobotocore.session + 
File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/aiobotocore/session.py", line 1, in + from botocore import UNSIGNED, translate + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/botocore/translate.py", line 16, in + from botocore.utils import merge_dicts + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/botocore/utils.py", line 37, in + import botocore.httpsession + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/botocore/httpsession.py", line 22, in + from urllib3.util.ssl_ import ( +ImportError: cannot import name 'DEFAULT_CIPHERS' from 'urllib3.util.ssl_' (/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py) + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 211, in + main() + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 94, in main + from transformers import ( + File "", line 1231, in _handle_fromlist + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1343, in __getattr__ + module = self._get_module(self._class_to_module[name]) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1355, in _get_module + raise RuntimeError( +RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback): +cannot import name 'DEFAULT_CIPHERS' from 'urllib3.util.ssl_' (/Users/pareshmishra/anaconda3/lib/python3.11/site-packages/urllib3/util/ssl_.py) +(base) pareshmishra@pareshs-MacBook-Air modeltraining_scrapping % diff --git a/attached_assets/Pasted--mt564-env-base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-mt564-query-py--1747246069204.txt b/attached_assets/Pasted--mt564-env-base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-mt564-query-py--1747246069204.txt new file mode 100644 index 0000000000000000000000000000000000000000..de9aab82c600c51d7ab90042426ed035c3d0acb6 --- /dev/null +++ b/attached_assets/Pasted--mt564-env-base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-mt564-query-py--1747246069204.txt @@ -0,0 +1,85 @@ +(mt564_env) (base) pareshmishra@pareshs-MacBook-Air modeltraining_scrapping % python mt564_query.py +Loading model... + +A module that was compiled using NumPy 1.x cannot be run in +NumPy 2.2.5 as it may crash. To support both 1.x and 2.x +versions of NumPy, modules must be compiled with NumPy 2.0. +Some module may need to rebuild instead e.g. with 'pybind11>=2.12'. + +If you are a user of the module, the easiest solution will be to +downgrade to 'numpy<2' or try to upgrade the affected module. +We expect that some modules will need time to support NumPy 2. 
+ +Traceback (most recent call last): File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_query.py", line 7, in + model = AutoModelForCausalLM.from_pretrained( + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 484, in from_pretrained + return model_class.from_pretrained( + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2604, in from_pretrained + state_dict = load_state_dict(resolved_archive_file) + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/modeling_utils.py", line 461, in load_state_dict + return safe_load_file(checkpoint_file) + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/safetensors/torch.py", line 315, in load_file + result[k] = f.get_tensor(k) + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/torch/storage.py", line 234, in __getitem__ + return super().__getitem__(*args, **kwargs) +/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/torch/storage.py:234: UserWarning: Failed to initialize NumPy: _ARRAY_API not found (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/tensor_numpy.cpp:84.) + return super().__getitem__(*args, **kwargs) +Traceback (most recent call last): + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_query.py", line 7, in + model = AutoModelForCausalLM.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 484, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2881, in from_pretrained + ) = cls._load_pretrained_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3278, in _load_pretrained_model + raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}") +RuntimeError: Error(s) in loading state_dict for LlamaForCausalLM: + size mismatch for model.layers.0.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.0.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). 
+ size mismatch for model.layers.1.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.1.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.2.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.2.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.3.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.3.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.4.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.4.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.5.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.5.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.6.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.6.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.7.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.7.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.8.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.8.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.9.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.9.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.10.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). 
+ size mismatch for model.layers.10.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.11.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.11.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.12.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.12.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.13.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.13.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.14.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.14.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.15.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.15.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.16.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.16.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.17.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.17.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.18.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.18.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.19.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.19.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). 
+ size mismatch for model.layers.20.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.20.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.21.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.21.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + You may consider adding `ignore_mismatched_sizes=True` in the model `from_pretrained` method. +(mt564_env) (base) pareshmishra@pareshs-MacBook-Air modeltraining_scrapping % diff --git a/attached_assets/Pasted--mt564-env-base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-train-mt564-mod-1747245627089.txt b/attached_assets/Pasted--mt564-env-base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-train-mt564-mod-1747245627089.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee8b898d9abab2bcdb1cc53a901d8f41575c40f2 --- /dev/null +++ b/attached_assets/Pasted--mt564-env-base-pareshmishra-pareshs-MacBook-Air-modeltraining-scrapping-python-train-mt564-mod-1747245627089.txt @@ -0,0 +1,94 @@ +(mt564_env) (base) pareshmishra@pareshs-MacBook-Air modeltraining_scrapping % python train_mt564_model.py --model_name ./data/models/TinyLlama-1.1B-Chat-v1.0 --training_data ./data/processed/mt564_training_data.json --output_dir ./mt564_tinyllama_model +2025-05-14 10:58:12,819 - INFO - Created a temporary directory at /var/folders/mx/gkzn2cwj6ylcqhhpb78gycfh0000gn/T/tmpfzwxh107 +2025-05-14 10:58:12,819 - INFO - Writing /var/folders/mx/gkzn2cwj6ylcqhhpb78gycfh0000gn/T/tmpfzwxh107/_remote_module_non_scriptable.py + +A module that was compiled using NumPy 1.x cannot be run in +NumPy 2.2.5 as it may crash. To support both 1.x and 2.x +versions of NumPy, modules must be compiled with NumPy 2.0. +Some module may need to rebuild instead e.g. with 'pybind11>=2.12'. + +If you are a user of the module, the easiest solution will be to +downgrade to 'numpy<2' or try to upgrade the affected module. +We expect that some modules will need time to support NumPy 2. + +Traceback (most recent call last): File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 211, in + main() + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 94, in main + from transformers import ( + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1076, in __getattr__ + module = self._get_module(self._class_to_module[name]) + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1086, in _get_module + return importlib.import_module("." 
+ module_name, self.__name__) + File "/Users/pareshmishra/anaconda3/lib/python3.11/importlib/__init__.py", line 126, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/trainer.py", line 87, in + from .trainer_pt_utils import ( + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/trainer_pt_utils.py", line 211, in + device: Optional[torch.device] = torch.device("cuda"), +/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/trainer_pt_utils.py:211: UserWarning: Failed to initialize NumPy: _ARRAY_API not found (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/tensor_numpy.cpp:84.) + device: Optional[torch.device] = torch.device("cuda"), +2025-05-14 10:58:13,398 - INFO - Loading training data from ./data/processed/mt564_training_data.json +2025-05-14 10:58:13,399 - INFO - Formatted 25 training examples +2025-05-14 10:58:13,409 - INFO - Loading tokenizer for ./data/models/TinyLlama-1.1B-Chat-v1.0 +2025-05-14 10:58:13,450 - INFO - Tokenizing datasets +2025-05-14 10:58:13,507 - INFO - Loading model ./data/models/TinyLlama-1.1B-Chat-v1.0 +Traceback (most recent call last): + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 211, in + main() + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/train_mt564_model.py", line 158, in main + model = AutoModelForCausalLM.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 484, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2881, in from_pretrained + ) = cls._load_pretrained_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/pareshmishra/Documents/wordpress_codes/replite code /modeltraining_datascraper/modeltraining_scrapping/mt564_env/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3278, in _load_pretrained_model + raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}") +RuntimeError: Error(s) in loading state_dict for LlamaForCausalLM: + size mismatch for model.layers.0.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.0.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.1.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). 
+ size mismatch for model.layers.1.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.2.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.2.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.3.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.3.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.4.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.4.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.5.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.5.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.6.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.6.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.7.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.7.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.8.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.8.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.9.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.9.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.10.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.10.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). 
+ size mismatch for model.layers.11.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.11.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.12.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.12.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.13.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.13.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.14.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.14.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.15.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.15.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.16.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.16.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.17.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.17.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.18.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.18.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.19.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.19.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.20.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). 
+ size mismatch for model.layers.20.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.21.self_attn.k_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + size mismatch for model.layers.21.self_attn.v_proj.weight: copying a param with shape torch.Size([256, 2048]) from checkpoint, the shape in current model is torch.Size([2048, 2048]). + You may consider adding `ignore_mismatched_sizes=True` in the model `from_pretrained` method. diff --git a/data/.DS_Store b/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..19da98bf02784b6f3eec1274c83f5be49cea2d30 Binary files /dev/null and b/data/.DS_Store differ diff --git a/data/processed/mt564_training_data.json b/data/processed/mt564_training_data.json new file mode 100644 index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc --- /dev/null +++ b/data/processed/mt564_training_data.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/data/raw/mt564_specification.json b/data/raw/mt564_specification.json new file mode 100644 index 0000000000000000000000000000000000000000..57834e72518543ff2c57679a37b3aa8124a3adde --- /dev/null +++ b/data/raw/mt564_specification.json @@ -0,0 +1,571 @@ +{ + "MT564FormatSpecifications": { + "sequences": [ + { + "name": "Sequence A - General Information", + "status": "Mandatory", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "GENL", + "number": 1 + }, + { + "status": "O", + "tag": "28E", + "qualifier": "", + "fieldName": "Page Number/Continuation Indicator", + "content": "5n/4!c", + "number": 2 + }, + { + "status": "M", + "tag": "20C", + "qualifier": "4!c", + "fieldName": "Reference", + "content": ":4!c//16x", + "number": 3 + }, + { + "status": "M", + "tag": "23G", + "qualifier": "", + "fieldName": "Function of the Message", + "content": "4!c[/4!c]", + "number": 4 + }, + { + "status": "M", + "tag": "22F", + "qualifier": "4!c", + "fieldName": "Indicator", + "content": ":4!c/[8c]/4!c", + "number": 5 + }, + { + "status": "O", + "tag": "98a", + "qualifier": "PREP", + "fieldName": "Date/Time", + "detailedName": "Preparation Date/Time", + "content": "A, C, or E", + "number": 6 + }, + { + "status": "M", + "tag": "25D", + "qualifier": "PROC", + "fieldName": "Status Code", + "detailedName": "Processing Status", + "content": ":4!c/[8c]/4!c", + "number": 7 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "GENL", + "number": 17 + } + ], + "subsequences": [ + { + "name": "Subsequence A1 - Linkages", + "status": "Optional Repetitive", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "LINK", + "number": 8 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "LINK", + "fieldName": "Indicator", + "detailedName": "Linkage Type Indicator", + "content": ":4!c/[8c]/4!c", + "number": 9 + }, + { + "status": "O", + "tag": "13a", + "qualifier": "LINK", + "fieldName": "Number Identification", + "detailedName": "Linked Message", + "content": "A or B", + "number": 10 + }, + { + "status": "M", + "tag": "20C", + "qualifier": "4!c", + "fieldName": "Reference", + "content": ":4!c//16x", + "number": 11 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + 
"fieldName": "End of Block", + "content": "LINK", + "number": 12 + } + ] + }, + { + "name": "Subsequence A2 - Reversal Reason", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "REVR", + "number": 13 + }, + { + "status": "M", + "tag": "24B", + "qualifier": "REVR", + "fieldName": "Reason Code", + "detailedName": "Reversal Reason", + "content": ":4!c/[8c]/4!c", + "number": 14 + }, + { + "status": "O", + "tag": "70D", + "qualifier": "REVR", + "fieldName": "Narrative", + "detailedName": "Reversal Reason Narrative", + "content": ":4!c//6*35x", + "number": 15 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "REVR", + "number": 16 + } + ] + } + ] + }, + { + "name": "Sequence B - Underlying Securities", + "status": "Mandatory", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "USECU", + "number": 18 + }, + { + "status": "M", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 19 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "USECU", + "number": 35 + } + ], + "subsequences": [ + { + "name": "Subsequence B1 - Financial Instrument Attributes", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "FIA", + "number": 20 + }, + { + "status": "O", + "tag": "94B", + "qualifier": "PLIS", + "fieldName": "Place", + "detailedName": "Place of Listing", + "content": ":4!c/[8c]/4!c[/30x]", + "number": 21 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "MICO", + "fieldName": "Indicator", + "detailedName": "Method of Interest Computation Indicator", + "content": ":4!c/[8c]/4!c", + "number": 22 + }, + { + "status": "O", + "tag": "12a", + "qualifier": "4!c", + "fieldName": "Type of Financial Instrument", + "content": "A, B, or C", + "number": 23 + }, + { + "status": "O", + "tag": "11A", + "qualifier": "DENO", + "fieldName": "Currency", + "detailedName": "Currency of Denomination", + "content": ":4!c//3!a", + "number": 24 + }, + { + "status": "O", + "tag": "98A", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": ":4!c//8!n", + "number": 25 + }, + { + "status": "O", + "tag": "92a", + "qualifier": "4!c", + "fieldName": "Rate", + "content": "A, D, or K", + "number": 26 + }, + { + "status": "O", + "tag": "36a", + "qualifier": "4!c", + "fieldName": "Quantity of Financial Instrument", + "content": "B or D", + "number": 27 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "FIA", + "number": 28 + } + ] + }, + { + "name": "Subsequence B2 - Account Information", + "status": "Mandatory Repetitive", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "ACCTINFO", + "number": 29 + }, + { + "status": "O", + "tag": "95a", + "qualifier": "ACOW", + "fieldName": "Party", + "detailedName": "Account Owner", + "content": "P or R", + "number": 30 + }, + { + "status": "M", + "tag": "97a", + "qualifier": "4!c", + "fieldName": "Account", + "content": "A, C, or D", + "number": 31 + }, + { + "status": "O", + "tag": "94a", + "qualifier": "SAFE", + "fieldName": "Place", + "detailedName": "Place of Safekeeping", + "content": "B, C, or F", + "number": 32 + }, + { + 
"status": "O", + "tag": "93a", + "qualifier": "4!c", + "fieldName": "Balance", + "content": "B, C, E, or F", + "number": 33 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "ACCTINFO", + "number": 34 + } + ] + } + ] + }, + { + "name": "Sequence C - Intermediate Securities", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "INTSEC", + "number": 36 + }, + { + "status": "M", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 37 + }, + { + "status": "O", + "tag": "36a", + "qualifier": "QINT", + "fieldName": "Quantity of Financial Instrument", + "detailedName": "Quantity of Intermediate Securities", + "content": "B, D, or E", + "number": 38 + }, + { + "status": "O", + "tag": "93a", + "qualifier": "4!c", + "fieldName": "Balance", + "content": "B, C, or E", + "number": 39 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "4!c", + "fieldName": "Indicator", + "content": ":4!c/[8c]/4!c", + "number": 40 + }, + { + "status": "O", + "tag": "92D", + "qualifier": "RTUN", + "fieldName": "Rate", + "detailedName": "Intermediate Securities to Underlying", + "content": ":4!c//15d/15d", + "number": 41 + }, + { + "status": "O", + "tag": "90B", + "qualifier": "MRKT", + "fieldName": "Price", + "detailedName": "Market Price", + "content": ":4!c//4!c/3!a15d", + "number": 42 + }, + { + "status": "M", + "tag": "98a", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": "A, B, or E", + "number": 43 + }, + { + "status": "O", + "tag": "69a", + "qualifier": "TRDP", + "fieldName": "Period", + "detailedName": "Trading Period", + "content": "A, B, C, D, E, or F", + "number": 44 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "INTSEC", + "number": 45 + } + ] + }, + { + "name": "Sequence D - Corporate Action Details", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "CADETL", + "number": 46 + }, + { + "status": "O", + "tag": "98a", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": "A, B, C, E, or F", + "number": 47 + }, + { + "status": "O", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 48 + }, + { + "status": "O", + "tag": "17B", + "qualifier": "4!c", + "fieldName": "Flag", + "content": ":4!c//1!a", + "number": 49 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "CADETL", + "number": 50 + } + ] + }, + { + "name": "Sequence E - Corporate Action Options", + "status": "Optional Repetitive", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "CAOPTN", + "number": 51 + }, + { + "status": "M", + "tag": "13A", + "qualifier": "CAON", + "fieldName": "Number Identification", + "detailedName": "CA Option Number", + "content": ":4!c//3!c", + "number": 52 + }, + { + "status": "M", + "tag": "22F", + "qualifier": "CAOP", + "fieldName": "Indicator", + "detailedName": "Option Type", + "content": ":4!c/[8c]/4!c", + "number": 53 + }, + { + "status": "O", + "tag": "11A", + "qualifier": "OPTN", + "fieldName": "Currency", + "detailedName": "Currency Option", + "content": ":4!c//3!a", + "number": 54 + }, + { + 
"status": "O", + "tag": "17B", + "qualifier": "DFLT", + "fieldName": "Flag", + "detailedName": "Default Processing Flag", + "content": ":4!c//1!a", + "number": 55 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "CAOPTN", + "number": 56 + } + ] + }, + { + "name": "Sequence F - Additional Information", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "ADDINFO", + "number": 57 + }, + { + "status": "O", + "tag": "70E", + "qualifier": "ADTX", + "fieldName": "Narrative", + "detailedName": "Additional Text", + "content": ":4!c//10*35x", + "number": 58 + }, + { + "status": "O", + "tag": "70E", + "qualifier": "DISC", + "fieldName": "Narrative", + "detailedName": "Disclaimer", + "content": ":4!c//10*35x", + "number": 59 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "ADDINFO", + "number": 60 + } + ] + } + ] + } +} \ No newline at end of file diff --git a/dependencies.txt b/dependencies.txt new file mode 100644 index 0000000000000000000000000000000000000000..a81e95363f4b070e3de82c70b6cedc2a2731a76f --- /dev/null +++ b/dependencies.txt @@ -0,0 +1,42 @@ +# Core machine learning libraries +torch>=1.10.0 +transformers>=4.30.0 +datasets>=2.10.0 +accelerate>=0.19.0 + +# TinyLlama and LLM dependencies +huggingface_hub>=0.14.0 +tokenizers>=0.13.0 +peft>=0.4.0 # For parameter-efficient fine-tuning +bitsandbytes>=0.38.0 # For quantization support + +# Web scraping and data processing +requests>=2.28.0 +beautifulsoup4>=4.11.0 +trafilatura>=1.4.0 +lxml>=4.9.0 + +# PDF processing +PyPDF2>=3.0.0 +pdf2image>=1.16.0 +pytesseract>=0.3.10 # Requires tesseract-ocr system package + +# Web application +flask>=2.2.0 +flask-cors>=3.0.10 +Werkzeug>=2.2.0 +python-dotenv>=1.0.0 + +# Utilities +numpy>=1.23.0 +pandas>=1.5.0 +tqdm>=4.64.0 +nltk>=3.7.0 +rouge>=1.0.1 # For evaluation +scipy>=1.9.0 +scikit-learn>=1.1.0 +Pillow>=9.2.0 # For image processing + +# Documentation +Markdown>=3.4.0flask_sqlalchemy +schedule diff --git a/evaluate_model.py b/evaluate_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8a47c8efb3b59b17ced61ad646ccb56a33820b9a --- /dev/null +++ b/evaluate_model.py @@ -0,0 +1,204 @@ +""" +Evaluation Script for TinyLlama Models +This script helps evaluate the performance of a trained TinyLlama model. 
+""" + +import os +import argparse +import torch +import numpy as np +from transformers import AutoModelForCausalLM, AutoTokenizer +from datasets import load_dataset +import logging +from tqdm import tqdm + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def parse_args(): + parser = argparse.ArgumentParser(description="Evaluate a TinyLlama model") + parser.add_argument( + "--model_path", + type=str, + required=True, + help="Path to the fine-tuned model directory" + ) + parser.add_argument( + "--eval_data", + type=str, + required=True, + help="Path to evaluation data file (JSON or CSV)" + ) + parser.add_argument( + "--prompt_column", + type=str, + default="prompt", + help="Column name containing the prompts" + ) + parser.add_argument( + "--completion_column", + type=str, + default="completion", + help="Column name containing the expected completions (optional)" + ) + parser.add_argument( + "--max_new_tokens", + type=int, + default=256, + help="Maximum number of tokens to generate" + ) + parser.add_argument( + "--temperature", + type=float, + default=0.7, + help="Sampling temperature" + ) + parser.add_argument( + "--top_p", + type=float, + default=0.9, + help="Top-p sampling parameter" + ) + parser.add_argument( + "--output_file", + type=str, + default="evaluation_results.json", + help="Path to save evaluation results" + ) + parser.add_argument( + "--batch_size", + type=int, + default=4, + help="Batch size for evaluation" + ) + return parser.parse_args() + +def generate_responses(model, tokenizer, prompts, args): + """Generate responses from the model for a list of prompts""" + responses = [] + + # Process in batches + for i in tqdm(range(0, len(prompts), args.batch_size), desc="Generating responses"): + batch_prompts = prompts[i:i+args.batch_size] + + inputs = tokenizer(batch_prompts, return_tensors="pt", padding=True) + inputs = {k: v.to(model.device) for k, v in inputs.items()} + + with torch.no_grad(): + outputs = model.generate( + **inputs, + max_new_tokens=args.max_new_tokens, + temperature=args.temperature, + top_p=args.top_p, + do_sample=True, + pad_token_id=tokenizer.eos_token_id + ) + + # Decode and extract only the newly generated text + for j, output in enumerate(outputs): + input_length = inputs.input_ids[j].size(0) + generated_text = tokenizer.decode(output[input_length:], skip_special_tokens=True) + responses.append(generated_text) + + return responses + +def calculate_metrics(generated_texts, reference_texts): + """Calculate evaluation metrics if reference texts are available""" + metrics = {} + + try: + from rouge import Rouge + from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction + import nltk + nltk.download('punkt', quiet=True) + + # Calculate ROUGE scores + rouge = Rouge() + rouge_scores = rouge.get_scores(generated_texts, reference_texts, avg=True) + + # Calculate BLEU scores + bleu_scores = [] + for gen, ref in zip(generated_texts, reference_texts): + gen_tokens = nltk.word_tokenize(gen.lower()) + ref_tokens = [nltk.word_tokenize(ref.lower())] + if gen_tokens and ref_tokens[0]: + score = sentence_bleu(ref_tokens, gen_tokens, + smoothing_function=SmoothingFunction().method1) + bleu_scores.append(score) + + avg_bleu = np.mean(bleu_scores) if bleu_scores else 0 + + metrics = { + "rouge": rouge_scores, + "bleu": avg_bleu + } + except ImportError: + logger.warning("Rouge or NLTK not installed. 
Skipping metric calculation.") + metrics = {"note": "Metrics calculation skipped due to missing dependencies"} + + return metrics + +def main(): + args = parse_args() + + # Load model and tokenizer + logger.info(f"Loading model from: {args.model_path}") + model = AutoModelForCausalLM.from_pretrained(args.model_path) + tokenizer = AutoTokenizer.from_pretrained(args.model_path) + + # Move model to GPU if available + device = "cuda" if torch.cuda.is_available() else "cpu" + model = model.to(device) + model.eval() + + # Load evaluation data + logger.info(f"Loading evaluation data from: {args.eval_data}") + data_extension = os.path.splitext(args.eval_data)[1].replace(".", "") + eval_dataset = load_dataset(data_extension, data_files=args.eval_data)["train"] + + # Get prompts and expected completions + prompts = eval_dataset[args.prompt_column] + + has_completions = args.completion_column in eval_dataset.column_names + completions = eval_dataset[args.completion_column] if has_completions else None + + # Generate responses + logger.info("Generating responses...") + generated_texts = generate_responses(model, tokenizer, prompts, args) + + # Calculate metrics if completions are available + metrics = {} + if has_completions: + logger.info("Calculating evaluation metrics...") + metrics = calculate_metrics(generated_texts, completions) + + # Prepare results + results = { + "model_path": args.model_path, + "samples": [ + {"prompt": prompt, "generated": generated, "reference": ref} + for prompt, generated, ref in zip( + prompts, + generated_texts, + completions if has_completions else [""] * len(prompts) + ) + ], + "metrics": metrics + } + + # Save results + import json + with open(args.output_file, "w") as f: + json.dump(results, f, indent=2) + + logger.info(f"Evaluation complete. Results saved to {args.output_file}") + + # Print summary + if metrics and "rouge" in metrics: + logger.info(f"ROUGE-1: {metrics['rouge']['rouge-1']['f']:.4f}") + logger.info(f"ROUGE-2: {metrics['rouge']['rouge-2']['f']:.4f}") + logger.info(f"ROUGE-L: {metrics['rouge']['rouge-l']['f']:.4f}") + logger.info(f"BLEU: {metrics['bleu']:.4f}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/example_training_data.json b/example_training_data.json new file mode 100644 index 0000000000000000000000000000000000000000..d6809659848604d1eeed55ac5aa0a8fbf06b821e --- /dev/null +++ b/example_training_data.json @@ -0,0 +1,22 @@ +[ + { + "instruction": "Explain the process of photosynthesis in simple terms.", + "response": "Photosynthesis is how plants make their own food. They take in sunlight, water, and carbon dioxide, and convert these ingredients into oxygen and glucose (sugar). The sunlight provides energy for this process, water comes from the roots, and carbon dioxide is absorbed from the air through tiny pores called stomata. The sugar created is used as food for the plant, and the oxygen is released back into the air, which is great for us since we need oxygen to breathe!" + }, + { + "instruction": "What is machine learning?", + "response": "Machine learning is a subset of artificial intelligence that enables computers to learn and improve from experience without being explicitly programmed. Instead of writing specific instructions for every situation, programmers create algorithms that allow computers to analyze data, identify patterns, and make decisions with minimal human intervention. 
Machine learning systems improve over time as they process more data, becoming increasingly accurate at tasks like recognizing images, translating languages, recommending products, or predicting outcomes." + }, + { + "instruction": "Write a short poem about the moon.", + "response": "Silver guardian of the night,\nCasting shadows, pale and bright.\nEver changing, yet the same,\nPlaying cosmic rhythm's game.\n\nPulling tides with gentle might,\nGuiding dreamers through the night.\nAncient witness to our story,\nFloating in your silent glory." + }, + { + "instruction": "How do electric cars work?", + "response": "Electric cars work by using electricity stored in rechargeable batteries to power an electric motor that turns the wheels. Unlike conventional vehicles that use internal combustion engines burning gasoline or diesel, electric cars have a large battery pack, an electric motor (or multiple motors), and a controller that regulates power delivery. When you press the accelerator, the controller sends electricity from the batteries to the motor, which converts electrical energy into mechanical energy to move the car. When you brake, many electric cars use regenerative braking, which recaptures some energy and returns it to the battery. Electric cars are charged by plugging into charging stations or home outlets, similar to how you charge a smartphone." + }, + { + "instruction": "Explain the water cycle.", + "response": "The water cycle is the continuous movement of water on, above, and below Earth's surface. It begins with evaporation, where heat from the sun turns liquid water from oceans, lakes, and rivers into water vapor that rises into the air. As this vapor rises, it cools and undergoes condensation, forming clouds. When the water droplets in clouds become too heavy, they fall back to Earth as precipitation (rain, snow, sleet, or hail). Some of this water collects in bodies of water, some seeps into the ground as groundwater, and some is absorbed by plants, which release water vapor back into the air through transpiration. This ongoing cycle continuously recycles Earth's water supply, ensuring that fresh water is available for all living things." + } +] \ No newline at end of file diff --git a/instance/horoscope.db b/instance/horoscope.db new file mode 100644 index 0000000000000000000000000000000000000000..aeae1046787c5d3328b50ae43bc8152cde04c72b Binary files /dev/null and b/instance/horoscope.db differ diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..0e1f1492472d6d4646153146d75f25a7e7f9e114 --- /dev/null +++ b/main.py @@ -0,0 +1,16 @@ +import logging +from app import app +from api.routes import register_routes +from api.mt564_routes import register_mt564_routes + +# Set up logging +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + +# Register API routes +register_routes(app) +register_mt564_routes(app) + +if __name__ == "__main__": + logger.info("Starting the web scraper API server") + app.run(host="0.0.0.0", port=5001, debug=True) diff --git a/model/download_tinyllama.py b/model/download_tinyllama.py new file mode 100644 index 0000000000000000000000000000000000000000..24385e1fea630fa3ac68364de7c6568e5c0692ce --- /dev/null +++ b/model/download_tinyllama.py @@ -0,0 +1,152 @@ +""" +Download TinyLlama Model + +This script downloads the TinyLlama model from Hugging Face and prepares it +for fine-tuning on SWIFT MT564 documentation. 
+ +Usage: + python download_tinyllama.py --model_name TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./data/models +""" + +import os +import argparse +import logging +from typing import Optional + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +def parse_args(): + parser = argparse.ArgumentParser(description="Download TinyLlama model from Hugging Face") + parser.add_argument( + "--model_name", + type=str, + default="TinyLlama/TinyLlama-1.1B-Chat-v1.0", + help="Name of the TinyLlama model on Hugging Face Hub" + ) + parser.add_argument( + "--output_dir", + type=str, + default="./data/models", + help="Directory to save the downloaded model" + ) + parser.add_argument( + "--use_auth_token", + action="store_true", + help="Use Hugging Face authentication token for downloading gated models" + ) + parser.add_argument( + "--branch", + type=str, + default="main", + help="Branch of the model repository to download from" + ) + parser.add_argument( + "--check_integrity", + action="store_true", + help="Verify integrity of downloaded files" + ) + return parser.parse_args() + +def download_model( + model_name: str, + output_dir: str, + use_auth_token: bool = False, + branch: str = "main", + check_integrity: bool = False +) -> Optional[str]: + """ + Download model and tokenizer from Hugging Face Hub + + Args: + model_name: Name of the model on Hugging Face Hub + output_dir: Directory to save the model + use_auth_token: Whether to use Hugging Face token for gated models + branch: Branch of the model repository + check_integrity: Whether to verify integrity of downloaded files + + Returns: + Path to the downloaded model or None if download failed + """ + try: + # Import libraries here so the script doesn't fail if they're not installed + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer + from huggingface_hub import snapshot_download + + logger.info(f"Downloading model: {model_name}") + os.makedirs(output_dir, exist_ok=True) + + # Create model directory + model_output_dir = os.path.join(output_dir, model_name.split('/')[-1]) + os.makedirs(model_output_dir, exist_ok=True) + + # Option 1: Use snapshot_download for more control + if check_integrity: + logger.info("Using snapshot_download with integrity checking") + snapshot_download( + repo_id=model_name, + local_dir=model_output_dir, + use_auth_token=use_auth_token if use_auth_token else None, + revision=branch + ) + + # Option 2: Use Transformers' download mechanism + else: + logger.info("Using Transformers' auto classes for downloading") + # Download and save tokenizer + tokenizer = AutoTokenizer.from_pretrained( + model_name, + use_auth_token=use_auth_token if use_auth_token else None, + revision=branch + ) + tokenizer.save_pretrained(model_output_dir) + logger.info(f"Tokenizer saved to {model_output_dir}") + + # Download and save model + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, + use_auth_token=use_auth_token if use_auth_token else None, + revision=branch, + low_cpu_mem_usage=True + ) + model.save_pretrained(model_output_dir) + logger.info(f"Model saved to {model_output_dir}") + + logger.info(f"Successfully downloaded model to {model_output_dir}") + return model_output_dir + + except ImportError as e: + logger.error(f"Required libraries not installed: {e}") + logger.error("Please install required packages: pip install torch transformers 
huggingface_hub") + return None + except Exception as e: + logger.error(f"Error downloading model: {e}") + return None + +def main(): + args = parse_args() + + # Check if HUGGING_FACE_TOKEN environment variable is set + if args.use_auth_token and "HUGGING_FACE_TOKEN" not in os.environ: + logger.warning("--use_auth_token flag is set but HUGGING_FACE_TOKEN environment variable is not found.") + logger.warning("You can set it using: export HUGGING_FACE_TOKEN=your_token_here") + + # Download the model + model_path = download_model( + model_name=args.model_name, + output_dir=args.output_dir, + use_auth_token=args.use_auth_token, + branch=args.branch, + check_integrity=args.check_integrity + ) + + if model_path: + logger.info(f"Model downloaded successfully to: {model_path}") + logger.info("You can now use this model for fine-tuning with the train_mt564_model.py script.") + else: + logger.error("Failed to download the model.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model/upload_to_huggingface.py b/model/upload_to_huggingface.py new file mode 100644 index 0000000000000000000000000000000000000000..68f8560f229b0a73c7740e03ae55d4a3ceb6d311 --- /dev/null +++ b/model/upload_to_huggingface.py @@ -0,0 +1,317 @@ +""" +Upload Fine-tuned TinyLlama Model to Hugging Face Hub + +This script uploads a fine-tuned TinyLlama model to the Hugging Face Hub. +It handles authentication, model card creation, and repository management. + +Usage: + python upload_to_huggingface.py --model_dir ./mt564_tinyllama_model --repo_name username/mt564-tinyllama +""" + +import os +import argparse +import logging +from datetime import datetime +from typing import Optional, List, Dict, Any + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +def parse_args(): + parser = argparse.ArgumentParser(description="Upload fine-tuned TinyLlama model to Hugging Face Hub") + parser.add_argument( + "--model_dir", + type=str, + required=True, + help="Directory containing the fine-tuned model" + ) + parser.add_argument( + "--repo_name", + type=str, + required=True, + help="Name for the Hugging Face repository (format: username/repo-name)" + ) + parser.add_argument( + "--commit_message", + type=str, + default=f"Upload fine-tuned TinyLlama model - {datetime.now().strftime('%Y-%m-%d')}", + help="Commit message for the model upload" + ) + parser.add_argument( + "--private", + action="store_true", + help="Make the repository private" + ) + parser.add_argument( + "--create_model_card", + action="store_true", + default=True, + help="Create a model card README.md" + ) + parser.add_argument( + "--base_model", + type=str, + default="TinyLlama/TinyLlama-1.1B-Chat-v1.0", + help="Base model name used for fine-tuning" + ) + parser.add_argument( + "--tags", + type=str, + nargs="+", + default=["swift", "mt564", "financial", "tinyllama", "finance"], + help="Tags for the model" + ) + return parser.parse_args() + +def create_model_card( + base_model: str, + repo_name: str, + dataset_info: str = "SWIFT MT564 documentation", + tags: List[str] = None, + training_details: Dict[str, Any] = None +) -> str: + """ + Create a model card for the Hugging Face Hub + + Args: + base_model: Name of the base model used for fine-tuning + repo_name: Name of the Hugging Face repository + dataset_info: Information about the dataset used + tags: Tags for the model + training_details: Dictionary with training hyperparameters + + Returns: + Model card content as a 
string
+    """
+    if tags is None:
+        tags = ["swift", "mt564", "finance", "tinyllama"]
+
+    if training_details is None:
+        training_details = {
+            "epochs": 3,
+            "learning_rate": "2e-5",
+            "batch_size": 2,
+            "gradient_accumulation_steps": 4,
+            "training_date": datetime.now().strftime("%Y-%m-%d")
+        }
+
+    repo_owner, repo_id = repo_name.split('/')
+
+    model_card = f"""---
+language: en
+license: apache-2.0
+tags:
+{chr(10).join([f'- {tag}' for tag in tags])}
+datasets:
+- custom
+metrics:
+- accuracy
+---
+
+# {repo_id}
+
+This is a fine-tuned version of [{base_model}](https://huggingface.co/{base_model}) specialized for understanding SWIFT MT564 message formats and financial documentation.
+
+## Model Description
+
+This model was fine-tuned on SWIFT MT564 documentation to help financial professionals understand and work with Corporate Action Notification messages. It can answer questions about message structure, field specifications, and usage guidelines for MT564 messages.
+
+### Base Model
+
+- **Base Model**: {base_model}
+- **Model Type**: TinyLlama
+- **Language**: English
+- **Fine-tuning Focus**: SWIFT financial messaging formats, particularly MT564
+
+## Training Data
+
+The model was fine-tuned on the following data:
+- {dataset_info}
+- The data includes message specifications, field descriptions, sequence structures, and usage guidelines
+
+## Training Procedure
+
+The model was fine-tuned with the following parameters:
+- **Epochs**: {training_details['epochs']}
+- **Learning Rate**: {training_details['learning_rate']}
+- **Batch Size**: {training_details['batch_size']}
+- **Gradient Accumulation Steps**: {training_details['gradient_accumulation_steps']}
+- **Training Date**: {training_details['training_date']}
+
+## Intended Use & Limitations
+
+This model is specifically designed to:
+- Answer questions about SWIFT MT564 message formats
+- Assist with understanding Corporate Action Notifications
+- Help parse and interpret MT564 messages
+
+**Limitations**:
+- This model specializes in MT564 and may have limited knowledge of other SWIFT message types
+- The model should not be used for generating actual SWIFT messages for production systems
+- Always verify critical financial information with official SWIFT documentation
+
+## Usage
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Load model and tokenizer
+model = AutoModelForCausalLM.from_pretrained("{repo_name}")
+tokenizer = AutoTokenizer.from_pretrained("{repo_name}")
+
+# Format prompt for the chat model
+prompt = "<|im_start|>user\\nWhat is the purpose of Sequence A in MT564 messages?<|im_end|>\\n<|im_start|>assistant\\n"
+
+# Tokenize and generate response
+inputs = tokenizer(prompt, return_tensors="pt")
+outputs = model.generate(inputs.input_ids, max_new_tokens=256, temperature=0.7)
+response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(response)
+```
+
+## Citation & Contact
+
+If you use this model, please cite:
+```
+@misc{{{repo_id.replace('-', '_').lower()},
+  author = {{{repo_owner}}},
+  title = {{{repo_id} - A fine-tuned TinyLlama model for SWIFT MT564 documentation}},
+  year = {{{datetime.now().year}}},
+  publisher = {{Hugging Face}},
+  journal = {{Hugging Face Repository}},
+  howpublished = {{https://huggingface.co/{repo_name}}},
+}}
+```
+
+For questions or feedback, please reach out through the [Hugging Face community](https://discuss.huggingface.co/) or the GitHub repository linked to this project.
+""" + return model_card + +def upload_to_hub( + model_dir: str, + repo_name: str, + commit_message: str = "Upload fine-tuned model", + private: bool = False, + create_card: bool = True, + base_model: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + tags: List[str] = None +) -> bool: + """ + Upload model to Hugging Face Hub + + Args: + model_dir: Directory containing the fine-tuned model + repo_name: Name for the Hugging Face repository (username/repo-name) + commit_message: Commit message for the upload + private: Whether to make the repository private + create_card: Whether to create a model card + base_model: Base model used for fine-tuning + tags: Tags for the model + + Returns: + Success status (True if upload was successful) + """ + try: + # Import libraries here so the script doesn't fail if they're not installed + from huggingface_hub import HfApi, create_repo + + # Check if HUGGING_FACE_TOKEN environment variable is set + token = os.environ.get("HUGGING_FACE_TOKEN") + if not token: + logger.error("HUGGING_FACE_TOKEN environment variable is not set.") + logger.error("Set it using: export HUGGING_FACE_TOKEN=your_token_here") + return False + + api = HfApi(token=token) + logger.info(f"Authenticated with Hugging Face Hub") + + # Create repository if it doesn't exist + try: + repo_url = create_repo( + repo_id=repo_name, + private=private, + token=token, + exist_ok=True + ) + logger.info(f"Repository created/accessed: {repo_url}") + except Exception as e: + logger.error(f"Error creating repository: {e}") + return False + + # Create and save model card if requested + if create_card: + logger.info("Creating model card") + model_card_content = create_model_card( + base_model=base_model, + repo_name=repo_name, + tags=tags + ) + + model_card_path = os.path.join(model_dir, "README.md") + with open(model_card_path, "w", encoding="utf-8") as f: + f.write(model_card_content) + logger.info(f"Model card saved to {model_card_path}") + + # Upload model to Hub + logger.info(f"Uploading model from {model_dir} to {repo_name}") + api.upload_folder( + folder_path=model_dir, + repo_id=repo_name, + commit_message=commit_message + ) + + logger.info(f"Model successfully uploaded to {repo_name}") + logger.info(f"View your model at: https://huggingface.co/{repo_name}") + return True + + except ImportError as e: + logger.error(f"Required libraries not installed: {e}") + logger.error("Please install huggingface_hub: pip install huggingface_hub") + return False + except Exception as e: + logger.error(f"Error uploading model: {e}") + return False + +def main(): + args = parse_args() + + # Check if model directory exists + if not os.path.isdir(args.model_dir): + logger.error(f"Model directory {args.model_dir} does not exist") + return + + # Check if model files exist + model_files = [ + "pytorch_model.bin", "config.json", + "generation_config.json", "tokenizer_config.json", + "tokenizer.json", "special_tokens_map.json" + ] + missing_files = [f for f in model_files if not os.path.exists(os.path.join(args.model_dir, f))] + + if missing_files: + logger.warning(f"The following model files are missing: {', '.join(missing_files)}") + logger.warning("The model might be in a different format or incomplete") + + # Upload to Hugging Face Hub + logger.info(f"Uploading model from {args.model_dir} to {args.repo_name}") + success = upload_to_hub( + model_dir=args.model_dir, + repo_name=args.repo_name, + commit_message=args.commit_message, + private=args.private, + create_card=args.create_model_card, + base_model=args.base_model, + 
tags=args.tags
+    )
+
+    if success:
+        logger.info(f"Model upload complete! Your model is now available at: https://huggingface.co/{args.repo_name}")
+        logger.info("You can use it with Transformers library:")
+        logger.info("from transformers import AutoModelForCausalLM, AutoTokenizer")
+        logger.info(f"model = AutoModelForCausalLM.from_pretrained('{args.repo_name}')")
+        logger.info(f"tokenizer = AutoTokenizer.from_pretrained('{args.repo_name}')")
+    else:
+        logger.error("Model upload failed. Please check the error messages above.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models.py b/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d1cd0c327470994816625a4ff8ab6d6d17bda34
--- /dev/null
+++ b/models.py
@@ -0,0 +1,105 @@
+import os
+from datetime import datetime
+from flask_sqlalchemy import SQLAlchemy
+from sqlalchemy.orm import DeclarativeBase
+
+
+class Base(DeclarativeBase):
+    pass
+
+
+db = SQLAlchemy(model_class=Base)
+
+
+class Horoscope(db.Model):
+    """Model for storing horoscope data"""
+    id = db.Column(db.Integer, primary_key=True)
+    sign = db.Column(db.String(20), nullable=False)
+    date = db.Column(db.Date, nullable=False)
+    prediction = db.Column(db.Text, nullable=False)
+    source = db.Column(db.String(100), nullable=False)
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+
+    def __repr__(self):
+        return f"<Horoscope {self.sign} {self.date}>"
+
+    def to_dict(self):
+        return {
+            "id": self.id,
+            "sign": self.sign,
+            "date": self.date.isoformat(),
+            "prediction": self.prediction,
+            "source": self.source,
+            "created_at": self.created_at.isoformat()
+        }
+
+
+class ConsolidatedHoroscope(db.Model):
+    """Model for storing LLM-consolidated horoscope data"""
+    id = db.Column(db.Integer, primary_key=True)
+    sign = db.Column(db.String(20), nullable=False)
+    date = db.Column(db.Date, nullable=False)
+    consolidated_prediction = db.Column(db.Text, nullable=False)
+    sources = db.Column(db.Text, nullable=False) # JSON string of source URLs
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+
+    def __repr__(self):
+        return f"<ConsolidatedHoroscope {self.sign} {self.date}>"
+
+    def to_dict(self):
+        return {
+            "id": self.id,
+            "sign": self.sign,
+            "date": self.date.isoformat(),
+            "consolidated_prediction": self.consolidated_prediction,
+            "sources": self.sources,
+            "created_at": self.created_at.isoformat()
+        }
+
+
+class ScheduledJob(db.Model):
+    """Model for tracking scheduled scraping jobs"""
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column(db.String(100), nullable=False, unique=True)
+    frequency = db.Column(db.String(50), nullable=False) # daily, hourly, etc.
+    last_run = db.Column(db.DateTime, nullable=True)
+    next_run = db.Column(db.DateTime, nullable=True)
+    enabled = db.Column(db.Boolean, default=True)
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+
+    def __repr__(self):
+        return f"<ScheduledJob {self.name} ({self.frequency})>"
+
+    def to_dict(self):
+        return {
+            "id": self.id,
+            "name": self.name,
+            "frequency": self.frequency,
+            "last_run": self.last_run.isoformat() if self.last_run else None,
+            "next_run": self.next_run.isoformat() if self.next_run else None,
+            "enabled": self.enabled,
+            "created_at": self.created_at.isoformat()
+        }
+
+
+class WordPressExport(db.Model):
+    """Model for tracking WordPress exports"""
+    id = db.Column(db.Integer, primary_key=True)
+    horoscope_id = db.Column(db.Integer, db.ForeignKey("consolidated_horoscope.id"), nullable=False)
+    wordpress_post_id = db.Column(db.Integer, nullable=True)
+    wordpress_url = db.Column(db.String(255), nullable=True)
+    status = db.Column(db.String(50), default="pending") # pending, published, failed
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+
+    def __repr__(self):
+        return f"<WordPressExport {self.id} {self.status}>"
+
+    def to_dict(self):
+        return {
+            "id": self.id,
+            "horoscope_id": self.horoscope_id,
+            "wordpress_post_id": self.wordpress_post_id,
+            "wordpress_url": self.wordpress_url,
+            "status": self.status,
+            "created_at": self.created_at.isoformat()
+        }
\ No newline at end of file
diff --git a/mt564_specification.json b/mt564_specification.json
new file mode 100644
index 0000000000000000000000000000000000000000..57834e72518543ff2c57679a37b3aa8124a3adde
--- /dev/null
+++ b/mt564_specification.json
@@ -0,0 +1,571 @@
+{
+  "MT564FormatSpecifications": {
+    "sequences": [
+      {
+        "name": "Sequence A - General Information",
+        "status": "Mandatory",
+        "fields": [
+          {
+            "status": "M",
+            "tag": "16R",
+            "qualifier": "",
+            "fieldName": "Start of Block",
+            "content": "GENL",
+            "number": 1
+          },
+          {
+            "status": "O",
+            "tag": "28E",
+            "qualifier": "",
+            "fieldName": "Page Number/Continuation Indicator",
+            "content": "5n/4!c",
+            "number": 2
+          },
+          {
+            "status": "M",
+            "tag": "20C",
+            "qualifier": "4!c",
+            "fieldName": "Reference",
+            "content": ":4!c//16x",
+            "number": 3
+          },
+          {
+            "status": "M",
+            "tag": "23G",
+            "qualifier": "",
+            "fieldName": "Function of the Message",
+            "content": "4!c[/4!c]",
+            "number": 4
+          },
+          {
+            "status": "M",
+            "tag": "22F",
+            "qualifier": "4!c",
+            "fieldName": "Indicator",
+            "content": ":4!c/[8c]/4!c",
+            "number": 5
+          },
+          {
+            "status": "O",
+            "tag": "98a",
+            "qualifier": "PREP",
+            "fieldName": "Date/Time",
+            "detailedName": "Preparation Date/Time",
+            "content": "A, C, or E",
+            "number": 6
+          },
+          {
+            "status": "M",
+            "tag": "25D",
+            "qualifier": "PROC",
+            "fieldName": "Status Code",
+            "detailedName": "Processing Status",
+            "content": ":4!c/[8c]/4!c",
+            "number": 7
+          },
+          {
+            "status": "M",
+            "tag": "16S",
+            "qualifier": "",
+            "fieldName": "End of Block",
+            "content": "GENL",
+            "number": 17
+          }
+        ],
+        "subsequences": [
+          {
+            "name": "Subsequence A1 - Linkages",
+            "status": "Optional Repetitive",
+            "fields": [
+              {
+                "status": "M",
+                "tag": "16R",
+                "qualifier": "",
+                "fieldName": "Start of Block",
+                "content": "LINK",
+                "number": 8
+              },
+              {
+                "status": "O",
+                "tag": "22F",
+                "qualifier": "LINK",
+                "fieldName": "Indicator",
+                "detailedName": "Linkage Type Indicator",
+                "content": ":4!c/[8c]/4!c",
+                "number": 9
+              },
+              {
+                "status": "O",
+                "tag": "13a",
+                "qualifier": "LINK",
+                "fieldName": "Number Identification",
+                "detailedName": "Linked Message",
+                "content": "A or B",
+                "number": 10
+              },
+              {
+                "status": "M",
"tag": "20C", + "qualifier": "4!c", + "fieldName": "Reference", + "content": ":4!c//16x", + "number": 11 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "LINK", + "number": 12 + } + ] + }, + { + "name": "Subsequence A2 - Reversal Reason", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "REVR", + "number": 13 + }, + { + "status": "M", + "tag": "24B", + "qualifier": "REVR", + "fieldName": "Reason Code", + "detailedName": "Reversal Reason", + "content": ":4!c/[8c]/4!c", + "number": 14 + }, + { + "status": "O", + "tag": "70D", + "qualifier": "REVR", + "fieldName": "Narrative", + "detailedName": "Reversal Reason Narrative", + "content": ":4!c//6*35x", + "number": 15 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "REVR", + "number": 16 + } + ] + } + ] + }, + { + "name": "Sequence B - Underlying Securities", + "status": "Mandatory", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "USECU", + "number": 18 + }, + { + "status": "M", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 19 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "USECU", + "number": 35 + } + ], + "subsequences": [ + { + "name": "Subsequence B1 - Financial Instrument Attributes", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "FIA", + "number": 20 + }, + { + "status": "O", + "tag": "94B", + "qualifier": "PLIS", + "fieldName": "Place", + "detailedName": "Place of Listing", + "content": ":4!c/[8c]/4!c[/30x]", + "number": 21 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "MICO", + "fieldName": "Indicator", + "detailedName": "Method of Interest Computation Indicator", + "content": ":4!c/[8c]/4!c", + "number": 22 + }, + { + "status": "O", + "tag": "12a", + "qualifier": "4!c", + "fieldName": "Type of Financial Instrument", + "content": "A, B, or C", + "number": 23 + }, + { + "status": "O", + "tag": "11A", + "qualifier": "DENO", + "fieldName": "Currency", + "detailedName": "Currency of Denomination", + "content": ":4!c//3!a", + "number": 24 + }, + { + "status": "O", + "tag": "98A", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": ":4!c//8!n", + "number": 25 + }, + { + "status": "O", + "tag": "92a", + "qualifier": "4!c", + "fieldName": "Rate", + "content": "A, D, or K", + "number": 26 + }, + { + "status": "O", + "tag": "36a", + "qualifier": "4!c", + "fieldName": "Quantity of Financial Instrument", + "content": "B or D", + "number": 27 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "FIA", + "number": 28 + } + ] + }, + { + "name": "Subsequence B2 - Account Information", + "status": "Mandatory Repetitive", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "ACCTINFO", + "number": 29 + }, + { + "status": "O", + "tag": "95a", + "qualifier": "ACOW", + "fieldName": "Party", + "detailedName": "Account Owner", + "content": "P or R", + "number": 30 + }, + { + "status": "M", + "tag": "97a", + "qualifier": "4!c", + "fieldName": "Account", + "content": "A, C, or D", + "number": 31 + }, + { + 
"status": "O", + "tag": "94a", + "qualifier": "SAFE", + "fieldName": "Place", + "detailedName": "Place of Safekeeping", + "content": "B, C, or F", + "number": 32 + }, + { + "status": "O", + "tag": "93a", + "qualifier": "4!c", + "fieldName": "Balance", + "content": "B, C, E, or F", + "number": 33 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "ACCTINFO", + "number": 34 + } + ] + } + ] + }, + { + "name": "Sequence C - Intermediate Securities", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "INTSEC", + "number": 36 + }, + { + "status": "M", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 37 + }, + { + "status": "O", + "tag": "36a", + "qualifier": "QINT", + "fieldName": "Quantity of Financial Instrument", + "detailedName": "Quantity of Intermediate Securities", + "content": "B, D, or E", + "number": 38 + }, + { + "status": "O", + "tag": "93a", + "qualifier": "4!c", + "fieldName": "Balance", + "content": "B, C, or E", + "number": 39 + }, + { + "status": "O", + "tag": "22F", + "qualifier": "4!c", + "fieldName": "Indicator", + "content": ":4!c/[8c]/4!c", + "number": 40 + }, + { + "status": "O", + "tag": "92D", + "qualifier": "RTUN", + "fieldName": "Rate", + "detailedName": "Intermediate Securities to Underlying", + "content": ":4!c//15d/15d", + "number": 41 + }, + { + "status": "O", + "tag": "90B", + "qualifier": "MRKT", + "fieldName": "Price", + "detailedName": "Market Price", + "content": ":4!c//4!c/3!a15d", + "number": 42 + }, + { + "status": "M", + "tag": "98a", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": "A, B, or E", + "number": 43 + }, + { + "status": "O", + "tag": "69a", + "qualifier": "TRDP", + "fieldName": "Period", + "detailedName": "Trading Period", + "content": "A, B, C, D, E, or F", + "number": 44 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "INTSEC", + "number": 45 + } + ] + }, + { + "name": "Sequence D - Corporate Action Details", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "CADETL", + "number": 46 + }, + { + "status": "O", + "tag": "98a", + "qualifier": "4!c", + "fieldName": "Date/Time", + "content": "A, B, C, E, or F", + "number": 47 + }, + { + "status": "O", + "tag": "35B", + "qualifier": "", + "fieldName": "Identification of the Financial Instrument", + "content": "[ISIN1!e12!c]\n[4*35x]", + "number": 48 + }, + { + "status": "O", + "tag": "17B", + "qualifier": "4!c", + "fieldName": "Flag", + "content": ":4!c//1!a", + "number": 49 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "CADETL", + "number": 50 + } + ] + }, + { + "name": "Sequence E - Corporate Action Options", + "status": "Optional Repetitive", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "CAOPTN", + "number": 51 + }, + { + "status": "M", + "tag": "13A", + "qualifier": "CAON", + "fieldName": "Number Identification", + "detailedName": "CA Option Number", + "content": ":4!c//3!c", + "number": 52 + }, + { + "status": "M", + "tag": "22F", + "qualifier": "CAOP", + "fieldName": "Indicator", + "detailedName": "Option Type", + "content": ":4!c/[8c]/4!c", + "number": 53 + }, + { + 
"status": "O", + "tag": "11A", + "qualifier": "OPTN", + "fieldName": "Currency", + "detailedName": "Currency Option", + "content": ":4!c//3!a", + "number": 54 + }, + { + "status": "O", + "tag": "17B", + "qualifier": "DFLT", + "fieldName": "Flag", + "detailedName": "Default Processing Flag", + "content": ":4!c//1!a", + "number": 55 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "CAOPTN", + "number": 56 + } + ] + }, + { + "name": "Sequence F - Additional Information", + "status": "Optional", + "fields": [ + { + "status": "M", + "tag": "16R", + "qualifier": "", + "fieldName": "Start of Block", + "content": "ADDINFO", + "number": 57 + }, + { + "status": "O", + "tag": "70E", + "qualifier": "ADTX", + "fieldName": "Narrative", + "detailedName": "Additional Text", + "content": ":4!c//10*35x", + "number": 58 + }, + { + "status": "O", + "tag": "70E", + "qualifier": "DISC", + "fieldName": "Narrative", + "detailedName": "Disclaimer", + "content": ":4!c//10*35x", + "number": 59 + }, + { + "status": "M", + "tag": "16S", + "qualifier": "", + "fieldName": "End of Block", + "content": "ADDINFO", + "number": 60 + } + ] + } + ] + } +} \ No newline at end of file diff --git a/mt564_training_data.json b/mt564_training_data.json new file mode 100644 index 0000000000000000000000000000000000000000..878a6effc7e681ff86a3bd2bd94f835e9b457391 --- /dev/null +++ b/mt564_training_data.json @@ -0,0 +1,102 @@ +[ + { + "instruction": "What are the main sequences in the MT564 message format?", + "response": "The MT564 message format consists of the following main sequences:\n\n- Sequence A - General Information\n- Sequence B - Underlying Securities\n- Sequence C - Intermediate Securities\n- Sequence D - Corporate Action Details\n- Sequence E - Corporate Action Options\n- Sequence F - Additional Information" + }, + { + "instruction": "Describe the 'Sequence A - General Information' sequence in MT564 format.", + "response": "The 'Sequence A - General Information' sequence is Mandatory. It contains 8 fields and 2 subsequences." + }, + { + "instruction": "List the fields in the 'Sequence A - General Information' sequence of MT564.", + "response": "The fields in this sequence are:\n\n- 16R: Start of Block (M) - Format: GENL\n- 28E: Page Number/Continuation Indicator (O) - Format: 5n/4!c\n- 20C: Reference (M) - Format: :4!c//16x\n- 23G: Function of the Message (M) - Format: 4!c[/4!c]\n- 22F: Indicator (M) - Format: :4!c/[8c]/4!c\n- 98a: Date/Time (O) - Format: A, C, or E\n- 25D: Status Code (M) - Format: :4!c/[8c]/4!c\n- 16S: End of Block (M) - Format: GENL" + }, + { + "instruction": "What subsequences are part of the 'Sequence A - General Information' sequence in MT564?", + "response": "The 'Sequence A - General Information' sequence contains the following subsequences:\n\n- Subsequence A1 - Linkages\n- Subsequence A2 - Reversal Reason" + }, + { + "instruction": "Describe the 'Subsequence A1 - Linkages' subsequence in MT564 format.", + "response": "The 'Subsequence A1 - Linkages' subsequence is Optional Repetitive. It contains 5 fields." 
+ }, + { + "instruction": "List the fields in the 'Subsequence A1 - Linkages' subsequence of MT564.", + "response": "The fields in this subsequence are:\n\n- 16R: Start of Block (M) - Format: LINK\n- 22F: Indicator (O) - Format: :4!c/[8c]/4!c\n- 13a: Number Identification (O) - Format: A or B\n- 20C: Reference (M) - Format: :4!c//16x\n- 16S: End of Block (M) - Format: LINK" + }, + { + "instruction": "Describe the 'Subsequence A2 - Reversal Reason' subsequence in MT564 format.", + "response": "The 'Subsequence A2 - Reversal Reason' subsequence is Optional. It contains 4 fields." + }, + { + "instruction": "List the fields in the 'Subsequence A2 - Reversal Reason' subsequence of MT564.", + "response": "The fields in this subsequence are:\n\n- 16R: Start of Block (M) - Format: REVR\n- 24B: Reason Code (M) - Format: :4!c/[8c]/4!c\n- 70D: Narrative (O) - Format: :4!c//6*35x\n- 16S: End of Block (M) - Format: REVR" + }, + { + "instruction": "Describe the 'Sequence B - Underlying Securities' sequence in MT564 format.", + "response": "The 'Sequence B - Underlying Securities' sequence is Mandatory. It contains 3 fields and 2 subsequences." + }, + { + "instruction": "List the fields in the 'Sequence B - Underlying Securities' sequence of MT564.", + "response": "The fields in this sequence are:\n\n- 16R: Start of Block (M) - Format: USECU\n- 35B: Identification of the Financial Instrument (M) - Format: [ISIN1!e12!c]\n[4*35x]\n- 16S: End of Block (M) - Format: USECU" + }, + { + "instruction": "What subsequences are part of the 'Sequence B - Underlying Securities' sequence in MT564?", + "response": "The 'Sequence B - Underlying Securities' sequence contains the following subsequences:\n\n- Subsequence B1 - Financial Instrument Attributes\n- Subsequence B2 - Account Information" + }, + { + "instruction": "Describe the 'Subsequence B1 - Financial Instrument Attributes' subsequence in MT564 format.", + "response": "The 'Subsequence B1 - Financial Instrument Attributes' subsequence is Optional. It contains 9 fields." + }, + { + "instruction": "List the fields in the 'Subsequence B1 - Financial Instrument Attributes' subsequence of MT564.", + "response": "The fields in this subsequence are:\n\n- 16R: Start of Block (M) - Format: FIA\n- 94B: Place (O) - Format: :4!c/[8c]/4!c[/30x]\n- 22F: Indicator (O) - Format: :4!c/[8c]/4!c\n- 12a: Type of Financial Instrument (O) - Format: A, B, or C\n- 11A: Currency (O) - Format: :4!c//3!a\n- 98A: Date/Time (O) - Format: :4!c//8!n\n- 92a: Rate (O) - Format: A, D, or K\n- 36a: Quantity of Financial Instrument (O) - Format: B or D\n- 16S: End of Block (M) - Format: FIA" + }, + { + "instruction": "Describe the 'Subsequence B2 - Account Information' subsequence in MT564 format.", + "response": "The 'Subsequence B2 - Account Information' subsequence is Mandatory Repetitive. It contains 6 fields." + }, + { + "instruction": "List the fields in the 'Subsequence B2 - Account Information' subsequence of MT564.", + "response": "The fields in this subsequence are:\n\n- 16R: Start of Block (M) - Format: ACCTINFO\n- 95a: Party (O) - Format: P or R\n- 97a: Account (M) - Format: A, C, or D\n- 94a: Place (O) - Format: B, C, or F\n- 93a: Balance (O) - Format: B, C, E, or F\n- 16S: End of Block (M) - Format: ACCTINFO" + }, + { + "instruction": "Describe the 'Sequence C - Intermediate Securities' sequence in MT564 format.", + "response": "The 'Sequence C - Intermediate Securities' sequence is Optional. It contains 10 fields and 0 subsequences." 
+ }, + { + "instruction": "List the fields in the 'Sequence C - Intermediate Securities' sequence of MT564.", + "response": "The fields in this sequence are:\n\n- 16R: Start of Block (M) - Format: INTSEC\n- 35B: Identification of the Financial Instrument (M) - Format: [ISIN1!e12!c]\n[4*35x]\n- 36a: Quantity of Financial Instrument (O) - Format: B, D, or E\n- 93a: Balance (O) - Format: B, C, or E\n- 22F: Indicator (O) - Format: :4!c/[8c]/4!c\n- 92D: Rate (O) - Format: :4!c//15d/15d\n- 90B: Price (O) - Format: :4!c//4!c/3!a15d\n- 98a: Date/Time (M) - Format: A, B, or E\n- 69a: Period (O) - Format: A, B, C, D, E, or F\n- 16S: End of Block (M) - Format: INTSEC" + }, + { + "instruction": "Describe the 'Sequence D - Corporate Action Details' sequence in MT564 format.", + "response": "The 'Sequence D - Corporate Action Details' sequence is Optional. It contains 5 fields and 0 subsequences." + }, + { + "instruction": "List the fields in the 'Sequence D - Corporate Action Details' sequence of MT564.", + "response": "The fields in this sequence are:\n\n- 16R: Start of Block (M) - Format: CADETL\n- 98a: Date/Time (O) - Format: A, B, C, E, or F\n- 35B: Identification of the Financial Instrument (O) - Format: [ISIN1!e12!c]\n[4*35x]\n- 17B: Flag (O) - Format: :4!c//1!a\n- 16S: End of Block (M) - Format: CADETL" + }, + { + "instruction": "Describe the 'Sequence E - Corporate Action Options' sequence in MT564 format.", + "response": "The 'Sequence E - Corporate Action Options' sequence is Optional Repetitive. It contains 6 fields and 0 subsequences." + }, + { + "instruction": "List the fields in the 'Sequence E - Corporate Action Options' sequence of MT564.", + "response": "The fields in this sequence are:\n\n- 16R: Start of Block (M) - Format: CAOPTN\n- 13A: Number Identification (M) - Format: :4!c//3!c\n- 22F: Indicator (M) - Format: :4!c/[8c]/4!c\n- 11A: Currency (O) - Format: :4!c//3!a\n- 17B: Flag (O) - Format: :4!c//1!a\n- 16S: End of Block (M) - Format: CAOPTN" + }, + { + "instruction": "Describe the 'Sequence F - Additional Information' sequence in MT564 format.", + "response": "The 'Sequence F - Additional Information' sequence is Optional. It contains 4 fields and 0 subsequences." + }, + { + "instruction": "List the fields in the 'Sequence F - Additional Information' sequence of MT564.", + "response": "The fields in this sequence are:\n\n- 16R: Start of Block (M) - Format: ADDINFO\n- 70E: Narrative (O) - Format: :4!c//10*35x\n- 70E: Narrative (O) - Format: :4!c//10*35x\n- 16S: End of Block (M) - Format: ADDINFO" + }, + { + "instruction": "What is an MT564 message used for?", + "response": "The MT564 is a SWIFT message format used for Corporate Action Notifications. It is sent by an account servicer to an account owner to notify them about a forthcoming corporate action event and the options or choices available to the account owner. This may include events such as dividend announcements, rights issues, stock splits, mergers, and other corporate events that affect securities." + }, + { + "instruction": "Explain the structure of MT564 messages.", + "response": "MT564 messages follow a structured format with sequences and subsequences:\n\n1. Sequence A - General Information (Mandatory): Contains basic information about the message\n2. Sequence B - Underlying Securities (Mandatory): Details about the affected securities\n3. Sequence C - Intermediate Securities (Optional): Information about intermediate securities\n4. 
Sequence D - Corporate Action Details (Optional): Details about the corporate action\n5. Sequence E - Corporate Action Options (Optional): Available options for the account owner\n6. Sequence F - Additional Information (Optional): Any additional relevant information\n\nEach sequence contains specific fields, identified by tags, that carry different pieces of information." + } +] \ No newline at end of file diff --git a/prepare_data.py b/prepare_data.py new file mode 100644 index 0000000000000000000000000000000000000000..849a6a1c6af15f11cba889b475e93dacef8a1ad3 --- /dev/null +++ b/prepare_data.py @@ -0,0 +1,159 @@ +""" +Data Preparation Script for TinyLlama Training +This script helps prepare data in the right format for TinyLlama training. +""" + +import os +import json +import argparse +import pandas as pd +from tqdm import tqdm +import logging + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def parse_args(): + parser = argparse.ArgumentParser(description="Prepare data for TinyLlama training") + parser.add_argument( + "--input_files", + nargs="+", + required=True, + help="Path to input text files (accepts multiple files)" + ) + parser.add_argument( + "--output_file", + type=str, + default="training_data.json", + help="Output JSON file with prepared data" + ) + parser.add_argument( + "--format", + type=str, + choices=["instruction", "completion", "plain"], + default="plain", + help="Data format: instruction (instruction-response pairs), completion (text completion), or plain text" + ) + parser.add_argument( + "--chunk_size", + type=int, + default=512, + help="Size of text chunks for plain text format" + ) + parser.add_argument( + "--overlap", + type=int, + default=50, + help="Overlap between chunks for plain text format" + ) + return parser.parse_args() + +def chunk_text(text, chunk_size, overlap): + """Split text into overlapping chunks of specified size""" + chunks = [] + start = 0 + + # Skip empty or very short texts + if len(text) < chunk_size / 2: + return [] + + while start < len(text): + end = min(start + chunk_size, len(text)) + + # Don't create tiny chunks at the end + if end - start < chunk_size / 2 and chunks: + # Extend the last chunk instead + chunks[-1] = text[start - chunk_size + overlap:end] + break + + chunks.append(text[start:end]) + start += chunk_size - overlap + + return chunks + +def process_instruction_data(file_paths): + """Process data formatted as instruction-response pairs""" + data = [] + + for file_path in file_paths: + logger.info(f"Processing instruction data from: {file_path}") + try: + # Assuming JSON file with instruction-response pairs + with open(file_path, 'r', encoding='utf-8') as f: + file_data = json.load(f) + + for item in tqdm(file_data): + if "instruction" in item and "response" in item: + # Format as a prompt for TinyLlama + text = f"<|im_start|>user\n{item['instruction']}<|im_end|>\n<|im_start|>assistant\n{item['response']}<|im_end|>" + data.append({"text": text}) + except Exception as e: + logger.error(f"Error processing {file_path}: {e}") + + return data + +def process_completion_data(file_paths): + """Process data formatted for completion""" + data = [] + + for file_path in file_paths: + logger.info(f"Processing completion data from: {file_path}") + try: + # Assuming JSON file with prompt-completion pairs + with open(file_path, 'r', encoding='utf-8') as f: + file_data = json.load(f) + + for item in tqdm(file_data): + if "prompt" in item and "completion" in item: + text = 
f"{item['prompt']}{item['completion']}" + data.append({"text": text}) + except Exception as e: + logger.error(f"Error processing {file_path}: {e}") + + return data + +def process_plain_text(file_paths, chunk_size, overlap): + """Process plain text files by chunking them""" + data = [] + + for file_path in file_paths: + logger.info(f"Processing plain text from: {file_path}") + try: + with open(file_path, 'r', encoding='utf-8') as f: + text = f.read() + + chunks = chunk_text(text, chunk_size, overlap) + logger.info(f"Created {len(chunks)} chunks from {file_path}") + + for chunk in chunks: + data.append({"text": chunk}) + except Exception as e: + logger.error(f"Error processing {file_path}: {e}") + + return data + +def main(): + args = parse_args() + + # Process data based on format + if args.format == "instruction": + data = process_instruction_data(args.input_files) + elif args.format == "completion": + data = process_completion_data(args.input_files) + else: # plain text + data = process_plain_text(args.input_files, args.chunk_size, args.overlap) + + logger.info(f"Total processed examples: {len(data)}") + + # Save processed data + output_dir = os.path.dirname(args.output_file) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + with open(args.output_file, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + logger.info(f"Data saved to {args.output_file}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/prepare_mt564_data.py b/prepare_mt564_data.py new file mode 100644 index 0000000000000000000000000000000000000000..eaadfe2f4c9a8e65d0be3ab8c9c25998878151e7 --- /dev/null +++ b/prepare_mt564_data.py @@ -0,0 +1,151 @@ +""" +MT564 Data Preparation Script +This script converts MT564 format specifications into training data for TinyLlama +""" + +import os +import json +import argparse +from typing import List, Dict, Any + +def parse_args(): + parser = argparse.ArgumentParser(description="Prepare MT564 format specifications for TinyLlama training") + parser.add_argument( + "--input_file", + type=str, + required=True, + help="Path to the MT564 format specification JSON file" + ) + parser.add_argument( + "--output_file", + type=str, + default="mt564_training_data.json", + help="Output file to save the prepared training data" + ) + return parser.parse_args() + +def create_instruction_examples(data: Dict[str, Any]) -> List[Dict[str, str]]: + """Convert MT564 specification into instruction-response pairs""" + examples = [] + + # Extract sequences from the specification + #sequences = data.get("MT564FormatSpecifications", {}).get("sequences", []) + sequences = data # if the input is already a list + + # Create examples for sequence overview + sequence_names = [seq["name"] for seq in sequences] + examples.append({ + "instruction": "What are the main sequences in the MT564 message format?", + "response": f"The MT564 message format consists of the following main sequences:\n\n" + + "\n".join([f"- {name}" for name in sequence_names]) + }) + + # Create examples for each sequence + for sequence in sequences: + seq_name = sequence["name"] + status = sequence["status"] + + # Example for sequence details + examples.append({ + "instruction": f"Describe the '{seq_name}' sequence in MT564 format.", + "response": f"The '{seq_name}' sequence is {status}. " + + f"It contains {len(sequence.get('fields', []))} fields " + + f"and {len(sequence.get('subsequences', []))} subsequences." 
+ }) + + # Examples for fields in the sequence + fields = sequence.get("fields", []) + + field_examples = [] + for field in fields: + field_tag = field.get("tag", "") + field_name = field.get("fieldName", "") + field_status = field.get("status", "") + field_content = field.get("content", "") + + field_examples.append(f"- {field_tag}: {field_name} ({field_status}) - Format: {field_content}") + + if field_examples: + examples.append({ + "instruction": f"List the fields in the '{seq_name}' sequence of MT564.", + "response": "The fields in this sequence are:\n\n" + "\n".join(field_examples) + }) + + # Examples for subsequences + subsequences = sequence.get("subsequences", []) + if subsequences: + subseq_names = [subseq["name"] for subseq in subsequences] + examples.append({ + "instruction": f"What subsequences are part of the '{seq_name}' sequence in MT564?", + "response": f"The '{seq_name}' sequence contains the following subsequences:\n\n" + + "\n".join([f"- {name}" for name in subseq_names]) + }) + + # Create examples for each subsequence + for subsequence in subsequences: + subseq_name = subsequence["name"] + subseq_status = subsequence["status"] + subseq_fields = subsequence.get("fields", []) + + examples.append({ + "instruction": f"Describe the '{subseq_name}' subsequence in MT564 format.", + "response": f"The '{subseq_name}' subsequence is {subseq_status}. " + + f"It contains {len(subseq_fields)} fields." + }) + + # Examples for fields in subsequence + subfield_examples = [] + for field in subseq_fields: + field_tag = field.get("tag", "") + field_name = field.get("fieldName", "") + field_status = field.get("status", "") + field_content = field.get("content", "") + + subfield_examples.append(f"- {field_tag}: {field_name} ({field_status}) - Format: {field_content}") + + if subfield_examples: + examples.append({ + "instruction": f"List the fields in the '{subseq_name}' subsequence of MT564.", + "response": "The fields in this subsequence are:\n\n" + "\n".join(subfield_examples) + }) + + # Add generic examples + examples.append({ + "instruction": "What is an MT564 message used for?", + "response": "The MT564 is a SWIFT message format used for Corporate Action Notifications. It is sent by an account servicer to an account owner to notify them about a forthcoming corporate action event and the options or choices available to the account owner. This may include events such as dividend announcements, rights issues, stock splits, mergers, and other corporate events that affect securities." + }) + + examples.append({ + "instruction": "Explain the structure of MT564 messages.", + "response": "MT564 messages follow a structured format with sequences and subsequences:\n\n" + + "1. Sequence A - General Information (Mandatory): Contains basic information about the message\n" + + "2. Sequence B - Underlying Securities (Mandatory): Details about the affected securities\n" + + "3. Sequence C - Intermediate Securities (Optional): Information about intermediate securities\n" + + "4. Sequence D - Corporate Action Details (Optional): Details about the corporate action\n" + + "5. Sequence E - Corporate Action Options (Optional): Available options for the account owner\n" + + "6. Sequence F - Additional Information (Optional): Any additional relevant information\n\n" + + "Each sequence contains specific fields, identified by tags, that carry different pieces of information." 
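+        # These two hand-written Q&A pairs give the model a high-level overview to
+        # complement the field-level pairs generated from the spec above; they are
+        # the final two entries visible in mt564_training_data.json.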
+ }) + + return examples + +def main(): + args = parse_args() + + # Load MT564 format specification + with open(args.input_file, 'r', encoding='utf-8') as f: + data = json.load(f) + + # Create instruction-response pairs + examples = create_instruction_examples(data) + + print(f"Created {len(examples)} training examples from MT564 specifications") + + # Save the prepared data + with open(args.output_file, 'w', encoding='utf-8') as f: + json.dump(examples, f, ensure_ascii=False, indent=2) + + print(f"Saved training data to {args.output_file}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..c92eebb3d76b38131de68b9c2928dd009a26391c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,1139 @@ +[project] +name = "repl-nix-workspace" +version = "0.1.0" +description = "Add your description here" +requires-python = ">=3.11" +dependencies = [ + "email-validator>=2.2.0", + "flask-cors>=5.0.1", + "flask>=3.1.1", + "flask-sqlalchemy>=3.1.1", + "gunicorn>=23.0.0", + "openai>=1.78.1", + "schedule>=1.2.2", + "trafilatura>=2.0.0", + "beautifulsoup4>=4.13.4", + "requests>=2.32.3", +] + +[[tool.uv.index]] +explicit = true +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" + +[tool.uv.sources] +AA-module = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ABlooper = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +AnalysisG = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +AutoRAG = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +BERTeam = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +BxTorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +Byaldi = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +CALM-Pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +COPEX-high-rate-compression-quality-metrics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +CityLearn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +CoCa-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +CoLT5-attention = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ComfyUI-EasyNodes = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +Crawl4AI = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +DALL-E = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +DI-toolkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +DatasetRising = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +DeepCache = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +DeepMatter = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +Draugr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ESRNN = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +En-transformer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ExpoSeq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +FLAML = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +FSRS-Optimizer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +GANDLF = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +GQLAlchemy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +GhostScan = [{ 
index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +GraKeL = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +HEBO = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +IOPaint = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ISLP = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +InvokeAI = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +JAEN = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +KapoorLabs-Lightning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +LightAutoML = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +LingerGRN = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +MMEdu = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +MRzeroCore = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +Modeva = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +NeuralFoil = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +NiMARE = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +NinjaTools = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +OpenHosta = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +OpenNMT-py = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +POT = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +PVNet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +PaLM-rlhf-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +PepperPepper = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +PiML = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +Poutyne = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +QNCP = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +RAGatouille = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +RareGO = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +RealtimeSTT = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +RelevanceAI-Workflows-Core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +Resemblyzer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ScandEval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +Simba-UW-tf-dev = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +SwissArmyTransformer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +TPOT = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +TTS = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +TorchCRF = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +TotalSegmentator = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +UtilsRL = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +WhisperSpeech = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +XAISuite = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +a-unet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +a5dev = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +accelerate = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +accelerated-scan = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +accern-xyme = [{ index = "pytorch-cpu", 
marker = "platform_system == 'Linux'" }] +achatbot = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +acids-rave = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +actorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +acvl-utils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +adabelief-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +adam-atan2-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +adan-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +adapters = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +admin-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +adtoolbox = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +adversarial-robustness-toolbox = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aeiou = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aeon = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +africanwhisper = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ag-llama-api = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +agentdojo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +agilerl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ai-edge-torch-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ai-parrot = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ai-python = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ai-transform = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ai2-olmo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ai2-olmo-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ai2-tango = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aicmder = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aider-chat = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aider-chat-x = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aif360 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aihwkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aimodelshare = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +airllm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +airtestProject = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +airunner = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aisak = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aislib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aisquared = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aistore = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aithree = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +akasha-terminal = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alibi = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alibi-detect = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alignn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +all-clip = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +allennlp = [{ index = 
"pytorch-cpu", marker = "platform_system == 'Linux'" }] +allennlp-models = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +allennlp-pvt-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +allophant = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +allosaurus = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aloy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alpaca-eval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alphafold2-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alphafold3-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alphamed-federated = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +alphawave = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +amazon-braket-pennylane-plugin = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +amazon-photos = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +anemoi-graphs = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +anemoi-models = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +anomalib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +apache-beam = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +apache-tvm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aperturedb = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aphrodite-engine = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aqlm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +arcAGI2024 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +archisound = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +argbind = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +arize = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +arm-pytorch-utilities = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +array-api-compat = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +arus = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +assert-llm-tools = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +asteroid = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +asteroid-filterbanks = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +astra-llm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +astrovision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +atomate2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +attacut = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +audio-diffusion-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +audio-encoders-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +audio-separator = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +audiocraft = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +audiolm-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +auralis = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +auraloss = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +auto-gptq = [{ index = "pytorch-cpu", marker = "platform_system == 
'Linux'" }] +autoawq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +autoawq-kernels = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +"autogluon.multimodal" = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +"autogluon.tabular" = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +"autogluon.timeseries" = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +autotrain-advanced = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +avdeepfake1m = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +aws-fortuna = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ax-platform = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +azureml-automl-dnn-vision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +azureml-contrib-automl-dnn-forecasting = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +azureml-evaluate-mlflow = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +azureml-metrics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +azureml-train-automl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +b2bTools = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +backpack-for-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +balrog-nle = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +batch-face = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +batchalign = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +batchgeneratorsv2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +batchtensor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bbrl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +benchpots = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bent = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bert-score = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bertopic = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bertviz = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bestOf = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +betty-ml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +big-sleep = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bigdl-core-cpp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bigdl-core-npu = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bigdl-llm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bigdl-nano = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +"bioimageio.core" = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bitfount = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bitsandbytes = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bittensor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bittensor-cli = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +blackboxopt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +blanc = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +blindai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +bm25-pt = [{ index = 
"pytorch-cpu", marker = "platform_system == 'Linux'" }] +boltz = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +botorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +boxmot = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +brainchain = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +braindecode = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +brevitas = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +briton = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +browsergym-visualwebarena = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +buzz-captions = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +byotrack = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +byzerllm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +c4v-py = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +calflops = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +came-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +camel-ai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +camel-tools = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cannai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +captum = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +carte-ai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +carvekit-colab = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +catalyst = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +causalml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +causalnex = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +causy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cbrkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cca-zoo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cdp-backend = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cellacdc = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cellfinder = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cellpose = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cellxgene-census = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +chattts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +chemprop = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +chgnet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +chitra = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +circuitsvis = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cjm-yolox-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clarinpl-embeddings = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +class-resolver = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +classifier-free-guidance-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +classiq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +classy-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clean-fid = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cleanvision = 
[{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clip-anytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clip-benchmark = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clip-by-openai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clip-interrogator = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clip-retrieval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cltk = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clu = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +clusterops = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cnocr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cnstd = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +coba = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cofi = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +colbert-ai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +colpali-engine = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +compel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +composabl-ray = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +composabl-ray-dev = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +composabl-train = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +composabl-train-dev = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +composer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +compressai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +compressed-tensors = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +compressed-tensors-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +concrete-python = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +confit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +conformer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +contextualSpellCheck = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +continual-inference = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +controlnet-aux = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +convokit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +coola = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +coqui-tts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +coqui-tts-trainer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +craft-text-detector = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +creme = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +crocodile = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +crowd-kit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cryoSPHERE = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +csle-common = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +csle-system-identification = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ctgan = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +curated-transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] 
+cut-cross-entropy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cvat-sdk = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +cybertask = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +d3rlpy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dalle-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dalle2-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +danila-lib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +danling = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +darts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +darwin-py = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +data-gradients = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +datachain = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dataclass-array = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dataeval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +datarobot-drum = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +datarobotx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +datasets = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +datumaro = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dctorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deep-utils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepchecks = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepchem = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepctr-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepecho = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepepochs = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepforest = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deeplabcut = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepmd-kit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepmultilingualpunctuation = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepparse = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deeprobust = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepsparse = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepsparse-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +deepspeed = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +denoising-diffusion-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +descript-audio-codec = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +descript-audiotools = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +detecto = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +detoxify = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dgenerate = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dghs-imgutils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dgl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dialogy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dice-ml = [{ index = 
"pytorch-cpu", marker = "platform_system == 'Linux'" }] +diffgram = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +diffq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +diffusers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +distilabel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +distrifuser = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dnikit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +docarray = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +doclayout-yolo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +docling-ibm-models = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +docquery = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +domino-code-assist = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dreamsim = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dropblock = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +druida = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +dvclive = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +e2-tts-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +e2cnn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +e3nn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +easyocr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ebtorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ecallisto-ng = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +edsnlp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +effdet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +einx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +eir-dl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +eis1600 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +eland = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ema-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +embedchain = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +enformer-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +entmax = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +esm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +espaloma-charge = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +espnet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +etils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +etna = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +evadb = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +evalscope = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +evaluate = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +exllamav2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +extractable = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +face-alignment = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +facenet-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +facexlib = [{ index = "pytorch-cpu", marker = "platform_system == 
'Linux'" }] +fair-esm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fairseq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fairseq2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fairseq2n = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +faker-file = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +farm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fast-bert = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fast-pytorch-kmeans = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fastai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fastcore = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fastestimator-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fasttreeshap = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fedml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +felupe = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +femr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fft-conv-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fickling = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fireworks-ai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flair = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flashrag-dev = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flax = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flexgen = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flgo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flopth = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flowcept = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flytekitplugins-kfpytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +flytekitplugins-onnxpytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fmbench = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +focal-frequency-loss = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +foldedtensor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fractal-tasks-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +freegenius = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +freqtrade = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +fschat = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +funasr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +functorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +funlbm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +funsor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +galore-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +garak = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +garf = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gateloop-transformer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +geffnet = [{ index = "pytorch-cpu", marker = "platform_system == 
'Linux'" }] +genutility = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gfpgan = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gigagan-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gin-config = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +glasflow = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gliner = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gluonts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gmft = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +google-cloud-aiplatform = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gpforecaster = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gpt3discord = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gpytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +grad-cam = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +graph-weather = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +graphistry = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gravitorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gretel-synthetics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gsplat = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +guardrails-ai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +guidance = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +gymnasium = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hanlp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +happytransformer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hbutils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +heavyball = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hezar = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hf-deepali = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hf-doc-builder = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +higher = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hjxdl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hkkang-utils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hordelib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hpsv2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +huggingface-hub = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hummingbird-ml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hvae-backbone = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hya = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +hypothesis-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ibm-metrics-plugin = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ibm-watson-machine-learning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ibm-watsonx-ai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +icetk = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +icevision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +iden = [{ index = "pytorch-cpu", 
marker = "platform_system == 'Linux'" }] +idvpackage = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +iglovikov-helper-functions = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +imagededup = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +imagen-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +imaginAIry = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +img2vec-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +incendio = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +inference = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +inference-gpu = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +infinity-emb = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +info-nce-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +infoapps-mlops-sdk = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +instructlab = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +instructlab-dolomite = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +instructlab-eval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +instructlab-sdg = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +instructlab-training = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +invisible-watermark = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +iobm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ipex-llm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +iree-turbine = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +irisml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +irisml-tasks-azure-openai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +irisml-tasks-torchvision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +irisml-tasks-training = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +item-matching = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ivadomed = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +jaqpotpy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +jina = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +judo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +junky = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +k-diffusion = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +k1lib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +k2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kappadata = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kappamodules = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +karbonn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kats = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kbnf = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kedro-datasets = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +keybert = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +keytotext = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +khoj = [{ index = "pytorch-cpu", marker = 
"platform_system == 'Linux'" }] +kiui = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +konfuzio-sdk = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kornia = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kornia-moons = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kraken = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kwarray = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +kwimage = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +labml-nn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lagent = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +laion-clap = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lale = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lama-cleaner = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lancedb = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +langcheck = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +langkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +langroid = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +langtest = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +layoutparser = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ldp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +leafmap = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +leap-ie = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +leibniz = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +leptonai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +letmedoit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lhotse = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lib310 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +libpecos = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +librec-auto = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +libretranslate = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +liger-kernel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +liger-kernel-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightning-bolts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightning-fabric = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightning-habana = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightning-lite = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightrag = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightweight-gan = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lightwood = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +linear-attention-transformer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +linear-operator = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +linformer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +linformer-pytorch = [{ index = 
"pytorch-cpu", marker = "platform_system == 'Linux'" }] +liom-toolkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lion-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lit-nlp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +litdata = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +litelama = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +litgpt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llama-index-embeddings-adapter = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llama-index-embeddings-clip = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llama-index-embeddings-instructor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llama-index-llms-huggingface = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llama-index-postprocessor-colbert-rerank = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llm-blender = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llm-foundry = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llm-guard = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llm-rs = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llm2vec = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llmcompressor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llmlingua = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +llmvm-cli = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lm-eval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lmdeploy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lmms-eval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +local-attention = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lovely-tensors = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lpips = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +lycoris-lora = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mace-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +magic-pdf = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +magicsoup = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +magvit2-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +maite = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +manga-ocr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +manifest-ml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +manipulation = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +marker-pdf = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +matgl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +med-imagetools = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +medaka = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +medcat = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +medmnist = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +megablocks = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +megatron-energon = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +memos 
= [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +meshgpt-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +metatensor-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mflux = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mia-vgg = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +miditok = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +minari = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +minicons = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ml2rt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mlagents = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mlbench-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mlcroissant = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mlpfile = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mlx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mlx-whisper = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mmaction2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mmengine = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mmengine-lite = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mmocr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mmpose = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mmsegmentation = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +modeci-mdf = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +model2vec = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +modelscope = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +modelspec = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +monai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +monai-weekly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +monotonic-alignment-search = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +monty = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mosaicml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mosaicml-streaming = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +moshi = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mteb = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +mtmtrain = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +multi-quantization = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +myhand = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nGPT-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +naeural-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +napari = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +napatrackmater = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nara-wpe = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +natten = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nbeats-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nebulae = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nemo-toolkit = [{ index 
= "pytorch-cpu", marker = "platform_system == 'Linux'" }] +neptune = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +neptune-client = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nerfacc = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nerfstudio = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nessai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +netcal = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +neural-rag = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +neuralforecast = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +neuralnets = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +neuralprophet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +neuspell = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nevergrad = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nexfort = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nimblephysics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nirtorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nkululeko = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nlp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nlptooltest = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nnAudio = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nnodely = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nnsight = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nnunetv2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +noisereduce = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nonebot-plugin-nailongremove = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nowcasting-dataloader = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nowcasting-forecast = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nshtrainer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nuwa-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nvflare = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +nvidia-modelopt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ocf-datapipes = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ocnn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ogb = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ohmeow-blurr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +olive-ai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +omlt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ommlx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +onediff = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +onediffx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +onnx2pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +onnx2torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +opacus = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +open-clip-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +open-flamingo = [{ 
index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +open-interpreter = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openbb-terminal-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openmim = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openparse = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openunmix = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openvino-dev = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openvino-tokenizers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openvino-xai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +openwakeword = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +opt-einsum-fx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optimum = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optimum-habana = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optimum-intel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optimum-neuron = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optimum-quanto = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optree = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optuna = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optuna-dashboard = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +optuna-integration = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +oracle-ads = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +orbit-ml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +otx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +outetts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +outlines = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +outlines-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +paddlenlp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pai-easycv = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pandasai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +panns-inference = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +patchwork-cli = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +peft = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pegasuspy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pelutils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +penn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +perforatedai-freemium = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +performer-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +petastorm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pfio = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pgmpy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +phenolrs = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +phobos = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pi-zero-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pinecone-text = [{ index = "pytorch-cpu", marker = 
"platform_system == 'Linux'" }] +piq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pix2tex = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pix2text = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pnnx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +policyengine-us-data = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +polyfuzz = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pomegranate = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +positional-encodings = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +prefigure = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +product-key-memory = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ptflops = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ptwt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pulser-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +punctuators = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +py2ls = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyabsa = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +"pyannote.audio" = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyawd = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyclarity = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pycox = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyfemtet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyg-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pygrinder = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyhealth = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyhf = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyiqa = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pykeen = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pykeops = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pylance = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pylineaGT = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pymanopt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pymde = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pypots = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyqlib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyqtorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyro-ppl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pysentimiento = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyserini = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pysr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pythainlp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +python-doctr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-fid = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-forecasting = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-ignite = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] 
+pytorch-kinematics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-lightning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-lightning-bolts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-metric-learning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-model-summary = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-msssim = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-pfn-extras = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-pretrained-bert = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-ranger = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-seed = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-tabnet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-tabular = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-toolbelt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-transformers-pvt-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-triton-rocm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-warmup = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch-wavelets = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch_optimizer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorch_revgrad = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorchcv = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pytorchltr2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyvene = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +pyvespa = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +qianfan = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +qibo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +qiskit-machine-learning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +qtorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +quanto = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +quick-anomaly-detector = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rastervision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rastervision-pytorch-backend = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rastervision-pytorch-learner = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ray-lightning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rclip = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +realesrgan = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +recbole = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +recommenders = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +redcat = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +reformer-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +regex-sampler = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +replay-rec = [{ index = 
"pytorch-cpu", marker = "platform_system == 'Linux'" }] +rerankers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +research-framework = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +resemble-enhance = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +resnest = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rf-clip = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rf-groundingdino = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rfconv = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rich-logger = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ring-attention-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rltrade-test = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rotary-embedding-torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rsp-ml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +rust-circuit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +s2fft = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +s3prl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +s3torchconnector = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +saferx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +safetensors = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sagemaker-huggingface-inference-toolkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sagemaker-ssh-helper = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +salesforce-lavis = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +salesforce-merlion = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +samv2 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +scib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +scib-metrics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +scvi-tools = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sdmetrics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +secretflow = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +segment-anything-hq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +segment-anything-py = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +segmentation-models-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +self-rewarding-lm-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +semantic-kernel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +semantic-router = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +senselab = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sent2vec = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sentence-transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sequence-model-train = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +serotiny = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sevenn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sglang = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +shap = [{ index = "pytorch-cpu", marker 
= "platform_system == 'Linux'" }] +silero-api-server = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +silero-vad = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +silicondiff-npu = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +simclr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +simple-lama-inpainting = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sinabs = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sixdrepnet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +skforecast = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +skorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +skrl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +skt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sktime = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sktmls = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +slangtorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +smartnoise-synth = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +smashed = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +smplx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +smqtk-descriptors = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +smqtk-detection = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +snntorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +snorkel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +snowflake-ml-python = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +so-vits-svc-fork = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sonusai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sony-custom-layers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sotopia = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spacr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spacy-curated-transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spacy-experimental = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spacy-huggingface-pipelines = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spacy-llm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spacy-transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +span-marker = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spandrel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spandrel-extra-arches = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sparrow-python = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spatialdata = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +speechbrain = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +speechtokenizer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spikeinterface = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spikingjelly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spotiflow = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +spotpython = [{ index = 
"pytorch-cpu", marker = "platform_system == 'Linux'" }] +spotriver = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +squirrel-core = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +stable-baselines3 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +stable-diffusion-sdkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +stable-ts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +stanford-stk = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +stanfordnlp = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +stanza = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +startorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +streamtasks = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +struct-eqtable = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +stylegan2-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +supar = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +super-gradients = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +super-image = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +superlinked = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +supervisely = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +surya-ocr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +svdiff-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +swarm-models = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +swarmauri = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +swarms-memory = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +swebench = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +syft = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +sympytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +syne-tune = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +synthcity = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +t5 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tab-transformer-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tabpfn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +taming-transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +taming-transformers-rom1504 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +taskwiz = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tbparse = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tecton = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tensor-parallel = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tensorcircuit-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tensordict = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tensordict-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tensorizer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tensorrt-llm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +texify = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +text2text = [{ index = 
"pytorch-cpu", marker = "platform_system == 'Linux'" }] +textattack = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tfkit = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +thepipe-api = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +thinc = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +thingsvision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +thirdai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +thop = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tianshou = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tidy3d = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +timesfm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +timm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tipo-kgen = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tmnt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +toad = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tomesd = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +top2vec = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-audiomentations = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-dct = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-delaunay = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-directml = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-ema = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-encoding = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-fidelity = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-geometric = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-geopooling = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-harmonics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-kmeans = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-lr-finder = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-max-mem = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-npu = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-optimi = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-optimizer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-ort = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-pitch-shift = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-ppr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-pruning = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-snippets = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-stoi = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-struct = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torch-tensorrt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchani = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchattacks = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" 
}] +torchaudio = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchbiggraph = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchcam = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchcde = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchcfm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchcrepe = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchdata = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchdatasets-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchdiffeq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchdyn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchestra = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torcheval = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torcheval-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchextractor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchfcpe = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchfun = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchfunc-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchgeo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchgeometry = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchio = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchjpeg = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchlayers-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchmeta = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchmetrics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchmocks = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchpack = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchpippy = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchpq = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchprofile = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchquantlib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchrec = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchrec-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchrec-nightly-cpu = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchrl = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchrl-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchscale = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchsde = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchseg = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchserve = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchserve-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchsnapshot-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchsr = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchstain = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] 
+torchsummaryX = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchtext = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchtnt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchtnt-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchtyping = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchutil = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchvinecopulib = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchvision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchviz = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchx-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +torchxrayvision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +totalspineseg = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tracebloc-package-dev = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +trainer = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +transformer-engine = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +transformer-lens = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +transformer-smaller-training-vocab = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +transformers-domain-adaptation = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +transfusion-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +transparent-background = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +treescope = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +trolo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tsai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tslearn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ttspod = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +txtai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +tyro = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +u8darts = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +uhg = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +uitestrunner-syberos = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ultimate-rvc = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ultralytics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +ultralytics-thop = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unav = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unbabel-comet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +underthesea = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unfoldNd = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unimernet = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unitorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unitxt = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unsloth = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" 
}] +unsloth-zoo = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unstructured = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +unstructured-inference = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +utilsd = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +v-diffusion-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vIQA = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vectice = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vector-quantize-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vectorhub-nightly = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +versatile-audio-upscaler = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vertexai = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vesin = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vgg-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +video-representations-extractor = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +viser = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vision-datasets = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +visionmetrics = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +visu3d = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vit-pytorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +viturka-nn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vllm = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vllm-flash-attn = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vocos = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vollseg = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +vtorch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +wavmark = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +wdoc = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +whisper-live = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +whisper-timestamped = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +whisperx = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +wilds = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +wordllama = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +worker-automate-hub = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +wxbtool = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +x-clip = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +x-transformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +xaitk_saliency = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +xformers = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +xgrammar = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +xinference = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +xtts-api-server = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +yolo-poser = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +yolov5 = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +yolov7-package = [{ 
index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +yta-general-utils = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +zensvi = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +zetascale = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] +zuko = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }] diff --git a/run_horoscope.py b/run_horoscope.py new file mode 100644 index 0000000000000000000000000000000000000000..b2ca18563864da27b19a562c7dc6b02a58c61bc2 --- /dev/null +++ b/run_horoscope.py @@ -0,0 +1,453 @@ +#!/usr/bin/env python3 +""" +Standalone Horoscope API Server +This script runs the horoscope system without database dependencies +""" + +import os +import sys +import logging +from flask import Flask, jsonify, request, render_template_string +from flask_cors import CORS +import json +from datetime import datetime, date +import requests +from bs4 import BeautifulSoup +import trafilatura +import time + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Create Flask app +app = Flask(__name__) +app.secret_key = os.environ.get("SESSION_SECRET", "dev_secret_key") +CORS(app) + +# Zodiac signs +ZODIAC_SIGNS = [ + "aries", "taurus", "gemini", "cancer", + "leo", "virgo", "libra", "scorpio", + "sagittarius", "capricorn", "aquarius", "pisces" +] + +class HoroscopeScraper: + """Base horoscope scraper""" + + def __init__(self, timeout=30): + self.timeout = timeout + + def fetch_url(self, url): + """Fetch content from URL""" + try: + response = requests.get(url, timeout=self.timeout) + response.raise_for_status() + return response.text + except Exception as e: + logger.error(f"Error fetching {url}: {str(e)}") + return None + + def extract_text(self, html_content): + """Extract main text content from HTML""" + try: + text = trafilatura.extract(html_content) + return text if text else "" + except Exception as e: + logger.error(f"Error extracting text: {str(e)}") + return "" + +class AstrologyComScraper(HoroscopeScraper): + """Scraper for Astrology.com""" + + def __init__(self): + super().__init__() + self.base_url = "https://www.astrology.com" + + def scrape_sign(self, base_url, sign, date_str=None): + """Scrape horoscope for a specific sign""" + try: + # Format URL for astrology.com + url = f"{base_url}/horoscope/daily/{sign}" + + html_content = self.fetch_url(url) + if not html_content: + return {"success": False, "error": "Failed to fetch content"} + + text_content = self.extract_text(html_content) + if not text_content: + return {"success": False, "error": "Failed to extract text"} + + # Parse with BeautifulSoup for better extraction + soup = BeautifulSoup(html_content, 'html.parser') + + # Extract horoscope prediction + prediction = self._extract_prediction(soup, text_content) + if not prediction: + return {"success": False, "error": "Could not find horoscope prediction"} + + return { + "success": True, + "sign": sign, + "prediction": prediction, + "date": date.today().isoformat(), + "source": "astrology.com", + "url": url + } + + except Exception as e: + logger.error(f"Error scraping {sign} from astrology.com: {str(e)}") + return {"success": False, "error": str(e)} + + def _extract_prediction(self, soup, text_content): + """Extract horoscope prediction from astrology.com""" + # Try multiple selectors + selectors = [ + '.horoscope-content', + '.daily-horoscope', + 'div[data-testid="horoscope-content"]', + '.horoscope-text' + ] + + for selector in selectors: + 
+            element = soup.select_one(selector)
+            if element:
+                return element.get_text().strip()
+
+        # Fallback: extract from text content
+        lines = text_content.split('\n')
+        for i, line in enumerate(lines):
+            if any(word in line.lower() for word in ['today', 'daily', 'horoscope']):
+                # Return the next few lines as the prediction
+                prediction_lines = lines[i:i+5]
+                return ' '.join(prediction_lines).strip()
+
+        # Last resort: return the first substantial paragraph
+        paragraphs = [p.strip() for p in text_content.split('\n') if len(p.strip()) > 50]
+        return paragraphs[0] if paragraphs else text_content[:300]
+
+class HoroscopeComScraper(HoroscopeScraper):
+    """Scraper for Horoscope.com"""
+
+    def __init__(self):
+        super().__init__()
+        self.base_url = "https://www.horoscope.com"
+
+    def scrape_sign(self, base_url, sign, date_str=None):
+        """Scrape horoscope for a specific sign"""
+        try:
+            # Map the sign name to horoscope.com's numeric sign IDs
+            sign_map = {
+                "aries": 1, "taurus": 2, "gemini": 3, "cancer": 4,
+                "leo": 5, "virgo": 6, "libra": 7, "scorpio": 8,
+                "sagittarius": 9, "capricorn": 10, "aquarius": 11, "pisces": 12
+            }
+
+            sign_id = sign_map.get(sign.lower())
+            if not sign_id:
+                return {"success": False, "error": f"Invalid sign: {sign}"}
+
+            url = f"{base_url}/us/horoscopes/general/horoscope-general-daily-today.aspx?sign={sign_id}"
+
+            html_content = self.fetch_url(url)
+            if not html_content:
+                return {"success": False, "error": "Failed to fetch content"}
+
+            text_content = self.extract_text(html_content)
+            if not text_content:
+                return {"success": False, "error": "Failed to extract text"}
+
+            # Parse with BeautifulSoup
+            soup = BeautifulSoup(html_content, 'html.parser')
+
+            # Extract horoscope prediction
+            prediction = self._extract_prediction(soup, text_content)
+            if not prediction:
+                return {"success": False, "error": "Could not find horoscope prediction"}
+
+            return {
+                "success": True,
+                "sign": sign,
+                "prediction": prediction,
+                "date": date.today().isoformat(),
+                "source": "horoscope.com",
+                "url": url
+            }
+
+        except Exception as e:
+            logger.error(f"Error scraping {sign} from horoscope.com: {str(e)}")
+            return {"success": False, "error": str(e)}
+
+    def _extract_prediction(self, soup, text_content):
+        """Extract horoscope prediction from horoscope.com"""
+        # Try multiple selectors
+        selectors = [
+            '.horoscope-content',
+            '.main-horoscope',
+            '#DailyHoroscope',
+            '.horoscope-text'
+        ]
+
+        for selector in selectors:
+            element = soup.select_one(selector)
+            if element:
+                return element.get_text().strip()
+
+        # Fallback: extract meaningful content from the plain text
+        lines = text_content.split('\n')
+        prediction_lines = []
+
+        for line in lines:
+            line = line.strip()
+            if len(line) > 30 and not any(skip in line.lower() for skip in ['cookie', 'privacy', 'subscribe', 'newsletter']):
+                prediction_lines.append(line)
+                if len(prediction_lines) >= 3:
+                    break
+
+        return ' '.join(prediction_lines) if prediction_lines else text_content[:300]
+
+# Initialize scrapers
+scrapers = {
+    "astrology.com": AstrologyComScraper(),
+    "horoscope.com": HoroscopeComScraper(),
+}
+
+# HTML Template
+HTML_TEMPLATE = '''
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>AI-Powered Daily Horoscopes</title>
+    <style>
+        body { font-family: sans-serif; max-width: 720px; margin: 2rem auto; padding: 0 1rem; }
+        .card { border: 1px solid #ccc; border-radius: 8px; padding: 1rem 1.5rem; margin-bottom: 1.5rem; }
+        #result { white-space: pre-wrap; margin-top: 1rem; }
+    </style>
+</head>
+<body>
+    <h1>🌟 AI-Powered Daily Horoscopes</h1>
+    <p>Ready to integrate with astroastayogini.in</p>
+
+    <div class="card">
+        <h2>Test Single Horoscope</h2>
+        <!-- Posts the selected sign and source as JSON to the /api/test route below -->
+        <label for="sign">Sign</label>
+        <select id="sign">
+            <option value="aries">Aries</option>
+            <option value="taurus">Taurus</option>
+            <option value="gemini">Gemini</option>
+            <option value="cancer">Cancer</option>
+            <option value="leo">Leo</option>
+            <option value="virgo">Virgo</option>
+            <option value="libra">Libra</option>
+            <option value="scorpio">Scorpio</option>
+            <option value="sagittarius">Sagittarius</option>
+            <option value="capricorn">Capricorn</option>
+            <option value="aquarius">Aquarius</option>
+            <option value="pisces">Pisces</option>
+        </select>
+        <label for="source">Source</label>
+        <select id="source">
+            <option value="astrology.com">Astrology.com</option>
+            <option value="horoscope.com">Horoscope.com</option>
+        </select>
+        <button onclick="testScrape()">Test Scrape</button>
+        <div id="result"></div>
+    </div>
+
+    <div class="card">
+        <h2>Integration Status</h2>
+        <p>Scrapers: ✅ Astrology.com, ✅ Horoscope.com</p>
+        <p>OpenAI: {{ '✅' if openai_configured else '❌' }} {{ 'Configured' if openai_configured else 'Needs API Key' }}</p>
+        <p>WordPress: {{ '✅' if wordpress_configured else '❌' }} {{ 'Ready for astroastayogini.in' if wordpress_configured else 'Needs Configuration' }}</p>
+        <p>Ready to integrate with your website at astroastayogini.in</p>
+    </div>
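+
+    <script>
+        // Minimal wiring for the test form above; element IDs are defined in
+        // this template. It calls the /api/test endpoint declared later in
+        // this file and prints the scraper's JSON response.
+        async function testScrape() {
+            const sign = document.getElementById('sign').value;
+            const source = document.getElementById('source').value;
+            const resultEl = document.getElementById('result');
+            resultEl.textContent = 'Fetching...';
+            try {
+                const response = await fetch('/api/test', {
+                    method: 'POST',
+                    headers: {'Content-Type': 'application/json'},
+                    body: JSON.stringify({sign: sign, source: source})
+                });
+                const data = await response.json();
+                resultEl.textContent = JSON.stringify(data, null, 2);
+            } catch (err) {
+                resultEl.textContent = 'Request failed: ' + err;
+            }
+        }
+    </script>
+</body>
+</html>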
+ + + + +''' + +@app.route('/') +def index(): + """Main page""" + openai_configured = bool(os.environ.get("OPENAI_API_KEY")) + wordpress_configured = bool(os.environ.get("WORDPRESS_API_URL")) + + return render_template_string( + HTML_TEMPLATE, + openai_configured=openai_configured, + wordpress_configured=wordpress_configured + ) + +@app.route('/api/health') +def health_check(): + """Health check endpoint""" + return jsonify({ + "status": "ok", + "message": "Horoscope API is running", + "scrapers": list(scrapers.keys()), + "openai_configured": bool(os.environ.get("OPENAI_API_KEY")), + "wordpress_configured": bool(os.environ.get("WORDPRESS_API_URL")) + }) + +@app.route('/api/test', methods=['POST']) +def test_scrape(): + """Test horoscope scraping""" + data = request.get_json() + if not data: + return jsonify({"error": "Missing request data"}), 400 + + sign = data.get('sign', '').lower() + source = data.get('source', 'astrology.com') + + if not sign: + return jsonify({"error": "Missing 'sign' parameter"}), 400 + + if sign not in ZODIAC_SIGNS: + return jsonify({"error": f"Invalid zodiac sign: {sign}"}), 400 + + if source not in scrapers: + return jsonify({"error": f"Unknown source: {source}"}), 400 + + try: + scraper = scrapers[source] + result = scraper.scrape_sign(scraper.base_url, sign) + return jsonify(result) + except Exception as e: + logger.error(f"Error testing scrape: {str(e)}") + return jsonify({"success": False, "error": str(e)}), 500 + +@app.route('/api/wordpress/config', methods=['POST']) +def configure_wordpress(): + """Configure WordPress integration for astroastayogini.in""" + data = request.get_json() + if not data: + return jsonify({"error": "Missing configuration data"}), 400 + + # This would typically save to environment or config file + # For now, just validate the provided configuration + required_fields = ['api_url', 'username', 'password'] + missing_fields = [field for field in required_fields if not data.get(field)] + + if missing_fields: + return jsonify({ + "error": f"Missing required fields: {', '.join(missing_fields)}" + }), 400 + + return jsonify({ + "success": True, + "message": "WordPress configuration received. In production, this would be securely stored." 
+ }) + +if __name__ == "__main__": + logger.info("Starting Horoscope API for astroastayogini.in integration") + app.run(host="0.0.0.0", port=5000, debug=True) \ No newline at end of file diff --git a/scrapers/__init__.py b/scrapers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9032d6f1a3474869e091d67fd6cbf9e9027826c1 --- /dev/null +++ b/scrapers/__init__.py @@ -0,0 +1 @@ +# This file makes the scrapers directory a Python package diff --git a/scrapers/astrology_com_scraper.py b/scrapers/astrology_com_scraper.py new file mode 100644 index 0000000000000000000000000000000000000000..5238a3a39f0330d08e9035731e6139ec8c37236b --- /dev/null +++ b/scrapers/astrology_com_scraper.py @@ -0,0 +1,83 @@ +import logging +import re +from typing import Dict, Any, Optional +from bs4 import BeautifulSoup +from datetime import datetime, date +from .horoscope_scraper import HoroscopeScraper + +logger = logging.getLogger(__name__) + +class AstrologyComScraper(HoroscopeScraper): + """Scraper for Astrology.com horoscopes""" + + def __init__(self, timeout: int = 30): + super().__init__(timeout) + self.source_name = "Astrology.com" + self.base_url = "https://www.astrology.com/horoscope/daily" + + def _format_url(self, base_url: str, sign: str, date_str: Optional[str] = None) -> str: + """Format URL for astrology.com""" + # For astrology.com, the format is typically: + # https://www.astrology.com/horoscope/daily/[sign].html + # or for specific dates: + # https://www.astrology.com/horoscope/daily/[date]/[sign].html + + if date_str: + try: + # Convert YYYY-MM-DD to the format needed (YYYY-MM-DD) + formatted_date = date_str + return f"{base_url}/{formatted_date}/{sign.lower()}.html" + except Exception as e: + logger.error(f"Error formatting date: {str(e)}") + + # Default to current date if no date provided + return f"{base_url}/{sign.lower()}.html" + + def _extract_prediction(self, soup: BeautifulSoup, text_content: str) -> str: + """Extract horoscope prediction from astrology.com""" + # Astrology.com typically has the horoscope in a div with class containing 'daily-horoscope' + prediction_div = soup.select('div[class*="daily-horoscope"] p, .main-horoscope p') + + if prediction_div: + # Join all paragraphs in the prediction div + prediction = ' '.join([p.get_text().strip() for p in prediction_div]) + return prediction + + # Fallback to generic extraction + return super()._extract_prediction(soup, text_content) + + def _extract_date(self, soup: BeautifulSoup, url: str) -> str: + """Extract horoscope date from astrology.com""" + # Try to get date from URL first + date_match = re.search(r'(\d{4}-\d{2}-\d{2})', url) + if date_match: + return date_match.group(1) + + # Look for date in typical location + date_div = soup.select('.date-selector h2, .horoscope-date') + if date_div: + date_text = date_div[0].get_text().strip() + # Try to parse date (format typically like "May 13, 2025") + try: + parsed_date = datetime.strptime(date_text, '%B %d, %Y') + return parsed_date.strftime('%Y-%m-%d') + except ValueError: + # Try alternative format + try: + # Try to match month day, year pattern + match = re.search(r'(\w+)\s+(\d{1,2}),?\s+(\d{4})', date_text) + if match: + month, day, year = match.groups() + month_dict = { + 'january': 1, 'february': 2, 'march': 3, 'april': 4, + 'may': 5, 'june': 6, 'july': 7, 'august': 8, + 'september': 9, 'october': 10, 'november': 11, 'december': 12 + } + month_num = month_dict.get(month.lower(), 1) + parsed_date = datetime(int(year), month_num, int(day)) + return 
parsed_date.strftime('%Y-%m-%d') + except Exception: + pass + + # Default to today's date if no date found + return date.today().isoformat() \ No newline at end of file diff --git a/scrapers/base_scraper.py b/scrapers/base_scraper.py new file mode 100644 index 0000000000000000000000000000000000000000..059a6989b42a4899dede99980d62ac58cb3a1044 --- /dev/null +++ b/scrapers/base_scraper.py @@ -0,0 +1,69 @@ +import logging +import requests +from abc import ABC, abstractmethod +from typing import Dict, Any, Optional +import trafilatura + +logger = logging.getLogger(__name__) + +class BaseScraper(ABC): + """Base class for all scrapers""" + + def __init__(self, timeout: int = 30): + self.timeout = timeout + self.headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Accept-Language": "en-US,en;q=0.9", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + } + + def fetch_url(self, url: str) -> Optional[str]: + """Fetch content from URL using trafilatura""" + try: + logger.debug(f"Fetching URL: {url}") + # Note: trafilatura.fetch_url doesn't accept a timeout parameter directly + downloaded = trafilatura.fetch_url(url) + if not downloaded: + logger.error(f"Failed to download content from {url}") + return None + return downloaded + except Exception as e: + logger.error(f"Error fetching URL {url}: {str(e)}") + return None + + def extract_text(self, html_content: str) -> Optional[str]: + """Extract main text content from HTML""" + try: + if not html_content: + return None + text = trafilatura.extract(html_content) + return text + except Exception as e: + logger.error(f"Error extracting text: {str(e)}") + return None + + def scrape(self, url: str) -> Dict[str, Any]: + """Main scrape method to be implemented by subclasses""" + try: + html_content = self.fetch_url(url) + if not html_content: + return {"success": False, "error": "Failed to fetch URL"} + + text_content = self.extract_text(html_content) + if not text_content: + return {"success": False, "error": "Failed to extract text content"} + + result = self.parse_content(html_content, text_content, url) + result["success"] = True + result["url"] = url + result["text_content"] = text_content + + return result + except Exception as e: + logger.error(f"Error in scraping {url}: {str(e)}") + return {"success": False, "error": str(e), "url": url} + + @abstractmethod + def parse_content(self, html_content: str, text_content: str, url: str) -> Dict[str, Any]: + """Parse the content and extract structured data""" + pass diff --git a/scrapers/blog_scraper.py b/scrapers/blog_scraper.py new file mode 100644 index 0000000000000000000000000000000000000000..a93cf19a8733de6df876d8edea42b1c064e45e86 --- /dev/null +++ b/scrapers/blog_scraper.py @@ -0,0 +1,158 @@ +import logging +import re +from typing import Dict, Any, List +from bs4 import BeautifulSoup +from .base_scraper import BaseScraper + +logger = logging.getLogger(__name__) + +class BlogScraper(BaseScraper): + """Scraper for blog websites""" + + def parse_content(self, html_content: str, text_content: str, url: str) -> Dict[str, Any]: + """Parse blog content and extract structured data""" + try: + soup = BeautifulSoup(html_content, 'html.parser') + + # Extract metadata + result = { + "type": "blog", + "title": self._extract_title(soup), + "publish_date": self._extract_publish_date(soup), + "author": self._extract_author(soup), + "categories": self._extract_categories(soup), + 
"tags": self._extract_tags(soup), + "summary": self._extract_summary(text_content), + "source": self._extract_domain(url), + } + + return result + except Exception as e: + logger.error(f"Error parsing blog content: {str(e)}") + return {"type": "blog", "error_parsing": str(e)} + + def _extract_title(self, soup: BeautifulSoup) -> str: + """Extract title from blog post""" + # Try different methods to find title + title = None + + # Method 1: Look for
<h1>
tags in article or entry + article = soup.find(['article', 'div'], class_=re.compile('(post|entry|article)')) + if article: + h1 = article.find('h1') + if h1: + title = h1.get_text().strip() + + # Method 2: Look for any h1 if above failed + if not title: + h1_tags = soup.find_all('h1') + if h1_tags and len(h1_tags) > 0: + title = h1_tags[0].get_text().strip() + + # Method 3: Look for blog titles in meta tags + if not title: + og_title = soup.find('meta', property='og:title') + if og_title and og_title.get('content'): + title = og_title['content'].strip() + + return title or "Unknown Title" + + def _extract_publish_date(self, soup: BeautifulSoup) -> str: + """Extract publication date""" + # Try various methods to find date + date = None + + # Method 1: Look for common date meta tags + date_meta = soup.find('meta', property='article:published_time') + if date_meta and date_meta.get('content'): + date = date_meta['content'] + + # Method 2: Look for common blog date classes + if not date: + date_classes = ['date', 'post-date', 'entry-date', 'published', 'post-meta'] + for class_name in date_classes: + date_element = soup.find(class_=re.compile(class_name, re.I)) + if date_element: + date = date_element.get_text().strip() + break + + return date or "Unknown Date" + + def _extract_author(self, soup: BeautifulSoup) -> str: + """Extract author information""" + # Try various methods to find author + author = None + + # Method 1: Look for author meta tags + author_meta = soup.find('meta', property='article:author') + if author_meta and author_meta.get('content'): + author = author_meta['content'] + + # Method 2: Look for blog-specific author classes + if not author: + author_classes = ['author', 'byline', 'entry-author', 'post-author'] + for class_name in author_classes: + author_element = soup.find(class_=re.compile(class_name, re.I)) + if author_element: + author = author_element.get_text().strip() + break + + return author or "Unknown Author" + + def _extract_categories(self, soup: BeautifulSoup) -> List[str]: + """Extract blog post categories""" + categories = [] + + # Method 1: Look for category links + category_elements = soup.find_all('a', class_=re.compile('category')) + if category_elements: + for element in category_elements: + cat_text = element.get_text().strip() + if cat_text and cat_text not in categories: + categories.append(cat_text) + + # Method 2: Look for category meta tag + if not categories: + category_meta = soup.find('meta', property='article:section') + if category_meta and category_meta.get('content'): + categories.append(category_meta['content'].strip()) + + return categories + + def _extract_tags(self, soup: BeautifulSoup) -> List[str]: + """Extract blog post tags""" + tags = [] + + # Look for tag links + tag_elements = soup.find_all('a', class_=re.compile('tag')) + if tag_elements: + for element in tag_elements: + tag_text = element.get_text().strip() + if tag_text and tag_text not in tags: + tags.append(tag_text) + + return tags + + def _extract_summary(self, text_content: str) -> str: + """Extract or create a summary from the blog post text""" + if not text_content: + return "No summary available" + + # Take first paragraph or first few sentences (up to 300 chars) + paragraphs = text_content.split('\n\n') + if paragraphs: + summary = paragraphs[0] + if len(summary) > 300: + summary = summary[:297] + "..." 
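+            # (297 characters + the 3-character ellipsis keeps the truncated
+            # summary within the 300-character budget noted above)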
+ return summary + + return "No summary available" + + def _extract_domain(self, url: str) -> str: + """Extract domain from URL""" + try: + from urllib.parse import urlparse + parsed_url = urlparse(url) + return parsed_url.netloc + except Exception: + return "Unknown Source" diff --git a/scrapers/horoscope_com_scraper.py b/scrapers/horoscope_com_scraper.py new file mode 100644 index 0000000000000000000000000000000000000000..13f34efd5c260690079f89ad14a74f7d983467fa --- /dev/null +++ b/scrapers/horoscope_com_scraper.py @@ -0,0 +1,93 @@ +import logging +import re +from typing import Dict, Any, Optional +from bs4 import BeautifulSoup +from datetime import datetime, date +from .horoscope_scraper import HoroscopeScraper + +logger = logging.getLogger(__name__) + +class HoroscopeComScraper(HoroscopeScraper): + """Scraper for Horoscope.com daily horoscopes""" + + def __init__(self, timeout: int = 30): + super().__init__(timeout) + self.source_name = "Horoscope.com" + self.base_url = "https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx" + + def _format_url(self, base_url: str, sign: str, date_str: Optional[str] = None) -> str: + """Format URL for horoscope.com""" + # Map zodiac signs to their numeric ids used by horoscope.com + sign_ids = { + "aries": 1, "taurus": 2, "gemini": 3, "cancer": 4, + "leo": 5, "virgo": 6, "libra": 7, "scorpio": 8, + "sagittarius": 9, "capricorn": 10, "aquarius": 11, "pisces": 12 + } + + sign_id = sign_ids.get(sign.lower(), 1) + + if date_str: + try: + # Convert YYYY-MM-DD to the format needed (YYYYMMDD) + date_obj = datetime.strptime(date_str, '%Y-%m-%d') + formatted_date = date_obj.strftime('%Y%m%d') + return f"{self.base_url}?sign={sign_id}&laDate={formatted_date}" + except Exception as e: + logger.error(f"Error formatting date: {str(e)}") + + # Default to current date if no date provided + return f"{self.base_url}?sign={sign_id}" + + def _extract_prediction(self, soup: BeautifulSoup, text_content: str) -> str: + """Extract horoscope prediction from horoscope.com""" + # Horoscope.com typically has the horoscope in a div with class 'main-horoscope' + prediction_div = soup.select('.main-horoscope p') + + if prediction_div: + # Join all paragraphs in the prediction div + prediction = ' '.join([p.get_text().strip() for p in prediction_div]) + return prediction + + # Alternative selector + alt_div = soup.select('#textline') + if alt_div: + return alt_div[0].get_text().strip() + + # Fallback to generic extraction + return super()._extract_prediction(soup, text_content) + + def _extract_date(self, soup: BeautifulSoup, url: str) -> str: + """Extract horoscope date from horoscope.com""" + # Try to get date from URL first (in the laDate parameter) + date_match = re.search(r'laDate=(\d{8})', url) + if date_match: + date_str = date_match.group(1) + try: + parsed_date = datetime.strptime(date_str, '%Y%m%d') + return parsed_date.strftime('%Y-%m-%d') + except ValueError: + pass + + # Look for date in typical location + date_div = soup.select('.main-horoscope h1, .main-horoscope h2') + if date_div: + date_text = date_div[0].get_text().strip() + # Try to extract date (format typically like "Taurus Daily Horoscope for May 13, 2025") + try: + # Try to match month day, year pattern + match = re.search(r'(\w+)\s+(\d{1,2}),?\s+(\d{4})', date_text) + if match: + month, day, year = match.groups() + month_dict = { + 'january': 1, 'february': 2, 'march': 3, 'april': 4, + 'may': 5, 'june': 6, 'july': 7, 'august': 8, + 'september': 9, 'october': 10, 'november': 11, 
'december': 12 + } + month_num = month_dict.get(month.lower(), 1) + parsed_date = datetime(int(year), month_num, int(day)) + return parsed_date.strftime('%Y-%m-%d') + except Exception: + pass + + # Default to today's date if no date found + return date.today().isoformat() \ No newline at end of file diff --git a/scrapers/horoscope_scraper.py b/scrapers/horoscope_scraper.py new file mode 100644 index 0000000000000000000000000000000000000000..26e49c2c0b4429b3a93b3e4f24a81e6ae2e375f9 --- /dev/null +++ b/scrapers/horoscope_scraper.py @@ -0,0 +1,160 @@ +import logging +import re +from typing import Dict, Any, List, Optional +from datetime import datetime, date +from bs4 import BeautifulSoup +import requests +from .base_scraper import BaseScraper + +logger = logging.getLogger(__name__) + +class HoroscopeScraper(BaseScraper): + """Scraper for horoscope websites""" + + # List of valid zodiac signs + ZODIAC_SIGNS = [ + "aries", "taurus", "gemini", "cancer", + "leo", "virgo", "libra", "scorpio", + "sagittarius", "capricorn", "aquarius", "pisces" + ] + + def __init__(self, timeout: int = 30): + super().__init__(timeout) + self.source_name = "Generic Horoscope Site" + + def scrape_all_signs(self, base_url: str, date_str: Optional[str] = None) -> List[Dict[str, Any]]: + """ + Scrape horoscopes for all zodiac signs + + Args: + base_url: Base URL for the horoscope site + date_str: Optional date string in format YYYY-MM-DD + + Returns: + List of dictionaries with horoscope data for each sign + """ + results = [] + + for sign in self.ZODIAC_SIGNS: + try: + horoscope_data = self.scrape_sign(base_url, sign, date_str) + if horoscope_data and horoscope_data.get('success', False): + results.append(horoscope_data) + except Exception as e: + logger.error(f"Error scraping {sign} horoscope: {str(e)}") + + return results + + def scrape_sign(self, base_url: str, sign: str, date_str: Optional[str] = None) -> Dict[str, Any]: + """ + Scrape horoscope for a specific zodiac sign + + Args: + base_url: Base URL for the horoscope site + sign: Zodiac sign (lowercase) + date_str: Optional date string in format YYYY-MM-DD + + Returns: + Dictionary with horoscope data + """ + # Validate sign + if sign.lower() not in self.ZODIAC_SIGNS: + return {"success": False, "error": f"Invalid zodiac sign: {sign}"} + + # Format the URL for the specific sign + url = self._format_url(base_url, sign, date_str) + + # Use the base scraper method to get the content + result = self.scrape(url) + + # Add additional horoscope-specific metadata + result["sign"] = sign.lower() + result["scraped_date"] = date_str if date_str else date.today().isoformat() + result["source_name"] = self.source_name + + return result + + def parse_content(self, html_content: str, text_content: str, url: str) -> Dict[str, Any]: + """Parse horoscope content and extract structured data""" + try: + soup = BeautifulSoup(html_content, 'html.parser') + + # Extract metadata - this is a generic implementation + # Specific horoscope sites will need custom implementations + result = { + "type": "horoscope", + "prediction": self._extract_prediction(soup, text_content), + "date": self._extract_date(soup, url), + "source": self._extract_domain(url), + } + + return result + except Exception as e: + logger.error(f"Error parsing horoscope content: {str(e)}") + return {"type": "horoscope", "error_parsing": str(e)} + + def _format_url(self, base_url: str, sign: str, date_str: Optional[str] = None) -> str: + """ + Format URL for horoscope site. This is a generic implementation. 
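+        For example, with a hypothetical base_url 'https://example.com/daily' and
+        sign 'Aries', this default implementation returns 'https://example.com/daily/aries'.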
+ Should be overridden in specific scrapers. + """ + # Default implementation just appends the sign to the base URL + return f"{base_url.rstrip('/')}/{sign.lower()}" + + def _extract_prediction(self, soup: BeautifulSoup, text_content: str) -> str: + """ + Extract horoscope prediction text. + Generic implementation - should be overridden in specific scrapers. + """ + # Default implementation just returns the first paragraph or the text content + prediction = "" + + # Look for common horoscope content containers + containers = soup.select('.horoscope-content, .prediction, .horoscope-prediction, article p') + if containers: + prediction = containers[0].get_text().strip() + + # If no prediction was found, use the first few paragraphs from text content + if not prediction and text_content: + paragraphs = text_content.split('\n\n') + prediction = paragraphs[0] if paragraphs else text_content[:500] + + return prediction or "No prediction available" + + def _extract_date(self, soup: BeautifulSoup, url: str) -> str: + """ + Extract horoscope date. + Generic implementation - should be overridden in specific scrapers. + """ + # Look for date in URL + date_match = re.search(r'(\d{4}-\d{2}-\d{2})', url) + if date_match: + return date_match.group(1) + + # Look for date in common elements + date_elements = soup.select('.horoscope-date, .date, time') + if date_elements: + date_text = date_elements[0].get_text().strip() + # Try to parse various date formats + try: + # Try common formats + for fmt in ['%Y-%m-%d', '%B %d, %Y', '%d %B %Y', '%m/%d/%Y', '%d/%m/%Y']: + try: + parsed_date = datetime.strptime(date_text, fmt) + return parsed_date.strftime('%Y-%m-%d') + except ValueError: + continue + except Exception: + pass + + # Default to today's date if no date found + return date.today().isoformat() + + def _extract_domain(self, url: str) -> str: + """Extract domain from URL""" + try: + from urllib.parse import urlparse + parsed_url = urlparse(url) + return parsed_url.netloc + except Exception: + return "Unknown Source" \ No newline at end of file diff --git a/scrapers/iso20022_scraper.py b/scrapers/iso20022_scraper.py new file mode 100644 index 0000000000000000000000000000000000000000..78e154703369c91ae89ff7c5ee3e39937cc0f66f --- /dev/null +++ b/scrapers/iso20022_scraper.py @@ -0,0 +1,236 @@ +""" +ISO20022 SWIFT MT564 Documentation Scraper + +This script scrapes the SWIFT MT564 (Corporate Action Notification) documentation +from the ISO20022 website and converts it into structured JSON data for model training. 
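+
+Example invocation (a sketch; the flags and their defaults are defined in parse_args below):
+
+    python iso20022_scraper.py --url https://www.iso20022.org/15022/uhb/finmt564.htm \
+        --output_dir ../data/raw --format json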
+""" + +import os +import json +import argparse +import logging +from typing import Dict, List, Any, Optional +import requests +from bs4 import BeautifulSoup +import trafilatura + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +# Constants +BASE_URL = "https://www.iso20022.org/15022/uhb/finmt564.htm" +OUTPUT_DIR = "../data/raw" + +def parse_args(): + parser = argparse.ArgumentParser(description="Scrape SWIFT MT564 documentation from ISO20022 website") + parser.add_argument( + "--url", + type=str, + default=BASE_URL, + help="URL of the ISO20022 SWIFT MT564 documentation" + ) + parser.add_argument( + "--output_dir", + type=str, + default=OUTPUT_DIR, + help="Directory to save scraped data" + ) + parser.add_argument( + "--format", + type=str, + choices=["json", "txt"], + default="json", + help="Output format (json or txt)" + ) + return parser.parse_args() + +def fetch_page(url: str) -> Optional[str]: + """Fetch HTML content from URL""" + try: + logger.info(f"Fetching {url}") + response = requests.get(url, timeout=30) + response.raise_for_status() + return response.text + except requests.RequestException as e: + logger.error(f"Error fetching {url}: {e}") + return None + +def extract_text_with_trafilatura(html: str) -> Optional[str]: + """Extract main text content using trafilatura""" + try: + return trafilatura.extract(html) + except Exception as e: + logger.error(f"Error extracting text with trafilatura: {e}") + return None + +def extract_table_structure(soup: BeautifulSoup) -> List[Dict[str, Any]]: + """Extract structured data from tables in the documentation""" + tables = soup.find_all("table") + results = [] + + for table_idx, table in enumerate(tables): + logger.info(f"Processing table {table_idx+1} of {len(tables)}") + + # Extract table headers + headers = [] + header_row = table.find("tr") + if header_row: + for th in header_row.find_all(["th", "td"]): + headers.append(th.text.strip()) + + # Process rows + rows = [] + for row in table.find_all("tr")[1:]: # Skip header row + cell_data = {} + cells = row.find_all(["td", "th"]) + + for i, cell in enumerate(cells): + if i < len(headers): + header = headers[i] + cell_data[header] = cell.text.strip() + else: + # Handle case where cell doesn't have a matching header + cell_data[f"column_{i}"] = cell.text.strip() + + if cell_data: + rows.append(cell_data) + + results.append({ + "table_id": table_idx + 1, + "headers": headers, + "rows": rows + }) + + return results + +def extract_sequence_information(soup: BeautifulSoup) -> Dict[str, Any]: + """Extract information about MT564 sequence structure""" + sequences = [] + + # Look for sequence headers (typically h2 or h3 elements) + sequence_headers = soup.find_all(["h2", "h3", "h4"]) + + for header in sequence_headers: + text = header.text.strip() + # Check if it's a sequence header (contains "Sequence" and a letter) + if "sequence" in text.lower() and any(f"sequence {letter}" in text.lower() for letter in "abcdef"): + sequence_name = text + + # Find details about this sequence + details = [] + next_element = header.find_next() + while next_element and next_element.name not in ["h2", "h3", "h4"]: + if next_element.name == "p": + details.append(next_element.text.strip()) + next_element = next_element.find_next() + + # Find the table immediately following this header + table = header.find_next("table") + fields = [] + + if table: + rows = table.find_all("tr")[1:] # Skip header row + for row in 
rows: + cells = row.find_all(["td", "th"]) + if len(cells) >= 3: + field = { + "tag": cells[0].text.strip(), + "name": cells[1].text.strip(), + "status": cells[2].text.strip() + } + if len(cells) > 3: + field["format"] = cells[3].text.strip() + fields.append(field) + + sequences.append({ + "name": sequence_name, + "details": details, + "fields": fields + }) + + return {"sequences": sequences} + +def parse_documentation(html: str) -> Dict[str, Any]: + """Parse the MT564 documentation and extract structured information""" + soup = BeautifulSoup(html, "html.parser") + + # Extract general information + title = soup.title.text.strip() if soup.title else "MT564 Documentation" + + # Get all paragraphs + paragraphs = [p.text.strip() for p in soup.find_all("p") if p.text.strip()] + + # Extract tables + tables = extract_table_structure(soup) + + # Extract sequence information + sequence_info = extract_sequence_information(soup) + + # Combine all information + result = { + "title": title, + "general_description": paragraphs[:3] if len(paragraphs) >= 3 else paragraphs, + "tables": tables, + "sequences": sequence_info["sequences"] + } + + return result + +def save_output(data: Dict[str, Any], output_dir: str, format: str = "json"): + """Save extracted data to the output directory""" + os.makedirs(output_dir, exist_ok=True) + + if format == "json": + output_file = os.path.join(output_dir, "mt564_documentation.json") + with open(output_file, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + logger.info(f"Saved JSON data to {output_file}") + else: # txt format + output_file = os.path.join(output_dir, "mt564_documentation.txt") + with open(output_file, "w", encoding="utf-8") as f: + f.write(f"# {data['title']}\n\n") + f.write("## General Description\n\n") + for para in data["general_description"]: + f.write(f"{para}\n\n") + + f.write("## Sequences\n\n") + for seq in data["sequences"]: + f.write(f"### {seq['name']}\n\n") + for detail in seq["details"]: + f.write(f"{detail}\n\n") + f.write("#### Fields\n\n") + for field in seq["fields"]: + f.write(f"- {field['tag']}: {field['name']} ({field['status']})\n") + f.write("\n") + + logger.info(f"Saved text data to {output_file}") + +def main(): + args = parse_args() + + # Fetch the HTML content + html_content = fetch_page(args.url) + if not html_content: + logger.error("Failed to fetch the documentation. 
Exiting.") + return + + # Extract and save raw text content + text_content = extract_text_with_trafilatura(html_content) + if text_content: + os.makedirs(args.output_dir, exist_ok=True) + raw_text_file = os.path.join(args.output_dir, "mt564_raw_text.txt") + with open(raw_text_file, "w", encoding="utf-8") as f: + f.write(text_content) + logger.info(f"Saved raw text to {raw_text_file}") + + # Parse structured information + structured_data = parse_documentation(html_content) + + # Save output in the requested format + save_output(structured_data, args.output_dir, args.format) + + logger.info("Scraping complete!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scrapers/news_scraper.py b/scrapers/news_scraper.py new file mode 100644 index 0000000000000000000000000000000000000000..1194233fbe3c23d804488dfb39e1fbe4fa33bfb6 --- /dev/null +++ b/scrapers/news_scraper.py @@ -0,0 +1,133 @@ +import logging +import re +from typing import Dict, Any +from bs4 import BeautifulSoup +from .base_scraper import BaseScraper + +logger = logging.getLogger(__name__) + +class NewsScraper(BaseScraper): + """Scraper for news websites""" + + def parse_content(self, html_content: str, text_content: str, url: str) -> Dict[str, Any]: + """Parse news content and extract structured data""" + try: + soup = BeautifulSoup(html_content, 'html.parser') + + # Try to extract metadata + result = { + "type": "news", + "title": self._extract_title(soup), + "publish_date": self._extract_publish_date(soup), + "author": self._extract_author(soup), + "summary": self._extract_summary(text_content), + "source": self._extract_domain(url), + } + + return result + except Exception as e: + logger.error(f"Error parsing news content: {str(e)}") + return {"type": "news", "error_parsing": str(e)} + + def _extract_title(self, soup: BeautifulSoup) -> str: + """Extract title from article""" + # Try different methods to find title + title = None + + # Method 1: Look for
<h1>
tags + h1_tags = soup.find_all('h1') + if h1_tags and len(h1_tags) > 0: + title = h1_tags[0].get_text().strip() + + # Method 2: Look for article titles in meta tags + if not title: + og_title = soup.find('meta', property='og:title') + if og_title and og_title.get('content'): + title = og_title['content'].strip() + + # Method 3: Use the document title + if not title: + title_tag = soup.find('title') + if title_tag: + title = title_tag.get_text().strip() + + return title or "Unknown Title" + + def _extract_publish_date(self, soup: BeautifulSoup) -> str: + """Extract publication date""" + # Try various methods to find date + date = None + + # Method 1: Look for common date meta tags + date_meta = soup.find('meta', property='article:published_time') + if date_meta and date_meta.get('content'): + date = date_meta['content'] + + # Method 2: Look for time tags + if not date: + time_tag = soup.find('time') + if time_tag and time_tag.get('datetime'): + date = time_tag['datetime'] + elif time_tag: + date = time_tag.get_text().strip() + + # Method 3: Look for date in common class names + if not date: + date_classes = ['date', 'article-date', 'publish-date', 'timestamp'] + for class_name in date_classes: + date_element = soup.find(class_=re.compile(class_name, re.I)) + if date_element: + date = date_element.get_text().strip() + break + + return date or "Unknown Date" + + def _extract_author(self, soup: BeautifulSoup) -> str: + """Extract author information""" + # Try various methods to find author + author = None + + # Method 1: Look for author meta tags + author_meta = soup.find('meta', property='article:author') + if author_meta and author_meta.get('content'): + author = author_meta['content'] + + # Method 2: Look for author in common class names + if not author: + author_classes = ['author', 'byline', 'writer'] + for class_name in author_classes: + author_element = soup.find(class_=re.compile(class_name, re.I)) + if author_element: + author = author_element.get_text().strip() + break + + # Method 3: Look for rel="author" link + if not author: + author_link = soup.find('a', rel='author') + if author_link: + author = author_link.get_text().strip() + + return author or "Unknown Author" + + def _extract_summary(self, text_content: str) -> str: + """Extract or create a summary from the article text""" + if not text_content: + return "No summary available" + + # Take first few sentences (up to 500 chars) + sentences = text_content.split('.') + summary = '.'.join(sentences[:3]) + + if len(summary) > 500: + summary = summary[:497] + "..." 
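+        # NOTE: splitting on '.' is a rough heuristic (abbreviations such as 'U.S.'
+        # also split); acceptable here because this only seeds a best-effort summary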
+ + return summary + + def _extract_domain(self, url: str) -> str: + """Extract domain from URL""" + try: + from urllib.parse import urlparse + parsed_url = urlparse(url) + return parsed_url.netloc + except Exception: + return "Unknown Source" diff --git a/services/__init__.py b/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fbfabb866d91a8e8f0c90c763f88bdadfecb1a99 --- /dev/null +++ b/services/__init__.py @@ -0,0 +1 @@ +# This file makes the services directory a Python package diff --git a/services/horoscope_service.py b/services/horoscope_service.py new file mode 100644 index 0000000000000000000000000000000000000000..eadc3e00b88d633f2d3b3ce9ad8a74789ea70da9 --- /dev/null +++ b/services/horoscope_service.py @@ -0,0 +1,329 @@ +import logging +import json +from typing import Dict, Any, List, Optional +from datetime import datetime, date +import time +from concurrent.futures import ThreadPoolExecutor, as_completed + +from scrapers.horoscope_scraper import HoroscopeScraper +from scrapers.astrology_com_scraper import AstrologyComScraper +from scrapers.horoscope_com_scraper import HoroscopeComScraper +from utils.rate_limiter import RateLimiter +from models import db, Horoscope +from app import app + +logger = logging.getLogger(__name__) + +class HoroscopeService: + """Service to manage horoscope scraping operations""" + + def __init__(self, max_workers: int = 3): + """ + Initialize horoscope service + + Args: + max_workers: Maximum number of concurrent scrapers + """ + self.max_workers = max_workers + self.rate_limiters = {} # Domain-specific rate limiters + + # Register available scrapers + self.scrapers = { + "astrology.com": AstrologyComScraper(), + "horoscope.com": HoroscopeComScraper(), + } + + def _get_rate_limiter(self, domain: str) -> RateLimiter: + """Get or create a rate limiter for a specific domain""" + if domain not in self.rate_limiters: + # Default: 5 requests per minute for each domain + self.rate_limiters[domain] = RateLimiter(window_size=60, max_requests=5) + return self.rate_limiters[domain] + + def scrape_sign(self, source: str, sign: str, date_str: Optional[str] = None) -> Dict[str, Any]: + """ + Scrape horoscope for a specific sign from a specific source + + Args: + source: Source name (e.g., 'astrology.com') + sign: Zodiac sign + date_str: Optional date string (YYYY-MM-DD) + + Returns: + Dictionary with horoscope data + """ + if source not in self.scrapers: + return {"success": False, "error": f"Unknown source: {source}"} + + scraper = self.scrapers[source] + base_url = scraper.base_url + + # Apply rate limiting + rate_limiter = self._get_rate_limiter(source) + if not rate_limiter.can_proceed(): + wait_time = rate_limiter.get_wait_time() + logger.warning(f"Rate limit reached for {source}. 
Waiting {wait_time:.2f} seconds") + time.sleep(wait_time) + + # Perform scraping + result = scraper.scrape_sign(base_url, sign, date_str) + rate_limiter.record_request() + + # Save to database if successful + if result.get('success', False): + self._save_to_database(result, source, sign, date_str) + + return result + + def scrape_all_signs(self, source: str, date_str: Optional[str] = None) -> List[Dict[str, Any]]: + """ + Scrape horoscopes for all zodiac signs from a specific source + + Args: + source: Source name (e.g., 'astrology.com') + date_str: Optional date string (YYYY-MM-DD) + + Returns: + List of dictionaries with horoscope data + """ + if source not in self.scrapers: + return [{"success": False, "error": f"Unknown source: {source}"}] + + scraper = self.scrapers[source] + zodiac_signs = scraper.ZODIAC_SIGNS + results = [] + + # Use ThreadPoolExecutor for concurrent scraping + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + # Submit scraping tasks + future_to_sign = { + executor.submit(self.scrape_sign, source, sign, date_str): sign + for sign in zodiac_signs + } + + # Collect results as they complete + for future in as_completed(future_to_sign): + sign = future_to_sign[future] + try: + result = future.result() + results.append(result) + logger.info(f"Completed scraping {sign} horoscope from {source}") + except Exception as e: + logger.error(f"Exception scraping {sign} from {source}: {str(e)}") + results.append({ + "success": False, + "sign": sign, + "source": source, + "error": str(e), + "scraped_at": time.time() + }) + + return results + + def scrape_sign_from_all_sources(self, sign: str, date_str: Optional[str] = None) -> List[Dict[str, Any]]: + """ + Scrape horoscope for a specific sign from all available sources + + Args: + sign: Zodiac sign + date_str: Optional date string (YYYY-MM-DD) + + Returns: + List of dictionaries with horoscope data + """ + results = [] + + # Use ThreadPoolExecutor for concurrent scraping + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + # Submit scraping tasks + future_to_source = { + executor.submit(self.scrape_sign, source, sign, date_str): source + for source in self.scrapers.keys() + } + + # Collect results as they complete + for future in as_completed(future_to_source): + source = future_to_source[future] + try: + result = future.result() + results.append(result) + logger.info(f"Completed scraping {sign} horoscope from {source}") + except Exception as e: + logger.error(f"Exception scraping {sign} from {source}: {str(e)}") + results.append({ + "success": False, + "sign": sign, + "source": source, + "error": str(e), + "scraped_at": time.time() + }) + + return results + + def scrape_all_horoscopes(self, date_str: Optional[str] = None) -> List[Dict[str, Any]]: + """ + Scrape horoscopes for all signs from all sources + + Args: + date_str: Optional date string (YYYY-MM-DD) + + Returns: + List of dictionaries with horoscope data + """ + all_results = [] + + for source in self.scrapers.keys(): + results = self.scrape_all_signs(source, date_str) + all_results.extend(results) + + return all_results + + def _save_to_database(self, result: Dict[str, Any], source: str, sign: str, date_str: Optional[str] = None) -> None: + """Save horoscope data to database""" + try: + # Extract data from result + prediction = result.get('prediction', '') + + # Parse date + if date_str: + horoscope_date = datetime.strptime(date_str, '%Y-%m-%d').date() + else: + # Use the date from the scraper or today + horoscope_date = 
datetime.strptime(result.get('date', date.today().isoformat()), '%Y-%m-%d').date() + + with app.app_context(): + # Check if horoscope already exists for this sign, date, and source + existing = Horoscope.query.filter_by( + sign=sign.lower(), + date=horoscope_date, + source=source + ).first() + + if existing: + # Update existing horoscope + existing.prediction = prediction + db.session.commit() + logger.info(f"Updated horoscope for {sign} on {horoscope_date} from {source}") + else: + # Create new horoscope + horoscope = Horoscope() + horoscope.sign = sign.lower() + horoscope.date = horoscope_date + horoscope.prediction = prediction + horoscope.source = source + db.session.add(horoscope) + db.session.commit() + logger.info(f"Added horoscope for {sign} on {horoscope_date} from {source}") + except Exception as e: + logger.error(f"Error saving horoscope to database: {str(e)}") + + def get_horoscope(self, sign: str, date_str: Optional[str] = None, source: Optional[str] = None) -> Dict[str, Any]: + """ + Retrieve horoscope from database + + Args: + sign: Zodiac sign + date_str: Optional date string (YYYY-MM-DD) + source: Optional source name + + Returns: + Dictionary with horoscope data + """ + try: + # Parse date + if date_str: + horoscope_date = datetime.strptime(date_str, '%Y-%m-%d').date() + else: + horoscope_date = date.today() + + with app.app_context(): + query = Horoscope.query.filter_by( + sign=sign.lower(), + date=horoscope_date + ) + + if source: + query = query.filter_by(source=source) + + horoscopes = query.all() + + if not horoscopes: + # If no horoscope found, try to scrape it + if source: + self.scrape_sign(source, sign, date_str) + # Try to fetch again + horoscope = Horoscope.query.filter_by( + sign=sign.lower(), + date=horoscope_date, + source=source + ).first() + if horoscope: + return horoscope.to_dict() + else: + # Try all sources + self.scrape_sign_from_all_sources(sign, date_str) + # Try to fetch again + horoscopes = Horoscope.query.filter_by( + sign=sign.lower(), + date=horoscope_date + ).all() + + if not horoscopes: + return {"error": f"No horoscope found for {sign} on {horoscope_date}"} + + # If multiple horoscopes found, return them all + if len(horoscopes) > 1: + return {"horoscopes": [h.to_dict() for h in horoscopes]} + else: + return horoscopes[0].to_dict() + + except Exception as e: + logger.error(f"Error getting horoscope from database: {str(e)}") + return {"error": str(e)} + + def get_horoscopes_for_date(self, date_str: Optional[str] = None) -> Dict[str, Any]: + """ + Retrieve all horoscopes for a specific date + + Args: + date_str: Optional date string (YYYY-MM-DD) + + Returns: + Dictionary with horoscope data for all signs + """ + try: + # Parse date + if date_str: + horoscope_date = datetime.strptime(date_str, '%Y-%m-%d').date() + else: + horoscope_date = date.today() + + with app.app_context(): + horoscopes = Horoscope.query.filter_by(date=horoscope_date).all() + + if not horoscopes: + # If no horoscopes found, try to scrape them + self.scrape_all_horoscopes(date_str) + # Try to fetch again + horoscopes = Horoscope.query.filter_by(date=horoscope_date).all() + + if not horoscopes: + return {"error": f"No horoscopes found for {horoscope_date}"} + + # Group by sign + result = {} + for horoscope in horoscopes: + sign = horoscope.sign + if sign not in result: + result[sign] = [] + result[sign].append(horoscope.to_dict()) + + return {"date": horoscope_date.isoformat(), "horoscopes": result} + + except Exception as e: + logger.error(f"Error getting horoscopes for 
date: {str(e)}")
+            return {"error": str(e)}
+
+
+# Create a singleton instance
+horoscope_service = HoroscopeService()
\ No newline at end of file
diff --git a/services/llm_service.py b/services/llm_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bc93690076b31769979680ef1616bfd08c6f6a5
--- /dev/null
+++ b/services/llm_service.py
@@ -0,0 +1,191 @@
+import os
+import json
+import logging
+from typing import Dict, Any, List, Optional
+from dotenv import load_dotenv
+from openai import OpenAI
+
+logger = logging.getLogger(__name__)
+
+# Load environment variables (e.g., OPENAI_API_KEY) from a local .env file
+load_dotenv()
+
+# Log only whether the key is present; never print the key itself
+logger.info("OpenAI API key loaded: %s", bool(os.getenv("OPENAI_API_KEY")))
+
+class LLMService:
+    """Service for interacting with the OpenAI LLM to process and consolidate scraped data"""
+
+    def __init__(self, model_name: str = "gpt-4o"):
+        """
+        Initialize LLM service
+
+        Args:
+            model_name: Name of the OpenAI model to use (default: gpt-4o)
+        """
+        # the newest OpenAI model is "gpt-4o" which was released May 13, 2024.
+        # do not change this unless explicitly requested by the user
+        self.model_name = model_name
+        self.api_key = os.environ.get("OPENAI_API_KEY")
+
+        if not self.api_key:
+            logger.warning("OpenAI API key not found in environment variables")
+
+        self.client = OpenAI(api_key=self.api_key)
+
+    # This method will be implemented in api/horoscope_routes.py
+    def consolidate_horoscopes(self, horoscope_data):
+        """Placeholder method for consolidating horoscopes"""
+        return {"error": "Method not implemented"}
+
+    def consolidate_data(self, scraped_data: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Consolidate data from multiple sources using the LLM
+
+        Args:
+            scraped_data: List of scraped data from different sources
+
+        Returns:
+            Consolidated information as a dictionary
+        """
+        if not scraped_data:
+            return {"error": "No data provided for consolidation"}
+
+        try:
+            # Prepare data for the LLM
+            sources_text = ""
+            for i, data in enumerate(scraped_data, 1):
+                source_type = data.get("type", "unknown")
+                title = data.get("title", "Unknown Title")
+                source = data.get("source", "Unknown Source")
+                text = data.get("text_content", "No content available")
+
+                sources_text += f"SOURCE {i} ({source_type} from {source}):\n"
+                sources_text += f"Title: {title}\n"
+                sources_text += f"Content: {text[:2000]}...\n\n"
+
+            # Create prompt for consolidation
+            prompt = f"""
+            Please analyze and consolidate the following information from multiple sources.
+
+            {sources_text}
+
+            Provide a comprehensive consolidation of this information in JSON format with the following structure:
+            {{
+                "main_topics": [list of main topics covered],
+                "key_points": [list of key factual points from all sources],
+                "summary": "A 2-3 paragraph summary that synthesizes the information",
+                "analysis": "Brief analysis of the information and any discrepancies between sources",
+                "sources": [list of sources used]
+            }}
+
+            Only include factual information present in the sources. Do not add any speculative or additional information.
+ """ + + # Call OpenAI API + response = self.client.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": "You are a data analysis expert specializing in consolidating information from multiple sources."}, + {"role": "user", "content": prompt} + ], + response_format={"type": "json_object"}, + temperature=0.2 + ) + + # Parse the response + content = response.choices[0].message.content + if content: + result = json.loads(content) + return result + return {"error": "Empty response from LLM"} + + except Exception as e: + logger.error(f"Error consolidating data with LLM: {str(e)}") + return {"error": f"Failed to consolidate data: {str(e)}"} + + def summarize_content(self, text: str, max_length: int = 500) -> str: + """ + Summarize a single piece of content + + Args: + text: Text to summarize + max_length: Maximum length of summary in characters + + Returns: + Summarized text + """ + if not text: + return "No content to summarize" + + try: + prompt = f""" + Please summarize the following text concisely in no more than {max_length} characters, + while maintaining all key information: + + {text[:10000]} + """ + + response = self.client.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": "You are a summarization expert."}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + max_tokens=max_length // 2 # Approximate token count + ) + + return response.choices[0].message.content + + except Exception as e: + logger.error(f"Error summarizing content with LLM: {str(e)}") + return f"Failed to summarize content: {str(e)}" + + def extract_key_information(self, text: str, info_type: Optional[str] = None) -> Dict[str, Any]: + """ + Extract specific type of information from content + + Args: + text: Text to extract information from + info_type: Type of information to extract (e.g., "news", "product", "research") + + Returns: + Extracted information as dictionary + """ + if not text: + return {"error": "No content provided"} + + try: + type_instruction = "" + if info_type: + type_instruction = f"This is {info_type} content. " + + prompt = f""" + {type_instruction}Please extract key structured information from the following text. + Return the result as a JSON object with appropriate fields based on the content type. 
+ + {text[:8000]} + """ + + response = self.client.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": "You are a data extraction expert."}, + {"role": "user", "content": prompt} + ], + response_format={"type": "json_object"}, + temperature=0.1 + ) + + # Parse the response + content = response.choices[0].message.content + if content: + result = json.loads(content) + return result + return {"error": "Empty response from LLM"} + + except Exception as e: + logger.error(f"Error extracting information with LLM: {str(e)}") + return {"error": f"Failed to extract information: {str(e)}"} + + +# Create a singleton instance +llm_service = LLMService() diff --git a/services/scheduler_service.py b/services/scheduler_service.py new file mode 100644 index 0000000000000000000000000000000000000000..1257a5c7dd23f0c0242eea91b39dafa565d79256 --- /dev/null +++ b/services/scheduler_service.py @@ -0,0 +1,439 @@ +import logging +import threading +import time +import schedule +from datetime import datetime, timedelta +from typing import Dict, Any, List, Callable, Optional + +from app import app +from models import db, ScheduledJob + +logger = logging.getLogger(__name__) + +class SchedulerService: + """Service for managing scheduled scraping jobs""" + + def __init__(self): + """Initialize the scheduler service""" + self.scheduler = schedule + self.running = False + self.thread = None + self.jobs = {} # Store job references by name + + def start(self): + """Start the scheduler in a background thread""" + if self.running: + logger.warning("Scheduler already running") + return + + self.running = True + self.thread = threading.Thread(target=self._run_schedule) + self.thread.daemon = True + self.thread.start() + logger.info("Scheduler started") + + # Load jobs from database + self._load_jobs_from_db() + + def stop(self): + """Stop the scheduler""" + if not self.running: + logger.warning("Scheduler not running") + return + + self.running = False + if self.thread: + self.thread.join(timeout=5) + self.thread = None + + # Clear all jobs + schedule.clear() + self.jobs = {} + + logger.info("Scheduler stopped") + + def _run_schedule(self): + """Run the scheduler loop""" + while self.running: + self.scheduler.run_pending() + time.sleep(1) + + def _load_jobs_from_db(self): + """Load scheduled jobs from database""" + try: + with app.app_context(): + jobs = ScheduledJob.query.filter_by(enabled=True).all() + for job in jobs: + self.add_job( + job.name, + job.frequency, + None, # We'll map by name in add_job + update_db=False # Don't create duplicate DB entries + ) + logger.info(f"Loaded {len(jobs)} scheduled jobs from database") + except Exception as e: + logger.error(f"Error loading jobs from database: {str(e)}") + + def add_job(self, name: str, frequency: str, job_func: Optional[Callable] = None, update_db: bool = True) -> bool: + """ + Add a new scheduled job + + Args: + name: Unique name for the job + frequency: Frequency string (e.g., 'daily', 'hourly', '30 minutes') + job_func: Function to call when job runs + update_db: Whether to update the database with the new job + + Returns: + Success status + """ + if name in self.jobs: + logger.warning(f"Job '{name}' already exists") + return False + + # Map job name to function if not provided + if job_func is None: + job_func = self._get_job_function(name) + if not job_func: + logger.error(f"No function mapped for job '{name}'") + return False + + # Schedule job based on frequency + scheduled_job = self._schedule_by_frequency(name, 
frequency, job_func) + if not scheduled_job: + logger.error(f"Failed to schedule job '{name}' with frequency '{frequency}'") + return False + + # Store job reference + self.jobs[name] = scheduled_job + + # Update database + if update_db: + try: + with app.app_context(): + # Check if job already exists + existing = ScheduledJob.query.filter_by(name=name).first() + if existing: + existing.frequency = frequency + existing.enabled = True + db.session.commit() + else: + # Calculate next run time + next_run = self._calculate_next_run(frequency) + + # Create new job + job = ScheduledJob() + job.name = name + job.frequency = frequency + job.next_run = next_run + job.enabled = True + db.session.add(job) + db.session.commit() + logger.info(f"Added job '{name}' to database") + except Exception as e: + logger.error(f"Error adding job to database: {str(e)}") + return False + + logger.info(f"Added job '{name}' with frequency '{frequency}'") + return True + + def remove_job(self, name: str) -> bool: + """ + Remove a scheduled job + + Args: + name: Name of the job to remove + + Returns: + Success status + """ + if name not in self.jobs: + logger.warning(f"Job '{name}' not found") + return False + + # Cancel the job + scheduled_job = self.jobs[name] + self.scheduler.cancel_job(scheduled_job) + del self.jobs[name] + + # Update database + try: + with app.app_context(): + job = ScheduledJob.query.filter_by(name=name).first() + if job: + job.enabled = False + db.session.commit() + logger.info(f"Disabled job '{name}' in database") + else: + logger.warning(f"Job '{name}' not found in database") + except Exception as e: + logger.error(f"Error removing job from database: {str(e)}") + return False + + logger.info(f"Removed job '{name}'") + return True + + def get_all_jobs(self) -> List[Dict[str, Any]]: + """Get list of all scheduled jobs""" + job_list = [] + + try: + with app.app_context(): + jobs = ScheduledJob.query.all() + for job in jobs: + job_info = job.to_dict() + job_info["active"] = job.name in self.jobs + job_list.append(job_info) + except Exception as e: + logger.error(f"Error getting jobs from database: {str(e)}") + + return job_list + + def update_job_status(self, name: str, success: bool) -> bool: + """ + Update job status after running + + Args: + name: Name of the job + success: Whether the job ran successfully + + Returns: + Success status + """ + try: + with app.app_context(): + job = ScheduledJob.query.filter_by(name=name).first() + if job: + job.last_run = datetime.utcnow() + job.next_run = self._calculate_next_run(job.frequency) + db.session.commit() + logger.info(f"Updated job '{name}' status") + return True + else: + logger.warning(f"Job '{name}' not found in database") + except Exception as e: + logger.error(f"Error updating job status: {str(e)}") + + return False + + def _schedule_by_frequency(self, name: str, frequency: str, job_func: Callable) -> Optional[schedule.Job]: + """Schedule a job based on frequency string""" + # Create a wrapper function to update job status + def job_wrapper(): + try: + logger.info(f"Running scheduled job: {name}") + result = job_func() + self.update_job_status(name, True) + return result + except Exception as e: + logger.error(f"Error running job '{name}': {str(e)}") + self.update_job_status(name, False) + + # Schedule based on frequency patterns + if frequency == 'daily': + return schedule.every().day.at("00:00").do(job_wrapper) + elif frequency == 'hourly': + return schedule.every().hour.do(job_wrapper) + elif 'minutes' in frequency or 'minute' in frequency: 
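+            # Accepts forms like '30 minutes' or '1 minute'; the leading integer is
+            # required, mirroring the fallback parsing in _calculate_next_run below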
+ # Extract number of minutes (e.g., '30 minutes' -> 30) + try: + minutes = int(frequency.split()[0]) + return schedule.every(minutes).minutes.do(job_wrapper) + except (ValueError, IndexError): + logger.error(f"Invalid minutes format: {frequency}") + return None + elif 'hours' in frequency or 'hour' in frequency: + # Extract number of hours (e.g., '6 hours' -> 6) + try: + hours = int(frequency.split()[0]) + return schedule.every(hours).hours.do(job_wrapper) + except (ValueError, IndexError): + logger.error(f"Invalid hours format: {frequency}") + return None + elif frequency == 'weekly': + return schedule.every().week.do(job_wrapper) + else: + logger.error(f"Unsupported frequency: {frequency}") + return None + + def _calculate_next_run(self, frequency: str) -> datetime: + """Calculate next run time based on frequency""" + now = datetime.utcnow() + + if frequency == 'daily': + # Next run at midnight + next_day = now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1) + return next_day + elif frequency == 'hourly': + # Next run at the top of the next hour + next_hour = now.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1) + return next_hour + elif 'minutes' in frequency or 'minute' in frequency: + try: + minutes = int(frequency.split()[0]) + return now + timedelta(minutes=minutes) + except (ValueError, IndexError): + return now + timedelta(minutes=30) # Default to 30 minutes + elif 'hours' in frequency or 'hour' in frequency: + try: + hours = int(frequency.split()[0]) + return now + timedelta(hours=hours) + except (ValueError, IndexError): + return now + timedelta(hours=1) # Default to 1 hour + elif frequency == 'weekly': + return now + timedelta(days=7) + else: + return now + timedelta(days=1) # Default to daily + + def _get_job_function(self, name: str) -> Optional[Callable]: + """Map job name to function""" + from services.horoscope_service import horoscope_service + from services.llm_service import llm_service + from services.wordpress_service import wordpress_service + + # Map of job names to functions + job_map = { + "scrape_daily_horoscopes": self._scrape_daily_horoscopes, + "consolidate_horoscopes": self._consolidate_horoscopes, + "export_to_wordpress": self._export_to_wordpress, + } + + return job_map.get(name) + + def _scrape_daily_horoscopes(self): + """Job function to scrape daily horoscopes""" + from services.horoscope_service import horoscope_service + logger.info("Running daily horoscope scraping job") + + # Get today's date + today = datetime.today().strftime('%Y-%m-%d') + + # Run the scraping operation + results = horoscope_service.scrape_all_horoscopes(date_str=today) + + logger.info(f"Daily horoscope scraping completed: {len(results)} horoscopes scraped") + return results + + def _consolidate_horoscopes(self): + """Job function to consolidate horoscopes using LLM""" + from services.llm_service import llm_service + from models import Horoscope, ConsolidatedHoroscope + import json + + logger.info("Running horoscope consolidation job") + + today = datetime.today().date() + + try: + with app.app_context(): + # Get all zodiac signs + signs = ["aries", "taurus", "gemini", "cancer", + "leo", "virgo", "libra", "scorpio", + "sagittarius", "capricorn", "aquarius", "pisces"] + + for sign in signs: + # Find unconsolidated horoscopes for today and this sign + horoscopes = Horoscope.query.filter_by( + sign=sign, + date=today + ).all() + + if not horoscopes: + logger.warning(f"No horoscopes found for {sign} on {today}") + continue + + # Check if already 
consolidated + existing = ConsolidatedHoroscope.query.filter_by( + sign=sign, + date=today + ).first() + + if existing: + logger.info(f"Horoscopes for {sign} on {today} already consolidated") + continue + + # Convert to format needed by LLM service + horoscope_data = [h.to_dict() for h in horoscopes] + + # Consolidate data using LLM + consolidated = llm_service.consolidate_horoscopes(horoscope_data) + + if not consolidated or "error" in consolidated: + logger.error(f"Error consolidating horoscopes for {sign}: {consolidated.get('error', 'Unknown error')}") + continue + + # Create new consolidated horoscope + sources = [h.source for h in horoscopes] + new_consolidated = ConsolidatedHoroscope() + new_consolidated.sign = sign + new_consolidated.date = today + new_consolidated.consolidated_prediction = consolidated.get("consolidated_prediction", "") + new_consolidated.sources = json.dumps(sources) + + db.session.add(new_consolidated) + db.session.commit() + + logger.info(f"Consolidated horoscope created for {sign} on {today}") + + logger.info("Horoscope consolidation job completed") + return True + except Exception as e: + logger.error(f"Error in consolidation job: {str(e)}") + return False + + def _export_to_wordpress(self): + """Job function to export horoscopes to WordPress""" + from services.wordpress_service import wordpress_service + from models import ConsolidatedHoroscope, WordPressExport + + logger.info("Running WordPress export job") + + try: + with app.app_context(): + # Find consolidated horoscopes that haven't been exported + consolidated_horoscopes = db.session.query(ConsolidatedHoroscope)\ + .outerjoin(WordPressExport, ConsolidatedHoroscope.id == WordPressExport.horoscope_id)\ + .filter(WordPressExport.id == None)\ + .all() + + if not consolidated_horoscopes: + logger.info("No new horoscopes to export to WordPress") + return True + + logger.info(f"Found {len(consolidated_horoscopes)} horoscopes to export") + + for horoscope in consolidated_horoscopes: + # Export to WordPress + result = wordpress_service.publish_horoscope(horoscope) + + if not result or "error" in result: + logger.error(f"Error exporting horoscope {horoscope.id} to WordPress: {result.get('error', 'Unknown error')}") + + # Create failed export record + export = WordPressExport() + export.horoscope_id = horoscope.id + export.status = "failed" + db.session.add(export) + db.session.commit() + continue + + # Create successful export record + export = WordPressExport() + export.horoscope_id = horoscope.id + export.wordpress_post_id = result.get("post_id") + export.wordpress_url = result.get("url") + export.status = "published" + db.session.add(export) + db.session.commit() + + logger.info(f"Exported horoscope {horoscope.id} to WordPress as post {result.get('post_id')}") + + logger.info("WordPress export job completed") + return True + except Exception as e: + logger.error(f"Error in WordPress export job: {str(e)}") + return False + + +# Create a singleton instance +scheduler_service = SchedulerService() \ No newline at end of file diff --git a/services/scraper_service.py b/services/scraper_service.py new file mode 100644 index 0000000000000000000000000000000000000000..2d5da6314dda35e324b42f1fbafd878152fb7fb9 --- /dev/null +++ b/services/scraper_service.py @@ -0,0 +1,159 @@ +import logging +import time +from typing import Dict, Any, List, Optional, Tuple, Type +from concurrent.futures import ThreadPoolExecutor, as_completed +from urllib.parse import urlparse + +from scrapers.base_scraper import BaseScraper +from 
scrapers.news_scraper import NewsScraper +from scrapers.blog_scraper import BlogScraper +from utils.rate_limiter import RateLimiter + +logger = logging.getLogger(__name__) + +class ScraperService: + """Service to manage scraping operations""" + + def __init__(self, max_workers: int = 5, timeout: int = 30): + """ + Initialize scraper service + + Args: + max_workers: Maximum number of concurrent scrapers + timeout: Timeout for each scraping operation in seconds + """ + self.max_workers = max_workers + self.timeout = timeout + self.rate_limiters = {} # Domain-specific rate limiters + + # Register available scrapers + self.scrapers = { + "news": NewsScraper(timeout), + "blog": BlogScraper(timeout), + } + + def get_scraper_for_url(self, url: str) -> Tuple[BaseScraper, str]: + """ + Determine the appropriate scraper to use for a URL + + Args: + url: URL to scrape + + Returns: + Tuple of (scraper instance, scraper type) + """ + # Simple logic to determine scraper type based on URL patterns + # This could be enhanced with more sophisticated detection + domain = urlparse(url).netloc.lower() + + # News site patterns + news_patterns = ["news", "cnn", "bbc", "reuters", "nytimes", "washingtonpost", + "guardian", "aljazeera", "foxnews", "nbcnews", "abc"] + + # Blog patterns + blog_patterns = ["blog", "medium", "wordpress", "blogspot", "tumblr", + "substack", "ghost", "hashnode"] + + # Check domain against patterns + for pattern in news_patterns: + if pattern in domain: + return self.scrapers["news"], "news" + + for pattern in blog_patterns: + if pattern in domain: + return self.scrapers["blog"], "blog" + + # Default to news scraper + return self.scrapers["news"], "news" + + def _get_rate_limiter(self, domain: str) -> RateLimiter: + """Get or create a rate limiter for a specific domain""" + if domain not in self.rate_limiters: + # Default: 5 requests per minute for each domain + self.rate_limiters[domain] = RateLimiter(window_size=60, max_requests=5) + return self.rate_limiters[domain] + + def scrape_url(self, url: str, scraper_type: Optional[str] = None) -> Dict[str, Any]: + """ + Scrape a single URL + + Args: + url: URL to scrape + scraper_type: Optional type of scraper to use + + Returns: + Dictionary with scraped data + """ + try: + # Parse domain for rate limiting + domain = urlparse(url).netloc + rate_limiter = self._get_rate_limiter(domain) + + # Check if we can proceed with the request + if not rate_limiter.can_proceed(): + wait_time = rate_limiter.get_wait_time() + logger.warning(f"Rate limit reached for {domain}. 
Waiting {wait_time:.2f} seconds") + time.sleep(wait_time) + + # Select appropriate scraper + if scraper_type and scraper_type in self.scrapers: + scraper = self.scrapers[scraper_type] + selected_type = scraper_type + else: + scraper, selected_type = self.get_scraper_for_url(url) + + logger.info(f"Scraping {url} with {selected_type} scraper") + + # Perform scraping and record the request + result = scraper.scrape(url) + rate_limiter.record_request() + + # Add metadata about scraping + result["scraper_type"] = selected_type + result["scraped_at"] = time.time() + + return result + + except Exception as e: + logger.error(f"Error scraping URL {url}: {str(e)}") + return { + "success": False, + "url": url, + "error": str(e), + "scraped_at": time.time() + } + + def scrape_multiple_urls(self, urls: List[str]) -> List[Dict[str, Any]]: + """ + Scrape multiple URLs in parallel + + Args: + urls: List of URLs to scrape + + Returns: + List of dictionaries with scraped data + """ + results = [] + + # Use ThreadPoolExecutor for concurrent scraping + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + # Submit scraping tasks + future_to_url = {executor.submit(self.scrape_url, url): url for url in urls} + + # Collect results as they complete + for future in as_completed(future_to_url): + url = future_to_url[future] + try: + result = future.result() + results.append(result) + logger.info(f"Completed scraping: {url}") + except Exception as e: + logger.error(f"Exception scraping {url}: {str(e)}") + results.append({ + "success": False, + "url": url, + "error": str(e), + "scraped_at": time.time() + }) + + return results diff --git a/services/wordpress_service.py b/services/wordpress_service.py new file mode 100644 index 0000000000000000000000000000000000000000..b74351351113ffaf858a8449156048e640e430f4 --- /dev/null +++ b/services/wordpress_service.py @@ -0,0 +1,241 @@ +import logging +import os +import json +import requests +from typing import Dict, Any, Optional +from datetime import datetime + +from models import ConsolidatedHoroscope + +logger = logging.getLogger(__name__) + +class WordPressService: + """Service for interacting with WordPress REST API""" + + def __init__(self): + """Initialize WordPress service""" + # WordPress API settings + self.api_url = os.environ.get("WORDPRESS_API_URL", "") + self.username = os.environ.get("WORDPRESS_USERNAME", "") + self.password = os.environ.get("WORDPRESS_PASSWORD", "") + + self.auth = None + if self.username and self.password: + self.auth = (self.username, self.password) + + # Check if WordPress integration is properly configured + self.is_configured = bool(self.api_url and self.auth) + if not self.is_configured: + logger.warning("WordPress service not properly configured. 
Missing API URL, username, or password.") + + def test_connection(self) -> Dict[str, Any]: + """Test the WordPress API connection""" + if not self.is_configured: + return {"success": False, "error": "WordPress API not configured"} + + try: + response = requests.get(f"{self.api_url}/wp/v2", auth=self.auth) + if response.status_code == 200: + return {"success": True, "message": "WordPress connection successful"} + else: + return { + "success": False, + "error": f"WordPress connection failed: {response.status_code}", + "details": response.text + } + except Exception as e: + logger.error(f"Error testing WordPress connection: {str(e)}") + return {"success": False, "error": str(e)} + + def publish_horoscope(self, horoscope: ConsolidatedHoroscope) -> Dict[str, Any]: + """ + Publish a consolidated horoscope to WordPress + + Args: + horoscope: ConsolidatedHoroscope object to publish + + Returns: + Dictionary with status and post details + """ + if not self.is_configured: + return {"success": False, "error": "WordPress API not configured"} + + try: + # Format the horoscope for WordPress + post_data = self._format_horoscope_for_wordpress(horoscope) + + # Create the post via WordPress API + response = requests.post( + f"{self.api_url}/wp/v2/posts", + auth=self.auth, + json=post_data + ) + + if response.status_code in (200, 201): + post_data = response.json() + return { + "success": True, + "post_id": post_data.get("id"), + "url": post_data.get("link"), + "status": post_data.get("status") + } + else: + logger.error(f"WordPress API error: {response.status_code}, {response.text}") + return { + "success": False, + "error": f"WordPress API error: {response.status_code}", + "details": response.text + } + except Exception as e: + logger.error(f"Error publishing horoscope to WordPress: {str(e)}") + return {"success": False, "error": str(e)} + + def _format_horoscope_for_wordpress(self, horoscope: ConsolidatedHoroscope) -> Dict[str, Any]: + """Format horoscope data for WordPress post""" + # Capitalize sign + sign = horoscope.sign.capitalize() + + # Format date + formatted_date = horoscope.date.strftime("%B %d, %Y") + + # Construct title + title = f"{sign} Horoscope for {formatted_date}" + + # Construct content + content = f""" +

<h2>{sign} Daily Horoscope - {formatted_date}</h2>
<div>
{horoscope.consolidated_prediction}
</div>
+ + + """ + + # Extract sources + try: + sources = json.loads(horoscope.sources) + if sources: + sources_text = "\n

<p><em>Sources: " + ", ".join(sources) + "</em></p>

" + content += sources_text + except Exception: + pass + + # Construct post data + post_data = { + "title": title, + "content": content, + "status": "publish", # Could be: publish, draft, pending, future + "excerpt": horoscope.consolidated_prediction[:150] + "...", + "categories": [self._get_horoscope_category_id()], + "tags": [self._get_tag_id(sign.lower())] + } + + # Schedule post if needed + # if schedule_time: + # post_data["status"] = "future" + # post_data["date"] = schedule_time.isoformat() + + return post_data + + def _get_horoscope_category_id(self) -> int: + """ + Get or create the horoscope category ID + + Returns: + Category ID + """ + # Default to a common category ID if we can't find/create the specific one + default_category_id = 1 # Usually "Uncategorized" + + if not self.is_configured: + return default_category_id + + try: + # Check if Horoscopes category exists + response = requests.get( + f"{self.api_url}/wp/v2/categories", + auth=self.auth, + params={"search": "Horoscopes"} + ) + + if response.status_code == 200: + categories = response.json() + if categories: + # Return the first matching category + return categories[0].get("id", default_category_id) + + # Create the category if it doesn't exist + create_response = requests.post( + f"{self.api_url}/wp/v2/categories", + auth=self.auth, + json={ + "name": "Horoscopes", + "slug": "horoscopes", + "description": "Daily horoscopes for all zodiac signs" + } + ) + + if create_response.status_code in (200, 201): + category_data = create_response.json() + return category_data.get("id", default_category_id) + + return default_category_id + + except Exception as e: + logger.error(f"Error getting horoscope category ID: {str(e)}") + return default_category_id + + def _get_tag_id(self, tag_name: str) -> int: + """ + Get or create a tag ID + + Args: + tag_name: Name of the tag + + Returns: + Tag ID + """ + # Default to 0 which will be ignored by WordPress + default_tag_id = 0 + + if not self.is_configured: + return default_tag_id + + try: + # Check if tag exists + response = requests.get( + f"{self.api_url}/wp/v2/tags", + auth=self.auth, + params={"search": tag_name} + ) + + if response.status_code == 200: + tags = response.json() + if tags: + # Return the first matching tag + return tags[0].get("id", default_tag_id) + + # Create the tag if it doesn't exist + create_response = requests.post( + f"{self.api_url}/wp/v2/tags", + auth=self.auth, + json={ + "name": tag_name, + "slug": tag_name.lower().replace(" ", "-") + } + ) + + if create_response.status_code in (200, 201): + tag_data = create_response.json() + return tag_data.get("id", default_tag_id) + + return default_tag_id + + except Exception as e: + logger.error(f"Error getting tag ID: {str(e)}") + return default_tag_id + + +# Create a singleton instance +wordpress_service = WordPressService() \ No newline at end of file diff --git a/simple_app.py b/simple_app.py new file mode 100644 index 0000000000000000000000000000000000000000..7229affea35a30d7a6e699121d03b7fc1e98a61b --- /dev/null +++ b/simple_app.py @@ -0,0 +1,227 @@ +import os +import logging +from flask import Flask, render_template, request, jsonify +from flask_cors import CORS +import json +from datetime import datetime, date +from scrapers.horoscope_scraper import HoroscopeScraper +from scrapers.astrology_com_scraper import AstrologyComScraper +from scrapers.horoscope_com_scraper import HoroscopeComScraper + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Create Flask 
app +app = Flask(__name__) +app.secret_key = os.environ.get("SESSION_SECRET", "dev_secret_key") + +# Enable CORS +CORS(app) + +# Initialize scrapers +scrapers = { + "astrology.com": AstrologyComScraper(), + "horoscope.com": HoroscopeComScraper(), +} + +@app.route('/') +def index(): + """Main page""" + return render_template('simple_index.html') + +@app.route('/api/health') +def health_check(): + """Health check endpoint""" + return jsonify({ + "status": "ok", + "message": "Horoscope API is running", + "scrapers": list(scrapers.keys()) + }) + +@app.route('/api/horoscope/scrape', methods=['POST']) +def scrape_horoscope(): + """Scrape horoscope for a specific sign""" + data = request.get_json() + if not data: + return jsonify({"error": "Missing request data"}), 400 + + sign = data.get('sign', '').lower() + source = data.get('source', 'astrology.com') + + # Validate inputs + if not sign: + return jsonify({"error": "Missing 'sign' parameter"}), 400 + + if sign not in scrapers["astrology.com"].ZODIAC_SIGNS: + return jsonify({"error": f"Invalid zodiac sign: {sign}"}), 400 + + if source not in scrapers: + return jsonify({"error": f"Unknown source: {source}"}), 400 + + try: + # Scrape horoscope + scraper = scrapers[source] + result = scraper.scrape_sign(scraper.base_url, sign) + + if result.get('success', False): + return jsonify({ + "success": True, + "sign": sign, + "source": source, + "prediction": result.get('prediction', ''), + "date": result.get('date', date.today().isoformat()), + "scraped_at": datetime.now().isoformat() + }) + else: + return jsonify({ + "success": False, + "error": result.get('error', 'Unknown error occurred') + }), 500 + + except Exception as e: + logger.error(f"Error scraping horoscope: {str(e)}") + return jsonify({ + "success": False, + "error": f"Scraping failed: {str(e)}" + }), 500 + +@app.route('/api/horoscope/scrape-all', methods=['POST']) +def scrape_all_horoscopes(): + """Scrape horoscopes for all signs from all sources""" + data = request.get_json() or {} + + results = {} + + # Get all zodiac signs + signs = scrapers["astrology.com"].ZODIAC_SIGNS + + for sign in signs: + results[sign] = {} + + for source_name, scraper in scrapers.items(): + try: + result = scraper.scrape_sign(scraper.base_url, sign) + + if result.get('success', False): + results[sign][source_name] = { + "success": True, + "prediction": result.get('prediction', ''), + "date": result.get('date', date.today().isoformat()), + "scraped_at": datetime.now().isoformat() + } + else: + results[sign][source_name] = { + "success": False, + "error": result.get('error', 'Unknown error') + } + + except Exception as e: + logger.error(f"Error scraping {sign} from {source_name}: {str(e)}") + results[sign][source_name] = { + "success": False, + "error": str(e) + } + + return jsonify({ + "success": True, + "results": results, + "scraped_at": datetime.now().isoformat() + }) + +@app.route('/api/horoscope/consolidate', methods=['POST']) +def consolidate_horoscope(): + """Consolidate horoscope predictions using OpenAI""" + data = request.get_json() + if not data: + return jsonify({"error": "Missing request data"}), 400 + + predictions = data.get('predictions', []) + sign = data.get('sign', '').lower() + + if not predictions: + return jsonify({"error": "No predictions provided"}), 400 + + if not sign: + return jsonify({"error": "Missing 'sign' parameter"}), 400 + + # Check if OpenAI API key is available + openai_api_key = os.environ.get("OPENAI_API_KEY") + if not openai_api_key: + return jsonify({ + "error": "OpenAI API 
key not configured. Please provide OPENAI_API_KEY." + }), 500 + + try: + from openai import OpenAI + + client = OpenAI(api_key=openai_api_key) + + # Create prompt for consolidation + sources_text = "" + for i, pred in enumerate(predictions, 1): + source = pred.get("source", f"Source {i}") + prediction = pred.get("prediction", "No prediction available") + sources_text += f"SOURCE {i} ({source}):\n{prediction}\n\n" + + prompt = f""" + Please analyze and consolidate these daily horoscope predictions for {sign.upper()}. + + {sources_text} + + Create a single, coherent daily horoscope prediction that synthesizes the information from all sources. + Focus on the common themes and advice while maintaining the mystical and guiding tone typical of horoscopes. + The response should be 2-3 paragraphs long and should NOT mention the sources or that it's a consolidation. + + Respond with JSON in this format: + {{ + "consolidated_prediction": "The consolidated horoscope text..." + }} + """ + + # Call OpenAI API + response = client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": "You are an expert astrologer specializing in synthesizing horoscope predictions."}, + {"role": "user", "content": prompt} + ], + response_format={"type": "json_object"}, + temperature=0.7 + ) + + # Parse the response + content = response.choices[0].message.content + if content: + result = json.loads(content) + else: + result = {"consolidated_prediction": "Unable to generate consolidated prediction."} + + return jsonify({ + "success": True, + "sign": sign, + "consolidated_prediction": result.get("consolidated_prediction", ""), + "sources_count": len(predictions), + "consolidated_at": datetime.now().isoformat() + }) + + except ImportError: + return jsonify({ + "error": "OpenAI library not available. Please install: pip install openai" + }), 500 + except Exception as e: + logger.error(f"Error consolidating horoscopes: {str(e)}") + return jsonify({ + "error": f"Consolidation failed: {str(e)}" + }), 500 + +@app.route('/api/signs') +def get_zodiac_signs(): + """Get list of zodiac signs""" + return jsonify({ + "signs": scrapers["astrology.com"].ZODIAC_SIGNS + }) + +if __name__ == "__main__": + logger.info("Starting simple horoscope API server") + app.run(host="0.0.0.0", port=5000, debug=True) \ No newline at end of file diff --git a/simple_inference.py b/simple_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..f8c0b80f7790efbb001ea1ed33321893893570c3 --- /dev/null +++ b/simple_inference.py @@ -0,0 +1,193 @@ +""" +Simple Inference Script for TinyLlama +This script demonstrates how to use a fine-tuned TinyLlama model for text generation +without requiring all the training dependencies. 
+""" + +import os +import argparse +import json +import time + +def parse_args(): + parser = argparse.ArgumentParser(description="Run inference with a TinyLlama model") + parser.add_argument( + "--model_path", + type=str, + default="TinyLlama/TinyLlama-1.1B-Chat-v1.0", + help="Path to the model directory or HuggingFace model name" + ) + parser.add_argument( + "--prompt", + type=str, + default=None, + help="Text prompt for generation" + ) + parser.add_argument( + "--prompt_file", + type=str, + default=None, + help="File containing multiple prompts (one per line)" + ) + parser.add_argument( + "--max_new_tokens", + type=int, + default=256, + help="Maximum number of tokens to generate" + ) + parser.add_argument( + "--temperature", + type=float, + default=0.7, + help="Sampling temperature" + ) + parser.add_argument( + "--output_file", + type=str, + default="generated_outputs.json", + help="File to save generated outputs" + ) + parser.add_argument( + "--interactive", + action="store_true", + help="Run in interactive mode" + ) + return parser.parse_args() + +def format_prompt_for_chat(prompt): + """Format a prompt for chat completion""" + return f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n" + +def main(): + args = parse_args() + + try: + # Import libraries here to make the error messages clearer + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer + except ImportError: + print("Error: Required libraries not installed.") + print("Please install them with: pip install torch transformers") + return + + print(f"Loading model from {args.model_path}...") + + # Load model and tokenizer + try: + model = AutoModelForCausalLM.from_pretrained( + args.model_path, + torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, + low_cpu_mem_usage=True + ) + tokenizer = AutoTokenizer.from_pretrained(args.model_path) + + # Move model to GPU if available + device = "cuda" if torch.cuda.is_available() else "cpu" + model = model.to(device) + model.eval() + + print(f"Model loaded successfully on {device}") + except Exception as e: + print(f"Error loading model: {e}") + return + + if args.interactive: + print("\n=== Interactive Mode ===") + print("Type 'exit' or 'quit' to end the session") + print("Type your prompts and press Enter.\n") + + while True: + user_input = input("\nYou: ") + if user_input.lower() in ["exit", "quit"]: + break + + # Format prompt for chat + formatted_prompt = format_prompt_for_chat(user_input) + + # Tokenize input + inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device) + + # Generate response + start_time = time.time() + with torch.no_grad(): + outputs = model.generate( + inputs.input_ids, + max_new_tokens=args.max_new_tokens, + temperature=args.temperature, + do_sample=True, + pad_token_id=tokenizer.eos_token_id + ) + + # Decode response + full_response = tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Extract just the assistant's response + try: + # This handles the case where the model properly follows the formatting + assistant_response = full_response.split("<|im_start|>assistant\n")[1].split("<|im_end|>")[0] + except: + # Fallback for when the model doesn't follow formatting perfectly + assistant_response = full_response.replace(user_input, "").strip() + + gen_time = time.time() - start_time + tokens_per_second = len(outputs[0]) / gen_time + + print(f"\nAssistant: {assistant_response}") + print(f"\n[Generated {len(outputs[0])} tokens in {gen_time:.2f}s - {tokens_per_second:.2f} tokens/s]") + + else: + 
# Get prompts + prompts = [] + if args.prompt: + prompts.append(args.prompt) + elif args.prompt_file: + with open(args.prompt_file, 'r', encoding='utf-8') as f: + prompts = [line.strip() for line in f if line.strip()] + else: + print("Error: Either --prompt or --prompt_file must be provided") + return + + results = [] + print(f"Processing {len(prompts)} prompts...") + + for i, prompt in enumerate(prompts): + print(f"Processing prompt {i+1}/{len(prompts)}") + + # Format prompt for chat + formatted_prompt = format_prompt_for_chat(prompt) + + # Tokenize input + inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device) + + # Generate response + with torch.no_grad(): + outputs = model.generate( + inputs.input_ids, + max_new_tokens=args.max_new_tokens, + temperature=args.temperature, + do_sample=True, + pad_token_id=tokenizer.eos_token_id + ) + + # Decode response + full_response = tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Extract just the assistant's response + try: + assistant_response = full_response.split("<|im_start|>assistant\n")[1].split("<|im_end|>")[0] + except: + assistant_response = full_response.replace(prompt, "").strip() + + results.append({ + "prompt": prompt, + "response": assistant_response + }) + + # Save results + with open(args.output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + print(f"Generated {len(results)} responses and saved to {args.output_file}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/static/.DS_Store b/static/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..bf57a98fe8184729bfd4d1852677488bffb3e3cd Binary files /dev/null and b/static/.DS_Store differ diff --git a/static/css/custom.css b/static/css/custom.css new file mode 100644 index 0000000000000000000000000000000000000000..34bc1bb7fa2792f05671b86c7ed427b14602193d --- /dev/null +++ b/static/css/custom.css @@ -0,0 +1,80 @@ +/* Custom styles for Web Scraper API */ + +/* Code blocks formatting */ +pre { + background-color: #1e1e1e; + color: #d4d4d4; + border-radius: 5px; + padding: 15px; + overflow-x: auto; +} + +/* Result containers */ +.result-pre { + max-height: 500px; + overflow-y: auto; +} + +/* Sticky footer */ +html { + position: relative; + min-height: 100%; +} + +body { + margin-bottom: 60px; /* Margin bottom by footer height */ +} + +.footer { + position: absolute; + bottom: 0; + width: 100%; + height: 60px; /* Set the fixed height of the footer here */ + line-height: 60px; /* Vertically center the text there */ +} + +/* Custom accordion styling */ +.accordion-button { + background-color: #2c2c2c !important; +} + +.accordion-button:not(.collapsed) { + background-color: #3c3c3c !important; + color: white; +} + +.accordion-button:focus { + box-shadow: none; +} + +/* Custom spinner colors */ +.spinner-border { + color: #17a2b8 !important; +} + +/* Custom button styling */ +.btn-primary { + background-color: #17a2b8; + border-color: #17a2b8; +} + +.btn-primary:hover { + background-color: #138496; + border-color: #117a8b; +} + +/* Icons in buttons and headers */ +[data-feather] { + vertical-align: text-bottom; + margin-right: 0.25rem; +} + +/* Custom card styling */ +.card { + border-color: #444; +} + +.card-header { + background-color: #2c2c2c; + border-bottom-color: #444; +} diff --git a/static/js/main.js b/static/js/main.js new file mode 100644 index 0000000000000000000000000000000000000000..17202e6178dc5389466654e0c09196aa86dc0c94 --- /dev/null +++ 
b/static/js/main.js @@ -0,0 +1,119 @@ +// Main JavaScript for Web Scraper API UI + +document.addEventListener('DOMContentLoaded', function() { + // Get form and result elements + const testForm = document.getElementById('test-form'); + const urlsTextarea = document.getElementById('urls'); + const spinner = document.getElementById('spinner'); + const resultSection = document.getElementById('result-section'); + const consolidatedResult = document.getElementById('consolidated-result'); + const rawResult = document.getElementById('raw-result'); + + // Only set up event listener if the form exists (we're on the home page) + if (testForm) { + testForm.addEventListener('submit', async function(e) { + e.preventDefault(); + + // Get URLs from textarea (one per line) + const urlsText = urlsTextarea.value.trim(); + if (!urlsText) { + showError('Please enter at least one URL'); + return; + } + + // Split by newline and filter out empty lines + const urls = urlsText.split('\n') + .map(url => url.trim()) + .filter(url => url.length > 0); + + if (urls.length === 0) { + showError('Please enter at least one valid URL'); + return; + } + + if (urls.length > 10) { + showError('Maximum 10 URLs allowed per request'); + return; + } + + // Show spinner, hide results + spinner.classList.remove('d-none'); + resultSection.classList.add('d-none'); + + try { + // Make API request + const response = await fetch('/api/scrape-and-consolidate', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ urls }) + }); + + // Parse response + const data = await response.json(); + + // Update results + if (response.ok) { + consolidatedResult.textContent = JSON.stringify(data.consolidated_data, null, 2); + rawResult.textContent = JSON.stringify(data.scraped_results, null, 2); + resultSection.classList.remove('d-none'); + } else { + showError(`Error: ${data.error || 'Unknown error occurred'}`); + } + } catch (error) { + showError(`Error: ${error.message}`); + } finally { + // Hide spinner + spinner.classList.add('d-none'); + } + }); + } + + // Initialize Feather icons + if (typeof feather !== 'undefined') { + feather.replace(); + } +}); + +/** + * Show error message to user + * @param {string} message - Error message to display + */ +function showError(message) { + // Check if alert container exists, create if not + let alertContainer = document.getElementById('alert-container'); + + if (!alertContainer) { + alertContainer = document.createElement('div'); + alertContainer.id = 'alert-container'; + alertContainer.className = 'mt-3'; + + // Insert after the form + const testForm = document.getElementById('test-form'); + if (testForm) { + testForm.after(alertContainer); + } + } + + // Create alert + const alert = document.createElement('div'); + alert.className = 'alert alert-danger alert-dismissible fade show'; + alert.role = 'alert'; + + // Create alert content + alert.innerHTML = ` + ${message} + + `; + + // Add alert to container + alertContainer.innerHTML = ''; + alertContainer.appendChild(alert); + + // Auto-dismiss after 5 seconds + setTimeout(() => { + const bsAlert = new bootstrap.Alert(alert); + bsAlert.close(); + }, 5000); +} diff --git a/swift_setup.py b/swift_setup.py new file mode 100644 index 0000000000000000000000000000000000000000..705b35e62446f482f7233ddbde6bee1d3ee39ee7 --- /dev/null +++ b/swift_setup.py @@ -0,0 +1,243 @@ +""" +Setup Script for SWIFT MT564 Documentation Assistant + +This script guides you through setting up the project, installing dependencies, 
+downloading the TinyLlama model, and uploading your fine-tuned model to Hugging Face. + +Usage: + python setup.py --mode [install|download|upload] --options [options] +""" + +import os +import sys +import argparse +import subprocess +import logging + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +def parse_args(): + parser = argparse.ArgumentParser(description="Setup SWIFT MT564 Documentation Assistant") + parser.add_argument( + "--mode", + type=str, + choices=["install", "download", "upload", "guide"], + default="guide", + help="Mode of operation: install dependencies, download model, upload model, or show guide" + ) + parser.add_argument( + "--model_name", + type=str, + default="TinyLlama/TinyLlama-1.1B-Chat-v1.0", + help="Model name on Hugging Face Hub (for download mode)" + ) + parser.add_argument( + "--output_dir", + type=str, + default="./data/models", + help="Directory to save the model (for download mode)" + ) + parser.add_argument( + "--model_dir", + type=str, + default="./mt564_tinyllama_model", + help="Directory containing fine-tuned model (for upload mode)" + ) + parser.add_argument( + "--repo_name", + type=str, + help="Repository name on Hugging Face Hub (for upload mode)" + ) + return parser.parse_args() + +def install_dependencies(): + """Install required Python packages""" + try: + logger.info("Installing dependencies...") + + if os.path.exists("dependencies.txt"): + subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "dependencies.txt"]) + logger.info("Dependencies installed successfully") + else: + logger.error("dependencies.txt file not found") + logger.info("You need to install the following packages:") + logger.info("- torch, transformers, datasets, accelerate") + logger.info("- huggingface_hub, requests, beautifulsoup4, trafilatura") + logger.info("- flask, PyPDF2, pdf2image, pytesseract") + logger.info("Run: pip install torch transformers datasets huggingface_hub accelerate flask") + + return True + except subprocess.CalledProcessError as e: + logger.error(f"Error installing dependencies: {e}") + return False + +def download_model(model_name, output_dir): + """Download TinyLlama model""" + try: + logger.info(f"Downloading model {model_name}...") + + if not os.path.exists("model/download_tinyllama.py"): + logger.error("download_tinyllama.py script not found") + return False + + subprocess.check_call([ + sys.executable, + "model/download_tinyllama.py", + "--model_name", model_name, + "--output_dir", output_dir + ]) + + logger.info(f"Model downloaded successfully to {output_dir}") + return True + except subprocess.CalledProcessError as e: + logger.error(f"Error downloading model: {e}") + return False + +def upload_model(model_dir, repo_name): + """Upload fine-tuned model to Hugging Face Hub""" + try: + if not repo_name: + logger.error("Repository name is required for upload mode") + return False + + if not os.path.exists("model/upload_to_huggingface.py"): + logger.error("upload_to_huggingface.py script not found") + return False + + # Check for authentication token + if "HUGGING_FACE_TOKEN" not in os.environ: + logger.error("HUGGING_FACE_TOKEN environment variable not set") + logger.info("You need to set your Hugging Face API token:") + logger.info("export HUGGING_FACE_TOKEN=your_token_here") + return False + + logger.info(f"Uploading model from {model_dir} to {repo_name}...") + + subprocess.check_call([ + sys.executable, + "model/upload_to_huggingface.py", + 
"--model_dir", model_dir, + "--repo_name", repo_name + ]) + + logger.info(f"Model uploaded successfully to {repo_name}") + return True + except subprocess.CalledProcessError as e: + logger.error(f"Error uploading model: {e}") + return False + +def show_guide(): + """Show guide for using the SWIFT MT564 Documentation Assistant""" + guide = """ +================================================= +SWIFT MT564 Documentation Assistant - Setup Guide +================================================= + +This project helps you build an AI assistant specialized in SWIFT MT564 messages. +Below is a step-by-step guide to setting up and using this project: + +1. INSTALLATION +--------------- +Install dependencies: + python setup.py --mode install + +2. DATA COLLECTION +----------------- +Run the ISO20022 scraper: + python scrapers/iso20022_scraper.py --output_dir ./data/raw + +Process the scraped data: + python scrapers/data_processor.py --input_dir ./data/raw --output_dir ./data/processed + +3. MODEL PREPARATION +------------------- +Download TinyLlama model: + python setup.py --mode download --model_name TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./data/models + +4. TRAINING +---------- +Train the model on MT564 data: + python train_mt564_model.py --model_name TinyLlama/TinyLlama-1.1B-Chat-v1.0 --training_data ./data/processed/mt564_training_data.json --output_dir ./mt564_tinyllama_model + +5. DEPLOYMENT +------------ +Upload model to Hugging Face Hub: + # First set your Hugging Face token + export HUGGING_FACE_TOKEN=your_token_here + + # Then upload the model + python setup.py --mode upload --model_dir ./mt564_tinyllama_model --repo_name your-username/mt564-tinyllama + +6. USING THE MODEL ON HUGGING FACE +--------------------------------- +Create a Hugging Face Space: + - Go to huggingface.co + - Click "New Space" + - Choose a Gradio or Streamlit template + - Link to your uploaded model + - Use the following code in your app.py: + +```python +import gradio as gr +from transformers import AutoModelForCausalLM, AutoTokenizer + +# Load your model from Hugging Face Hub +model_name = "your-username/mt564-tinyllama" +model = AutoModelForCausalLM.from_pretrained(model_name) +tokenizer = AutoTokenizer.from_pretrained(model_name) + +def answer_question(question): + # Format prompt for the chat model + prompt = f"<|im_start|>user\\n{question}<|im_end|>\\n<|im_start|>assistant\\n" + + # Generate response + inputs = tokenizer(prompt, return_tensors="pt") + outputs = model.generate(inputs.input_ids, max_new_tokens=256, temperature=0.7) + response = tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Extract assistant's response + try: + assistant_response = response.split("<|im_start|>assistant\\n")[1].split("<|im_end|>")[0] + except: + assistant_response = response + + return assistant_response + +# Create Gradio interface +iface = gr.Interface( + fn=answer_question, + inputs=gr.Textbox(lines=3, placeholder="Ask a question about SWIFT MT564..."), + outputs=gr.Textbox(), + title="SWIFT MT564 Documentation Assistant", + description="Ask questions about SWIFT MT564 message formats and specifications" +) + +iface.launch() +``` + +For more details and advanced usage, refer to the README.md. 
+""" + print(guide) + return True + +def main(): + args = parse_args() + + if args.mode == "install": + success = install_dependencies() + elif args.mode == "download": + success = download_model(args.model_name, args.output_dir) + elif args.mode == "upload": + success = upload_model(args.model_dir, args.repo_name) + else: # guide + success = show_guide() + + if success: + logger.info(f"{args.mode.capitalize()} completed successfully") + else: + logger.error(f"{args.mode.capitalize()} failed") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/templates/docs.html b/templates/docs.html new file mode 100644 index 0000000000000000000000000000000000000000..62dd809ddc71f8610881cf7ed85998d0e78fbb57 --- /dev/null +++ b/templates/docs.html @@ -0,0 +1,343 @@ + + + + + + API Documentation - Cosmic Guide Horoscope API + + + + + + + + + + +
+
+ +
+
+

Horoscope API Documentation

+

Complete reference for the Cosmic Guide Horoscope API with LLM Integration

+
+ +
+

Overview

+

This API allows you to scrape horoscope data from multiple astrological websites, consolidate the information using LLM processing, and retrieve it in a structured format. It's designed to be easy to use while providing powerful data processing capabilities.

+

Base URL

+ http://localhost:5000 +
+ +
+

Authentication

+

Currently, no authentication is required to use the API. However, the API uses rate limiting to prevent abuse.

+
+ +
+

Rate Limiting

+

To prevent overwhelming target websites and the API itself, rate limiting is implemented:

+
    +
  • API-wide limit: 10 requests per minute
  • +
  • Per-domain limit: 5 requests per minute
  • +
+

When a rate limit is exceeded, the API will return a 429 status code with information about how long to wait before retrying.

+
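For example, a client can back off and retry when it receives a 429 (a minimal sketch; the Retry-After header name is an assumption, so the code falls back to a fixed delay when it is absent):

import time
import requests

def get_with_backoff(url, max_retries=3):
    """Fetch a URL, sleeping and retrying whenever the API answers 429."""
    response = requests.get(url)
    for _ in range(max_retries):
        if response.status_code != 429:
            break
        # "Retry-After" is an assumed header name; default to 60 seconds if missing
        wait = float(response.headers.get("Retry-After", 60))
        time.sleep(wait)
        response = requests.get(url)
    return response

resp = get_with_backoff("http://localhost:5000/api/horoscope/get/aries")
print(resp.status_code)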
+ +
+

Endpoints

+ +
+
+

Get Horoscope

+ GET /api/horoscope/get/{sign} +
+
+

Get horoscope for a specific zodiac sign.

+ +

Parameters

+
    +
  • sign (path parameter): Zodiac sign (e.g., aries, taurus, gemini, etc.)
  • +
  • date (query parameter, optional): Date in YYYY-MM-DD format. Defaults to today's date.
  • +
  • source (query parameter, optional): Specific source to get the horoscope from.
  • +
+ +

Response

+
{
+  "id": 1,
+  "sign": "aries",
+  "date": "2025-05-13",
+  "prediction": "Today is a good day for new beginnings...",
+  "source": "astrology.com",
+  "created_at": "2025-05-13T00:00:00.000Z"
+}
+
+
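For instance, fetching the Aries horoscope for a specific date and source with Python's requests library (the parameter values are illustrative):

import requests

params = {"date": "2025-05-13", "source": "astrology.com"}
response = requests.get("http://localhost:5000/api/horoscope/get/aries", params=params)
response.raise_for_status()
print(response.json()["prediction"])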
+ +
+
+

Get All Horoscopes

+ GET /api/horoscope/get-all +
+
+

Get horoscopes for all zodiac signs for a specific date.

+ +

Parameters

+
    +
  • date (query parameter, optional): Date in YYYY-MM-DD format. Defaults to today's date.
  • +
+ +

Response

+
{
+  "date": "2025-05-13",
+  "horoscopes": {
+    "aries": [
+      {
+        "id": 1,
+        "sign": "aries",
+        "date": "2025-05-13",
+        "prediction": "Today is a good day for new beginnings...",
+        "source": "astrology.com",
+        "created_at": "2025-05-13T00:00:00.000Z"
+      },
+      {
+        "id": 2,
+        "sign": "aries",
+        "date": "2025-05-13",
+        "prediction": "You'll find new opportunities today...",
+        "source": "horoscope.com",
+        "created_at": "2025-05-13T00:00:00.000Z"
+      }
+    ],
+    "taurus": [
+      // Taurus horoscopes...
+    ],
+    // Other signs...
+  }
+}
+
+
+ +
+
+

Scrape Horoscope

+ POST /api/horoscope/scrape +
+
+

Scrape the horoscope for a specific sign from a given source.

+ +

Request Body

+
{
+  "sign": "aries",
+  "source": "astrology.com",  // Optional
+  "date": "2025-05-13"  // Optional, defaults to today
+}
+ +

Response

+
{
+  "success": true,
+  "sign": "aries",
+  "scraped_date": "2025-05-13",
+  "prediction": "Today is a good day for new beginnings...",
+  "date": "2025-05-13",
+  "source": "astrology.com",
+  "source_name": "Astrology.com",
+  "type": "horoscope"
+}
+
+
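A minimal scrape request from Python might look like this (the sign and source values are illustrative):

import requests

payload = {"sign": "aries", "source": "astrology.com"}
response = requests.post("http://localhost:5000/api/horoscope/scrape", json=payload)
data = response.json()
if data.get("success"):
    print(f"{data['sign']} ({data['source']}): {data['prediction']}")
else:
    print(f"Scrape failed: {data.get('error')}")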
+ +
+
+

Scrape All Horoscopes

+ POST /api/horoscope/scrape-all +
+
+

Scrape horoscopes for all signs from all sources.

+ +

Request Body

+
{
+  "date": "2025-05-13"  // Optional, defaults to today
+}
+ +

Response

+
{
+  "results": [
+    // Array of scraped horoscopes
+  ]
+}
+
+
+ +
+
+

Consolidate Horoscope

+ POST /api/horoscope/consolidate/{sign} +
+
+

Consolidate horoscopes for a specific sign using the LLM service.

+ +

Parameters

+
    +
  • sign (path parameter): Zodiac sign (e.g., aries, taurus, gemini, etc.)
  • +
+ +

Request Body

+
{
+  "date": "2025-05-13"  // Optional, defaults to today
+}
+ +

Response

+
{
+  "message": "Consolidated horoscope created for aries on 2025-05-13",
+  "horoscope": {
+    "id": 1,
+    "sign": "aries",
+    "date": "2025-05-13",
+    "consolidated_prediction": "The stars align perfectly for you today...",
+    "sources": "[\"astrology.com\", \"horoscope.com\"]",
+    "created_at": "2025-05-13T00:00:00.000Z"
+  }
+}
+
+
+ +
+
+

Scheduler API

+ Various endpoints +
+
+

Endpoints for managing scheduled jobs.

+ +

Get Scheduled Jobs

+ GET /api/horoscope/schedule +
{
+  "jobs": [
+    {
+      "id": 1,
+      "name": "scrape_daily_horoscopes",
+      "frequency": "daily",
+      "last_run": "2025-05-12T00:00:00.000Z",
+      "next_run": "2025-05-13T00:00:00.000Z",
+      "enabled": true,
+      "created_at": "2025-05-01T00:00:00.000Z",
+      "active": true
+    }
+  ]
+}
+ +

Add Scheduled Job

+ POST /api/horoscope/schedule +

Request Body:

+
{
+  "name": "scrape_daily_horoscopes",
+  "frequency": "daily"
+}
+ +

Remove Scheduled Job

+ DELETE /api/horoscope/schedule/{name} +
+
+
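Taken together, a job can be registered, inspected, and removed through these endpoints (a sketch; the job name must be one the scheduler service recognizes):

import requests

base = "http://localhost:5000/api/horoscope/schedule"

# Register a daily scraping job
requests.post(base, json={"name": "scrape_daily_horoscopes", "frequency": "daily"})

# Inspect the registered jobs
for job in requests.get(base).json()["jobs"]:
    print(job["name"], job["next_run"])

# Remove the job again by name
requests.delete(f"{base}/scrape_daily_horoscopes")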
+ +
+

Examples

+ +

Python Example

+

+import requests
+import json
+
+# Get horoscope for Aries
+response = requests.get('http://localhost:5000/api/horoscope/get/aries')
+data = response.json()
+print(f"Aries horoscope: {data['prediction']}")
+
+# Scrape and consolidate horoscope
+response = requests.post(
+    'http://localhost:5000/api/horoscope/consolidate/taurus',
+    json={}  # Use default date (today)
+)
+consolidated = response.json()
+print(f"Consolidated Taurus horoscope: {consolidated['horoscope']['consolidated_prediction']}")
+                    
+ +

JavaScript Example

+

+// Get all horoscopes for today
+async function getAllHoroscopes() {
+  const response = await fetch('http://localhost:5000/api/horoscope/get-all');
+  const data = await response.json();
+  
+  // Display each sign's horoscope
+  for (const sign in data.horoscopes) {
+    const horoscopes = data.horoscopes[sign];
+    if (horoscopes.length > 0) {
+      console.log(`${sign.toUpperCase()}: ${horoscopes[0].prediction}`);
+    }
+  }
+}
+
+getAllHoroscopes();
+                    
+
+
+
+
+ +
+
+ Cosmic Guide - AI-Powered Horoscopes © 2025 +
+
+ + + + + + + diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000000000000000000000000000000000000..a7838a4a1a44b4daa606cfbc1917e887eeb956ca --- /dev/null +++ b/templates/index.html @@ -0,0 +1,335 @@ + + + + + + Daily Horoscopes - AI-Powered Astrology + + + + + + + + + + +
+
+
+

AI-Powered Daily Horoscopes

+

Discover what the stars have in store for you with our AI-enhanced horoscope readings, consolidated from multiple trusted astrological sources.

+ +
+
+ +
+
+
+

Daily Horoscopes

+
+
+
+
Aries
Mar 21 - Apr 19
+
+
+
+
Taurus
Apr 20 - May 20
+
+
+
+
Gemini
May 21 - Jun 20
+
+
+
+
Cancer
Jun 21 - Jul 22
+
+
+
+
Leo
Jul 23 - Aug 22
+
+
+
+
Virgo
Aug 23 - Sep 22
+
+
+
+
Libra
Sep 23 - Oct 22
+
+
+
+
Scorpio
Oct 23 - Nov 21
+
+
+
+
Sagittarius
Nov 22 - Dec 21
+
+
+
+
Capricorn
Dec 22 - Jan 19
+
+
+
+
Aquarius
Jan 20 - Feb 18
+
+
+
+
Pisces
Feb 19 - Mar 20
+
+
+
+
+
+
+

How It Works

+

Our AI-powered system scrapes multiple trusted astrological sources daily and uses advanced language models to consolidate the predictions.

+
    +
  • Scrapes multiple astrological websites
  • +
  • Uses AI to consolidate predictions
  • +
  • Updates daily for fresh insights
  • +
  • Exports to WordPress for easy publishing
  • +
  • Accessible via API for developers
  • +
+
+
+
+ +
+
+
+
+

Your Daily Horoscope

+
+
+
+

Select your zodiac sign above to view your horoscope

+
+
+
+
+
+

+

+
+
+
+ Loading... +
+
+
+
+
+
+
+
+ +
+
+
+
+

About Cosmic Guide

+
+
+

Cosmic Guide is an AI-powered horoscope consolidation service that brings together multiple astrological sources to provide you with the most comprehensive daily readings.

+

Our system uses advanced web scraping techniques to collect data from trusted astrological websites, and then employs state-of-the-art language models to analyze and consolidate the information into a coherent reading.

+

Whether you're a casual horoscope reader or a dedicated astrology enthusiast, Cosmic Guide offers insightful predictions that can help guide your day.

+

API Access

+

Developers can access our horoscope data through our API. Check out the API documentation for more details.

+
+
+
+
+
+ +
+
+ Cosmic Guide - AI-Powered Horoscopes © 2025 +
+
+ + + + + + + + diff --git a/templates/mt564.html b/templates/mt564.html new file mode 100644 index 0000000000000000000000000000000000000000..1e8031810e8f2fa6d10e141cdfb7670c92d1dd89 --- /dev/null +++ b/templates/mt564.html @@ -0,0 +1,328 @@ + + + + + + MT564 TinyLlama Training - SWIFT Assistant + + + + + + + + +
+
+
+

SWIFT MT564 TinyLlama Training

+

Train specialized AI models on SWIFT MT564 Corporate Action Notification documentation for financial messaging assistance.

+
+
+ +
+
+
+
+

Upload MT564 Documentation

+
+
+
+
+ + +
Upload SWIFT MT564 documentation files (PDF, JSON, or TXT)
+
+ +
+
+
+
+
+ +
+
+
+

Training Configuration

+
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+
+
+
+
+ +
+
+
+
+

Training Status

+
+
+
+

No training in progress

+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+
+
+

Test Trained Model

+
+
+
+
+ + +
+ +
+
+
+
+
+
+ +
+
+
+
+

About MT564 Training

+
+
+

MT564 is a SWIFT message type used for Corporate Action Notifications in financial services.

+
    +
  • Purpose: Notifies account holders of corporate actions affecting their securities
  • +
  • Content: Details about dividends, stock splits, mergers, and other corporate events
  • +
  • Training: The TinyLlama model learns message structure, field definitions, and usage patterns (see the sample fragment below)
  • +
  • Use Cases: Message parsing, field validation, documentation assistance
  • +
+
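For orientation, the opening of an MT564 General Information sequence looks roughly like this (an illustrative fragment with invented reference values, not a complete or validated message; :22F::CAEV//DVCA flags a cash dividend event):

:16R:GENL
:20C::CORP//CA000000123
:20C::SEME//SEMEREF001
:23G:NEWM
:22F::CAEV//DVCA
:16S:GENL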
+ Note: This is for documentation and training purposes only. Do not use for production financial messaging without proper validation. +
+
+
+
+
+
+ +
+
+ SWIFT MT564 Assistant © 2025 +
+
+ + + + + + \ No newline at end of file diff --git a/templates/simple_index.html b/templates/simple_index.html new file mode 100644 index 0000000000000000000000000000000000000000..5ac5f03401141f0e1cd8ce8c0b50c6dda99b9ce7 --- /dev/null +++ b/templates/simple_index.html @@ -0,0 +1,297 @@ + + + + + + AI-Powered Daily Horoscopes + + + + +
+
+
+

AI-Powered Daily Horoscopes

+

Get personalized horoscope predictions powered by AI consolidation from multiple trusted sources

+
+
+ +
+
+
+
+
Single Sign Horoscope
+
+
+
+
+ + +
+
+ + +
+ +
+
+ Loading... +
+ Fetching horoscope... +
+
+
+
+
+ +
+
+
+
All Signs with AI Consolidation
+
+
+

Scrape horoscopes for all zodiac signs from multiple sources and get AI-consolidated predictions.

+ +
+
+ Loading... +
+ Scraping all horoscopes... +
+
+
+
+
+ +
+
+
+
+
+ +
+
+
+
+
API Endpoints
+
+
+
Available Endpoints (a quick usage sketch follows this list):
+
    +
  • GET /api/health - Health check
  • +
  • POST /api/horoscope/scrape - Scrape single horoscope
  • +
  • POST /api/horoscope/scrape-all - Scrape all horoscopes
  • +
  • POST /api/horoscope/consolidate - AI consolidate predictions
  • +
  • GET /api/signs - Get zodiac signs list
  • +
+
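For example, checking service health and listing the supported signs against a local instance (a minimal sketch):

import requests

print(requests.get("http://localhost:5000/api/health").json())
print(requests.get("http://localhost:5000/api/signs").json()["signs"])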
+
+
+
+
+ + + + \ No newline at end of file diff --git a/test_horoscope.py b/test_horoscope.py new file mode 100644 index 0000000000000000000000000000000000000000..07323c33ad82673a1bbb8f45c21a955e491d6803 --- /dev/null +++ b/test_horoscope.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +""" +Test script for horoscope system integration with astroastayogini.in +""" + +import requests +import json +from datetime import date +import sys + +def test_astrology_scraper(): + """Test direct scraping from astrology.com""" + print("Testing Astrology.com scraper...") + + try: + from scrapers.astrology_com_scraper import AstrologyComScraper + + scraper = AstrologyComScraper() + result = scraper.scrape_sign(scraper.base_url, "aries") + + if result.get('success'): + print(f"✅ Successfully scraped Aries horoscope from astrology.com") + print(f"Prediction: {result['prediction'][:100]}...") + return True + else: + print(f"❌ Failed to scrape: {result.get('error')}") + return False + + except Exception as e: + print(f"❌ Error testing astrology.com: {str(e)}") + return False + +def test_horoscope_scraper(): + """Test direct scraping from horoscope.com""" + print("\nTesting Horoscope.com scraper...") + + try: + from scrapers.horoscope_com_scraper import HoroscopeComScraper + + scraper = HoroscopeComScraper() + result = scraper.scrape_sign(scraper.base_url, "leo") + + if result.get('success'): + print(f"✅ Successfully scraped Leo horoscope from horoscope.com") + print(f"Prediction: {result['prediction'][:100]}...") + return True + else: + print(f"❌ Failed to scrape: {result.get('error')}") + return False + + except Exception as e: + print(f"❌ Error testing horoscope.com: {str(e)}") + return False + +def test_wordpress_integration(): + """Test WordPress integration readiness for astroastayogini.in""" + print("\nTesting WordPress integration readiness...") + + try: + from services.wordpress_service import WordPressService + + # Test with mock astroastayogini.in configuration + wp_service = WordPressService() + + if wp_service.is_configured: + print("✅ WordPress service is configured") + test_result = wp_service.test_connection() + if test_result.get('success'): + print("✅ WordPress connection test successful") + else: + print(f"⚠️ WordPress connection test failed: {test_result.get('error')}") + else: + print("⚠️ WordPress not configured yet (expected - needs astroastayogini.in credentials)") + + return True + + except Exception as e: + print(f"❌ Error testing WordPress integration: {str(e)}") + return False + +def test_openai_integration(): + """Test OpenAI integration for horoscope consolidation""" + print("\nTesting OpenAI integration...") + + import os + api_key = os.environ.get("OPENAI_API_KEY") + + if not api_key: + print("⚠️ OpenAI API key not configured (provide OPENAI_API_KEY for AI consolidation)") + return False + + try: + from openai import OpenAI + + client = OpenAI(api_key=api_key) + + # Test with sample horoscope data + test_predictions = [ + { + "source": "astrology.com", + "prediction": "Today brings positive energy for new beginnings." + }, + { + "source": "horoscope.com", + "prediction": "Financial opportunities may present themselves today." + } + ] + + # Create consolidation prompt + sources_text = "" + for i, pred in enumerate(test_predictions, 1): + sources_text += f"SOURCE {i} ({pred['source']}):\n{pred['prediction']}\n\n" + + prompt = f""" + Please analyze and consolidate these daily horoscope predictions for ARIES. 
+ + {sources_text} + + Create a single, coherent daily horoscope prediction that synthesizes the information from all sources. + Focus on the common themes and advice while maintaining the mystical and guiding tone typical of horoscopes. + The response should be 2-3 paragraphs long and should NOT mention the sources or that it's a consolidation. + + Respond with JSON in this format: + {{ + "consolidated_prediction": "The consolidated horoscope text..." + }} + """ + + response = client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": "You are an expert astrologer specializing in synthesizing horoscope predictions."}, + {"role": "user", "content": prompt} + ], + response_format={"type": "json_object"}, + temperature=0.7 + ) + + content = response.choices[0].message.content + if content: + result = json.loads(content) + consolidated = result.get("consolidated_prediction", "") + if consolidated: + print("✅ OpenAI consolidation test successful") + print(f"Consolidated prediction: {consolidated[:100]}...") + return True + + print("❌ OpenAI consolidation test failed") + return False + + except Exception as e: + print(f"❌ Error testing OpenAI: {str(e)}") + return False + +def main(): + """Run all tests for astroastayogini.in integration""" + print("🌟 Testing Horoscope System for astroastayogini.in Integration") + print("=" * 60) + + tests = [ + test_astrology_scraper, + test_horoscope_scraper, + test_wordpress_integration, + test_openai_integration + ] + + results = [] + for test in tests: + try: + result = test() + results.append(result) + except Exception as e: + print(f"❌ Test failed with exception: {str(e)}") + results.append(False) + + print("\n" + "=" * 60) + print("INTEGRATION READINESS SUMMARY:") + print("=" * 60) + + if results[0] and results[1]: + print("✅ Horoscope Scraping: READY") + else: + print("❌ Horoscope Scraping: NEEDS ATTENTION") + + if results[2]: + print("✅ WordPress Integration: READY (needs astroastayogini.in credentials)") + else: + print("❌ WordPress Integration: NEEDS ATTENTION") + + if results[3]: + print("✅ AI Consolidation: READY") + else: + print("⚠️ AI Consolidation: NEEDS OPENAI_API_KEY") + + success_rate = sum(results) / len(results) + print(f"\nOverall Readiness: {success_rate*100:.0f}%") + + if success_rate >= 0.5: + print("\n🎉 System is ready for astroastayogini.in integration!") + print("\nNext steps:") + print("1. Provide WordPress credentials for astroastayogini.in") + print("2. Provide OpenAI API key for enhanced horoscopes") + print("3. Configure automated daily publishing") + else: + print("\n⚠️ System needs configuration before integration") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tinyllama_training_example.ipynb b/tinyllama_training_example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..63a103bf2155c9c2f4c03ddf1794a68d5f9f0cf3 --- /dev/null +++ b/tinyllama_training_example.ipynb @@ -0,0 +1,378 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TinyLlama Fine-Tuning Example\n", + "\n", + "This notebook demonstrates how to fine-tune a TinyLlama model on a custom dataset. We'll go through the following steps:\n", + "\n", + "1. Setting up the environment\n", + "2. Loading the model and tokenizer\n", + "3. Preparing the dataset\n", + "4. Fine-tuning the model\n", + "5. Evaluating the results\n", + "6. 
Saving and using the fine-tuned model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Setting up the environment\n", + "\n", + "First, let's install the necessary libraries if they're not already installed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": [ + "# Install required libraries\n", + "!pip install torch transformers datasets accelerate tqdm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import required libraries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": [ + "import os\n", + "import json\n", + "import torch\n", + "from transformers import (\n", + " AutoModelForCausalLM, \n", + " AutoTokenizer,\n", + " Trainer, \n", + " TrainingArguments,\n", + " DataCollatorForLanguageModeling\n", + ")\n", + "from datasets import Dataset\n", + "from tqdm.notebook import tqdm\n", + "\n", + "# Check if GPU is available\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "print(f\"Using device: {device}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Loading the model and tokenizer\n", + "\n", + "We'll use the TinyLlama-1.1B-Chat-v1.0 model from Hugging Face." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": [ + "model_name = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\"\n", + "\n", + "# Load tokenizer\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + "# Ensure the tokenizer has a padding token\n", + "if tokenizer.pad_token is None:\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + "\n", + "# Load model with reduced precision to save memory\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " model_name,\n", + " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n", + " low_cpu_mem_usage=True\n", + ")\n", + "model = model.to(device)\n", + "\n", + "print(f\"Model and tokenizer loaded: {model_name}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Preparing the dataset\n", + "\n", + "Let's load our example training data and format it properly for fine-tuning." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": [ + "# Load example data\n", + "with open('example_training_data.json', 'r', encoding='utf-8') as f:\n", + " data = json.load(f)\n", + "\n", + "# Format data for instruction fine-tuning\n", + "formatted_data = []\n", + "for item in data:\n", + " # Format as a chat-like conversation\n", + " formatted_text = f\"<|im_start|>user\\n{item['instruction']}<|im_end|>\\n<|im_start|>assistant\\n{item['response']}<|im_end|>\"\n", + " formatted_data.append({\"text\": formatted_text})\n", + "\n", + "# Create a Hugging Face dataset\n", + "dataset = Dataset.from_list(formatted_data)\n", + "print(f\"Dataset created with {len(dataset)} examples\")\n", + "\n", + "# Show an example\n", + "print(\"\\nExample entry:\")\n", + "print(dataset[0]['text'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": [ + "# Tokenize the dataset\n", + "def tokenize_function(examples):\n", + " return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True, max_length=512)\n", + "\n", + "# Add labels for causal language modeling\n", + "def add_labels(examples):\n", + " examples[\"labels\"] = examples[\"input_ids\"].copy()\n", + " return examples\n", + "\n", + "# Process dataset\n", + "tokenized_dataset = dataset.map(tokenize_function, batched=True)\n", + "tokenized_dataset = tokenized_dataset.map(add_labels, batched=True)\n", + "tokenized_dataset = tokenized_dataset.remove_columns([\"text\"])\n", + "\n", + "# Split into training and evaluation sets\n", + "tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1)\n", + "print(f\"Training examples: {len(tokenized_dataset['train'])}\")\n", + "print(f\"Evaluation examples: {len(tokenized_dataset['test'])}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Fine-tuning the model\n", + "\n", + "Now we'll set up the training configuration and fine-tune the model." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "# Set up training arguments\n",
+    "output_dir = \"./fine_tuned_tinyllama\"\n",
+    "\n",
+    "training_args = TrainingArguments(\n",
+    "    output_dir=output_dir,\n",
+    "    overwrite_output_dir=True,\n",
+    "    num_train_epochs=3, # Adjust based on your dataset size\n",
+    "    per_device_train_batch_size=2, # Adjust based on your GPU memory\n",
+    "    per_device_eval_batch_size=2,\n",
+    "    gradient_accumulation_steps=4, # Accumulate gradients to simulate larger batch size\n",
+    "    learning_rate=2e-5,\n",
+    "    weight_decay=0.01,\n",
+    "    logging_dir=f\"{output_dir}/logs\",\n",
+    "    logging_steps=10,\n",
+    "    eval_steps=100,\n",
+    "    save_steps=100,\n",
+    "    save_total_limit=2, # Only keep the 2 most recent checkpoints\n",
+    "    evaluation_strategy=\"steps\",\n",
+    "    fp16=torch.cuda.is_available(), # Use mixed precision if GPU is available\n",
+    "    warmup_steps=100,\n",
+    "    report_to=\"none\", # Disable reporting to wandb, etc.\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "# Set up data collator\n",
+    "data_collator = DataCollatorForLanguageModeling(\n",
+    "    tokenizer=tokenizer,\n",
+    "    mlm=False # We're doing causal language modeling, not masked language modeling\n",
+    ")\n",
+    "\n",
+    "# Set up trainer\n",
+    "trainer = Trainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=tokenized_dataset[\"train\"],\n",
+    "    eval_dataset=tokenized_dataset[\"test\"],\n",
+    "    data_collator=data_collator,\n",
+    ")\n",
+    "\n",
+    "# Train the model\n",
+    "print(\"Starting fine-tuning...\")\n",
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Evaluating the results\n",
+    "\n",
+    "Let's save the fine-tuned model and then evaluate it on some test prompts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "# Save the fine-tuned model\n",
+    "trainer.save_model(output_dir)\n",
+    "tokenizer.save_pretrained(output_dir)\n",
+    "print(f\"Model saved to {output_dir}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "# Test the model with a few prompts\n",
+    "test_prompts = [\n",
+    "    \"Explain the concept of neural networks.\",\n",
+    "    \"Write a short story about a robot that learns to feel emotions.\",\n",
+    "    \"What are three sustainable energy sources and how do they work?\"\n",
+    "]\n",
+    "\n",
+    "# Format prompts for the chat model\n",
+    "formatted_prompts = [f\"<|im_start|>user\\n{prompt}<|im_end|>\\n<|im_start|>assistant\\n\" for prompt in test_prompts]\n",
+    "\n",
+    "# Generate responses\n",
+    "for i, prompt in enumerate(formatted_prompts):\n",
+    "    print(f\"\\n\\nPrompt {i+1}: {test_prompts[i]}\")\n",
+    "    print(\"\\nGenerating response...\")\n",
+    "    \n",
+    "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
+    "    \n",
+    "    with torch.no_grad():\n",
+    "        outputs = model.generate(\n",
+    "            inputs.input_ids,\n",
+    "            max_new_tokens=256,\n",
+    "            temperature=0.7,\n",
+    "            do_sample=True,\n",
+    "            pad_token_id=tokenizer.eos_token_id\n",
+    "        )\n",
+    "    \n",
+    "    # Get only the newly generated text (not the prompt)\n",
+    "    response_text = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)\n",
+    "    \n",
+    "    print(f\"Response: {response_text}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. 
Using the fine-tuned model\n", + "\n", + "Here's how you can load and use your fine-tuned model in the future." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": [ + "# Load the fine-tuned model and tokenizer\n", + "def load_fine_tuned_model(model_path):\n", + " tokenizer = AutoTokenizer.from_pretrained(model_path)\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " model_path,\n", + " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32\n", + " )\n", + " model = model.to(device)\n", + " return model, tokenizer\n", + "\n", + "# Example of loading the model (uncomment to run)\n", + "# fine_tuned_model, fine_tuned_tokenizer = load_fine_tuned_model(output_dir)\n", + "\n", + "# Function to generate a response\n", + "def generate_response(model, tokenizer, prompt, max_length=256, temperature=0.7):\n", + " # Format the prompt\n", + " formatted_prompt = f\"<|im_start|>user\\n{prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n", + " \n", + " # Tokenize\n", + " inputs = tokenizer(formatted_prompt, return_tensors=\"pt\").to(device)\n", + " \n", + " # Generate\n", + " with torch.no_grad():\n", + " outputs = model.generate(\n", + " inputs.input_ids,\n", + " max_new_tokens=max_length,\n", + " temperature=temperature,\n", + " do_sample=True,\n", + " pad_token_id=tokenizer.eos_token_id\n", + " )\n", + " \n", + " # Decode\n", + " full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + " \n", + " # Extract assistant's response\n", + " try:\n", + " assistant_response = full_response.split(\"<|im_start|>assistant\\n\")[1].split(\"<|im_end|>\")[0]\n", + " except IndexError:\n", + " assistant_response = full_response.replace(prompt, \"\").strip()\n", + " \n", + " return assistant_response\n", + "\n", + "# Example usage (uncomment to run)\n", + "# response = generate_response(fine_tuned_model, fine_tuned_tokenizer, \"Explain quantum computing.\")\n", + "# print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "You've successfully fine-tuned a TinyLlama model on a custom dataset! You can now use this model for various applications:\n", + "\n", + "1. Integrate it into a chatbot or virtual assistant\n", + "2. Use it for content generation\n", + "3. Deploy it as part of a web application\n", + "4. Fine-tune it further on more specific data\n", + "\n", + "You can also experiment with different hyperparameters and training strategies to improve results." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/train_mt564_model.py b/train_mt564_model.py new file mode 100644 index 0000000000000000000000000000000000000000..eabba770a076ebcf98b96d38b3d8e0d3e101b5bb --- /dev/null +++ b/train_mt564_model.py @@ -0,0 +1,221 @@ +""" +MT564 TinyLlama Training Script +This script fine-tunes a TinyLlama model on MT564 format specifications data. 
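+
+Example usage (values shown are the script defaults):
+    python train_mt564_model.py --training_data mt564_training_data.json \
+        --output_dir ./mt564_tinyllama_model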
+
+For lower memory usage, it defaults to the small "sshleifer/tiny-gpt2" model
+(or "distilgpt2") instead of the full TinyLlama-1.1B.
+"""
+
+import os
+import json
+import argparse
+import logging
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Fine-tune TinyLlama on MT564 format specifications")
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        #default="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        default="sshleifer/tiny-gpt2",
+        help="Hugging Face model ID or path to local model"
+    )
+    parser.add_argument(
+        "--training_data",
+        type=str,
+        default="mt564_training_data.json",
+        help="Path to the MT564 training data JSON file"
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        # Must be a local directory path, not a Hugging Face model ID
+        default="./mt564_tinyllama_model",
+        help="Directory to save the fine-tuned model"
+    )
+    parser.add_argument(
+        "--epochs",
+        type=int,
+        default=3,
+        help="Number of training epochs"
+    )
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=2,
+        help="Training batch size"
+    )
+    parser.add_argument(
+        "--gradient_accumulation_steps",
+        type=int,
+        default=4,
+        help="Number of update steps to accumulate before performing a backward/update pass"
+    )
+    parser.add_argument(
+        "--learning_rate",
+        type=float,
+        default=2e-5,
+        help="Learning rate"
+    )
+    parser.add_argument(
+        "--max_length",
+        type=int,
+        default=512,
+        help="Maximum sequence length"
+    )
+    parser.add_argument(
+        "--fp16",
+        action="store_true",
+        help="Whether to use 16-bit (fp16) mixed-precision training"
+    )
+    parser.add_argument(
+        "--eval_ratio",
+        type=float,
+        default=0.1,
+        help="Ratio of data to use for evaluation"
+    )
+    return parser.parse_args()
+
+def format_training_data(data):
+    """Format training data for the model"""
+    formatted_data = []
+
+    for item in data:
+        # Format as a chat-like conversation for TinyLlama
+        formatted_text = f"<|im_start|>user\n{item['instruction']}<|im_end|>\n<|im_start|>assistant\n{item['response']}<|im_end|>"
+        formatted_data.append({"text": formatted_text})
+
+    return formatted_data
+
+def main():
+    args = parse_args()
+
+    try:
+        # Import necessary libraries
+        import torch
+        from transformers import (
+            AutoModelForCausalLM,
+            AutoTokenizer,
+            Trainer,
+            TrainingArguments,
+            DataCollatorForLanguageModeling
+        )
+        from datasets import Dataset
+    except ImportError:
+        logger.error("Required libraries not installed. Please install torch, transformers, and datasets.")
+        return
+
+    # Load training data
+    logger.info(f"Loading training data from {args.training_data}")
+    with open(args.training_data, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    # Format data for training
+    formatted_data = format_training_data(data)
+    logger.info(f"Formatted {len(formatted_data)} training examples")
+
+    # Create dataset
+    dataset = Dataset.from_list(formatted_data)
+
+    # Split into training and evaluation sets
+    dataset = dataset.train_test_split(test_size=args.eval_ratio)
+
+    # Load tokenizer
+    logger.info(f"Loading tokenizer for {args.model_name}")
+    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
+
+    # Ensure the tokenizer has a padding token
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    # Tokenize datasets
+    def tokenize_function(examples):
+        return tokenizer(
+            examples["text"],
+            padding="max_length",
+            truncation=True,
+            max_length=args.max_length
+        )
+
+    logger.info("Tokenizing datasets")
+    tokenized_train = dataset["train"].map(tokenize_function, batched=True)
+    tokenized_eval = dataset["test"].map(tokenize_function, batched=True)
+
+    # Create label column for causal language modeling
+    tokenized_train = tokenized_train.map(
+        lambda examples: {"labels": examples["input_ids"]},
+        batched=True
+    )
+    tokenized_eval = tokenized_eval.map(
+        lambda examples: {"labels": examples["input_ids"]},
+        batched=True
+    )
+
+    # Remove the text column as it's no longer needed
+    tokenized_train = tokenized_train.remove_columns(["text"])
+    tokenized_eval = tokenized_eval.remove_columns(["text"])
+
+    # Load model
+    logger.info(f"Loading model {args.model_name}")
+    model = AutoModelForCausalLM.from_pretrained(
+        args.model_name,
+        torch_dtype=torch.float16 if args.fp16 and torch.cuda.is_available() else torch.float32
+    )
+    # Keep the model on CPU to limit memory usage on machines
+    # without a CUDA GPU (e.g. Macs)
+    device = torch.device("cpu")
+    model.to(device)
+
+    # Set up training arguments
+    training_args = TrainingArguments(
+        output_dir=args.output_dir,
+        overwrite_output_dir=True,
+        num_train_epochs=args.epochs,
+        per_device_train_batch_size=args.batch_size,
+        per_device_eval_batch_size=args.batch_size,
+        gradient_accumulation_steps=args.gradient_accumulation_steps,
+        learning_rate=args.learning_rate,
+        weight_decay=0.01,
+        warmup_steps=100,
+        logging_dir=f"{args.output_dir}/logs",
+        logging_steps=10,
+        eval_steps=100,
+        save_steps=100,
+        evaluation_strategy="steps",
+        save_total_limit=2,
+        fp16=args.fp16 and torch.cuda.is_available(),
+        report_to="none"
+    )
+
+    # Set up data collator
+    data_collator = DataCollatorForLanguageModeling(
+        tokenizer=tokenizer,
+        mlm=False
+    )
+
+    # Set up trainer
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=tokenized_train,
+        eval_dataset=tokenized_eval,
+        data_collator=data_collator,
+    )
+
+    # Start training
+    logger.info("Starting fine-tuning")
+    trainer.train()
+
+    # Save the model
+    logger.info(f"Saving model to {args.output_dir}")
+    trainer.save_model(args.output_dir)
+    tokenizer.save_pretrained(args.output_dir)
+
+    logger.info("Fine-tuning complete!")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/train_tinyllama.py b/train_tinyllama.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4bead1bb983b48d03b1f4712553acf2654b3a9a
--- /dev/null
+++ b/train_tinyllama.py
@@ -0,0 +1,161 @@
+"""
+TinyLlama Training Script
+This script provides functionality to fine-tune a TinyLlama model on custom data.
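+
+Example usage (assuming a JSON dataset with a "text" column at a path of your choosing):
+    python train_tinyllama.py --data_path ./data/train.json --output_dir ./results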
+
+"""
+
+import os
+import argparse
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    Trainer,
+    TrainingArguments,
+    DataCollatorForLanguageModeling,
+)
+from datasets import load_dataset
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Fine-tune a TinyLlama model")
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        default="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        help="Model identifier from HuggingFace"
+    )
+    parser.add_argument(
+        "--data_path",
+        type=str,
+        required=True,
+        help="Path to the dataset file (JSON or CSV)"
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="./results",
+        help="Directory to save model checkpoints"
+    )
+    parser.add_argument(
+        "--epochs",
+        type=int,
+        default=3,
+        help="Number of training epochs"
+    )
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=4,
+        help="Training batch size"
+    )
+    parser.add_argument(
+        "--learning_rate",
+        type=float,
+        default=5e-5,
+        help="Learning rate"
+    )
+    parser.add_argument(
+        "--text_column",
+        type=str,
+        default="text",
+        help="Column name containing the text data"
+    )
+    parser.add_argument(
+        "--gradient_accumulation_steps",
+        type=int,
+        default=4,
+        help="Number of update steps to accumulate before updating weights"
+    )
+    parser.add_argument(
+        "--max_length",
+        type=int,
+        default=512,
+        help="Maximum sequence length"
+    )
+    parser.add_argument(
+        "--fp16",
+        action="store_true",
+        help="Use mixed precision training"
+    )
+    return parser.parse_args()
+
+def preprocess_function(examples, tokenizer, max_length, text_column="text"):
+    """Tokenize text examples from the configured text column"""
+    result = tokenizer(
+        examples[text_column],
+        truncation=True,
+        max_length=max_length,
+        padding="max_length",
+    )
+    result["labels"] = result["input_ids"].copy()
+    return result
+
+def main():
+    args = parse_args()
+
+    logger.info(f"Loading model: {args.model_name}")
+    model = AutoModelForCausalLM.from_pretrained(args.model_name)
+    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
+
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    # Load and prepare the dataset
+    logger.info(f"Loading dataset from: {args.data_path}")
+    data_extension = os.path.splitext(args.data_path)[1].replace(".", "")
+
+    dataset = load_dataset(data_extension, data_files=args.data_path)
+
+    # Preprocess the dataset, reading from the column named by --text_column
+    logger.info("Preprocessing dataset")
+    tokenized_dataset = dataset.map(
+        lambda examples: preprocess_function(examples, tokenizer, args.max_length, args.text_column),
+        batched=True,
+        remove_columns=dataset["train"].column_names,
+    )
+
+    # Setup training arguments
+    training_args = TrainingArguments(
+        output_dir=args.output_dir,
+        overwrite_output_dir=True,
+        num_train_epochs=args.epochs,
+        per_device_train_batch_size=args.batch_size,
+        gradient_accumulation_steps=args.gradient_accumulation_steps,
+        learning_rate=args.learning_rate,
+        weight_decay=0.01,
+        logging_dir=os.path.join(args.output_dir, "logs"),
+        logging_steps=10,
+        save_steps=500,
+        save_total_limit=2,
+        fp16=args.fp16,
+    )
+
+    # Setup trainer
+    data_collator = DataCollatorForLanguageModeling(
+        tokenizer=tokenizer,
+        mlm=False,
+    )
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=tokenized_dataset["train"],
+        data_collator=data_collator,
+    )
+
+    # Start training
+    logger.info("Starting training...")
+    trainer.train()
+
+    # Save trained model
+    logger.info(f"Saving model to {args.output_dir}")
+
trainer.save_model(args.output_dir) + tokenizer.save_pretrained(args.output_dir) + + logger.info("Training complete!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..28039d3f7fbea66b357161e3356b504c671d4849 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1 @@ +# This file makes the utils directory a Python package diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..4c08f41f14794d9d470312550b55f315f68f5a95 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,58 @@ +import logging +import os +import sys +from typing import Optional + +def setup_logger( + logger_name: str = None, + log_level: int = logging.INFO, + log_format: Optional[str] = None, + log_file: Optional[str] = None, + console_output: bool = True +) -> logging.Logger: + """ + Set up a logger with the specified configuration + + Args: + logger_name: Name of the logger (uses root logger if None) + log_level: Logging level (default: INFO) + log_format: Custom log format (uses default if None) + log_file: Path to log file (logs to file if provided) + console_output: Whether to output logs to console + + Returns: + Configured logger instance + """ + # Get logger + logger = logging.getLogger(logger_name) + logger.setLevel(log_level) + + # Clear existing handlers if any + if logger.handlers: + for handler in logger.handlers[:]: + logger.removeHandler(handler) + + # Default log format + if log_format is None: + log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + + formatter = logging.Formatter(log_format) + + # Add file handler if log file is specified + if log_file: + # Ensure directory exists + log_dir = os.path.dirname(log_file) + if log_dir and not os.path.exists(log_dir): + os.makedirs(log_dir) + + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + # Add console handler if enabled + if console_output: + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + return logger diff --git a/utils/rate_limiter.py b/utils/rate_limiter.py new file mode 100644 index 0000000000000000000000000000000000000000..ec169f5999a7171288086f23af99dba4e3fa8704 --- /dev/null +++ b/utils/rate_limiter.py @@ -0,0 +1,58 @@ +import time +from collections import deque +from typing import Deque, Tuple + + +class RateLimiter: + """ + Simple rate limiter to prevent overwhelming target websites or APIs + Implements a sliding window algorithm + """ + + def __init__(self, window_size: int = 60, max_requests: int = 10): + """ + Initialize the rate limiter + + Args: + window_size: Time window in seconds + max_requests: Maximum allowed requests in the window + """ + self.window_size = window_size + self.max_requests = max_requests + self.request_timestamps: Deque[float] = deque() + + def record_request(self) -> None: + """Record a request with the current timestamp""" + current_time = time.time() + self.request_timestamps.append(current_time) + self._clean_old_timestamps(current_time) + + def can_proceed(self) -> bool: + """Check if a new request can proceed within rate limits""" + current_time = time.time() + self._clean_old_timestamps(current_time) + return len(self.request_timestamps) < self.max_requests + + def get_wait_time(self) -> float: + """Calculate time to wait (in seconds) before next request is 
allowed""" + if self.can_proceed(): + return 0.0 + + current_time = time.time() + oldest_allowed_time = current_time - self.window_size + + if not self.request_timestamps: + return 0.0 + + # Find the oldest timestamp in the window and calculate when it will expire + oldest_in_window = self.request_timestamps[0] + time_until_oldest_expires = oldest_in_window - oldest_allowed_time + + return max(0.0, time_until_oldest_expires) + + def _clean_old_timestamps(self, current_time: float) -> None: + """Remove timestamps that are outside the current window""" + oldest_allowed_time = current_time - self.window_size + + while self.request_timestamps and self.request_timestamps[0] < oldest_allowed_time: + self.request_timestamps.popleft() diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000000000000000000000000000000000000..da9577b2b159e03c00b46062362744b756ef7531 --- /dev/null +++ b/uv.lock @@ -0,0 +1,961 @@ +version = 1 +requires-python = ">=3.11" + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anyio" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916 }, +] + +[[package]] +name = "babel" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537 }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.13.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285 }, +] + +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458 }, +] + +[[package]] +name = "certifi" +version = "2025.4.26" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/85/4c40d00dcc6284a1c1ad5de5e0996b06f39d8232f1031cd23c2f5c07ee86/charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2", size = 198794 }, + { url = "https://files.pythonhosted.org/packages/41/d9/7a6c0b9db952598e97e93cbdfcb91bacd89b9b88c7c983250a77c008703c/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645", size = 142846 }, + { url = "https://files.pythonhosted.org/packages/66/82/a37989cda2ace7e37f36c1a8ed16c58cf48965a79c2142713244bf945c89/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd", size = 153350 }, + { url = "https://files.pythonhosted.org/packages/df/68/a576b31b694d07b53807269d05ec3f6f1093e9545e8607121995ba7a8313/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8", size = 145657 }, + { url = "https://files.pythonhosted.org/packages/92/9b/ad67f03d74554bed3aefd56fe836e1623a50780f7c998d00ca128924a499/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f", size = 147260 }, + { url = 
"https://files.pythonhosted.org/packages/a6/e6/8aebae25e328160b20e31a7e9929b1578bbdc7f42e66f46595a432f8539e/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7", size = 149164 }, + { url = "https://files.pythonhosted.org/packages/8b/f2/b3c2f07dbcc248805f10e67a0262c93308cfa149a4cd3d1fe01f593e5fd2/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9", size = 144571 }, + { url = "https://files.pythonhosted.org/packages/60/5b/c3f3a94bc345bc211622ea59b4bed9ae63c00920e2e8f11824aa5708e8b7/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544", size = 151952 }, + { url = "https://files.pythonhosted.org/packages/e2/4d/ff460c8b474122334c2fa394a3f99a04cf11c646da895f81402ae54f5c42/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82", size = 155959 }, + { url = "https://files.pythonhosted.org/packages/a2/2b/b964c6a2fda88611a1fe3d4c400d39c66a42d6c169c924818c848f922415/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0", size = 153030 }, + { url = "https://files.pythonhosted.org/packages/59/2e/d3b9811db26a5ebf444bc0fa4f4be5aa6d76fc6e1c0fd537b16c14e849b6/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5", size = 148015 }, + { url = "https://files.pythonhosted.org/packages/90/07/c5fd7c11eafd561bb51220d600a788f1c8d77c5eef37ee49454cc5c35575/charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a", size = 98106 }, + { url = "https://files.pythonhosted.org/packages/a8/05/5e33dbef7e2f773d672b6d79f10ec633d4a71cd96db6673625838a4fd532/charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28", size = 105402 }, + { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936 }, + { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790 }, + { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924 }, + { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626 }, + { url = 
"https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567 }, + { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957 }, + { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408 }, + { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399 }, + { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815 }, + { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537 }, + { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565 }, + { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357 }, + { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776 }, + { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622 }, + { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435 }, + { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653 }, + { url = 
"https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231 }, + { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243 }, + { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442 }, + { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147 }, + { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057 }, + { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454 }, + { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174 }, + { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166 }, + { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064 }, + { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641 }, + { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626 }, +] + +[[package]] +name = "click" +version = "8.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/0f/62ca20172d4f87d93cf89665fbaedcd560ac48b465bd1d92bfc7ea6b0a41/click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d", size = 235857 
} +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/58/1f37bf81e3c689cc74ffa42102fa8915b59085f54a6e4a80bc6265c0f6bf/click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c", size = 102156 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "courlan" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "babel" }, + { name = "tld" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/54/6d6ceeff4bed42e7a10d6064d35ee43a810e7b3e8beb4abeae8cff4713ae/courlan-1.3.2.tar.gz", hash = "sha256:0b66f4db3a9c39a6e22dd247c72cfaa57d68ea660e94bb2c84ec7db8712af190", size = 206382 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848 }, +] + +[[package]] +name = "dateparser" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "regex" }, + { name = "tzlocal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/3f/d3207a05f5b6a78c66d86631e60bfba5af163738a599a5b9aa2c2737a09e/dateparser-1.2.1.tar.gz", hash = "sha256:7e4919aeb48481dbfc01ac9683c8e20bfe95bb715a38c1e9f6af889f4f30ccc3", size = 309924 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/0a/981c438c4cd84147c781e4e96c1d72df03775deb1bc76c5a6ee8afa89c62/dateparser-1.2.1-py3-none-any.whl", hash = "sha256:bdcac262a467e6260030040748ad7c10d6bacd4f3b9cdb4cfd2251939174508c", size = 295658 }, +] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + +[[package]] +name = "dnspython" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/4a/263763cb2ba3816dd94b08ad3a33d5fdae34ecb856678773cc40a3605829/dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1", size = 345197 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632 }, 
+] + +[[package]] +name = "email-validator" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521 }, +] + +[[package]] +name = "flask" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "blinker" }, + { name = "click" }, + { name = "itsdangerous" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/de/e47735752347f4128bcf354e0da07ef311a78244eba9e3dc1d4a5ab21a98/flask-3.1.1.tar.gz", hash = "sha256:284c7b8f2f58cb737f0cf1c30fd7eaf0ccfcde196099d24ecede3fc2005aa59e", size = 753440 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl", hash = "sha256:07aae2bb5eaf77993ef57e357491839f5fd9f4dc281593a81a9e4d79a24f295c", size = 103305 }, +] + +[[package]] +name = "flask-cors" +version = "5.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flask" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/32/d8/667bd90d1ee41c96e938bafe81052494e70b7abd9498c4a0215c103b9667/flask_cors-5.0.1.tar.gz", hash = "sha256:6ccb38d16d6b72bbc156c1c3f192bc435bfcc3c2bc864b2df1eb9b2d97b2403c", size = 11643 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/61/4aea5fb55be1b6f95e604627dc6c50c47d693e39cab2ac086ee0155a0abd/flask_cors-5.0.1-py3-none-any.whl", hash = "sha256:fa5cb364ead54bbf401a26dbf03030c6b18fb2fcaf70408096a572b409586b0c", size = 11296 }, +] + +[[package]] +name = "flask-sqlalchemy" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flask" }, + { name = "sqlalchemy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/53/b0a9fcc1b1297f51e68b69ed3b7c3c40d8c45be1391d77ae198712914392/flask_sqlalchemy-3.1.1.tar.gz", hash = "sha256:e4b68bb881802dda1a7d878b2fc84c06d1ee57fb40b874d3dc97dabfa36b8312", size = 81899 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/6a/89963a5c6ecf166e8be29e0d1bf6806051ee8fe6c82e232842e3aeac9204/flask_sqlalchemy-3.1.1-py3-none-any.whl", hash = "sha256:4ba4be7f419dc72f4efd8802d69974803c37259dd42f3913b0dcf75c9447e0a0", size = 25125 }, +] + +[[package]] +name = "greenlet" +version = "3.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/34/c1/a82edae11d46c0d83481aacaa1e578fea21d94a1ef400afd734d47ad95ad/greenlet-3.2.2.tar.gz", hash = "sha256:ad053d34421a2debba45aa3cc39acf454acbcd025b3fc1a9f8a0dee237abd485", size = 185797 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/9f/a47e19261747b562ce88219e5ed8c859d42c6e01e73da6fbfa3f08a7be13/greenlet-3.2.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:dcb9cebbf3f62cb1e5afacae90761ccce0effb3adaa32339a0670fe7805d8068", size = 268635 }, + { url = 
"https://files.pythonhosted.org/packages/11/80/a0042b91b66975f82a914d515e81c1944a3023f2ce1ed7a9b22e10b46919/greenlet-3.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf3fc9145141250907730886b031681dfcc0de1c158f3cc51c092223c0f381ce", size = 628786 }, + { url = "https://files.pythonhosted.org/packages/38/a2/8336bf1e691013f72a6ebab55da04db81a11f68e82bb691f434909fa1327/greenlet-3.2.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:efcdfb9df109e8a3b475c016f60438fcd4be68cd13a365d42b35914cdab4bb2b", size = 640866 }, + { url = "https://files.pythonhosted.org/packages/f8/7e/f2a3a13e424670a5d08826dab7468fa5e403e0fbe0b5f951ff1bc4425b45/greenlet-3.2.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd139e4943547ce3a56ef4b8b1b9479f9e40bb47e72cc906f0f66b9d0d5cab3", size = 636752 }, + { url = "https://files.pythonhosted.org/packages/fd/5d/ce4a03a36d956dcc29b761283f084eb4a3863401c7cb505f113f73af8774/greenlet-3.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71566302219b17ca354eb274dfd29b8da3c268e41b646f330e324e3967546a74", size = 636028 }, + { url = "https://files.pythonhosted.org/packages/4b/29/b130946b57e3ceb039238413790dd3793c5e7b8e14a54968de1fe449a7cf/greenlet-3.2.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3091bc45e6b0c73f225374fefa1536cd91b1e987377b12ef5b19129b07d93ebe", size = 583869 }, + { url = "https://files.pythonhosted.org/packages/ac/30/9f538dfe7f87b90ecc75e589d20cbd71635531a617a336c386d775725a8b/greenlet-3.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:44671c29da26539a5f142257eaba5110f71887c24d40df3ac87f1117df589e0e", size = 1112886 }, + { url = "https://files.pythonhosted.org/packages/be/92/4b7deeb1a1e9c32c1b59fdca1cac3175731c23311ddca2ea28a8b6ada91c/greenlet-3.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c23ea227847c9dbe0b3910f5c0dd95658b607137614eb821e6cbaecd60d81cc6", size = 1138355 }, + { url = "https://files.pythonhosted.org/packages/c5/eb/7551c751a2ea6498907b2fcbe31d7a54b602ba5e8eb9550a9695ca25d25c/greenlet-3.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:0a16fb934fcabfdfacf21d79e6fed81809d8cd97bc1be9d9c89f0e4567143d7b", size = 295437 }, + { url = "https://files.pythonhosted.org/packages/2c/a1/88fdc6ce0df6ad361a30ed78d24c86ea32acb2b563f33e39e927b1da9ea0/greenlet-3.2.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:df4d1509efd4977e6a844ac96d8be0b9e5aa5d5c77aa27ca9f4d3f92d3fcf330", size = 270413 }, + { url = "https://files.pythonhosted.org/packages/a6/2e/6c1caffd65490c68cd9bcec8cb7feb8ac7b27d38ba1fea121fdc1f2331dc/greenlet-3.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da956d534a6d1b9841f95ad0f18ace637668f680b1339ca4dcfb2c1837880a0b", size = 637242 }, + { url = "https://files.pythonhosted.org/packages/98/28/088af2cedf8823b6b7ab029a5626302af4ca1037cf8b998bed3a8d3cb9e2/greenlet-3.2.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c7b15fb9b88d9ee07e076f5a683027bc3befd5bb5d25954bb633c385d8b737e", size = 651444 }, + { url = "https://files.pythonhosted.org/packages/4a/9f/0116ab876bb0bc7a81eadc21c3f02cd6100dcd25a1cf2a085a130a63a26a/greenlet-3.2.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:752f0e79785e11180ebd2e726c8a88109ded3e2301d40abced2543aa5d164275", size = 646067 }, + { url = 
"https://files.pythonhosted.org/packages/35/17/bb8f9c9580e28a94a9575da847c257953d5eb6e39ca888239183320c1c28/greenlet-3.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ae572c996ae4b5e122331e12bbb971ea49c08cc7c232d1bd43150800a2d6c65", size = 648153 }, + { url = "https://files.pythonhosted.org/packages/2c/ee/7f31b6f7021b8df6f7203b53b9cc741b939a2591dcc6d899d8042fcf66f2/greenlet-3.2.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02f5972ff02c9cf615357c17ab713737cccfd0eaf69b951084a9fd43f39833d3", size = 603865 }, + { url = "https://files.pythonhosted.org/packages/b5/2d/759fa59323b521c6f223276a4fc3d3719475dc9ae4c44c2fe7fc750f8de0/greenlet-3.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4fefc7aa68b34b9224490dfda2e70ccf2131368493add64b4ef2d372955c207e", size = 1119575 }, + { url = "https://files.pythonhosted.org/packages/30/05/356813470060bce0e81c3df63ab8cd1967c1ff6f5189760c1a4734d405ba/greenlet-3.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a31ead8411a027c2c4759113cf2bd473690517494f3d6e4bf67064589afcd3c5", size = 1147460 }, + { url = "https://files.pythonhosted.org/packages/07/f4/b2a26a309a04fb844c7406a4501331b9400e1dd7dd64d3450472fd47d2e1/greenlet-3.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:b24c7844c0a0afc3ccbeb0b807adeefb7eff2b5599229ecedddcfeb0ef333bec", size = 296239 }, + { url = "https://files.pythonhosted.org/packages/89/30/97b49779fff8601af20972a62cc4af0c497c1504dfbb3e93be218e093f21/greenlet-3.2.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:3ab7194ee290302ca15449f601036007873028712e92ca15fc76597a0aeb4c59", size = 269150 }, + { url = "https://files.pythonhosted.org/packages/21/30/877245def4220f684bc2e01df1c2e782c164e84b32e07373992f14a2d107/greenlet-3.2.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dc5c43bb65ec3669452af0ab10729e8fdc17f87a1f2ad7ec65d4aaaefabf6bf", size = 637381 }, + { url = "https://files.pythonhosted.org/packages/8e/16/adf937908e1f913856b5371c1d8bdaef5f58f251d714085abeea73ecc471/greenlet-3.2.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:decb0658ec19e5c1f519faa9a160c0fc85a41a7e6654b3ce1b44b939f8bf1325", size = 651427 }, + { url = "https://files.pythonhosted.org/packages/ad/49/6d79f58fa695b618654adac64e56aff2eeb13344dc28259af8f505662bb1/greenlet-3.2.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6fadd183186db360b61cb34e81117a096bff91c072929cd1b529eb20dd46e6c5", size = 645795 }, + { url = "https://files.pythonhosted.org/packages/5a/e6/28ed5cb929c6b2f001e96b1d0698c622976cd8f1e41fe7ebc047fa7c6dd4/greenlet-3.2.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1919cbdc1c53ef739c94cf2985056bcc0838c1f217b57647cbf4578576c63825", size = 648398 }, + { url = "https://files.pythonhosted.org/packages/9d/70/b200194e25ae86bc57077f695b6cc47ee3118becf54130c5514456cf8dac/greenlet-3.2.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3885f85b61798f4192d544aac7b25a04ece5fe2704670b4ab73c2d2c14ab740d", size = 606795 }, + { url = "https://files.pythonhosted.org/packages/f8/c8/ba1def67513a941154ed8f9477ae6e5a03f645be6b507d3930f72ed508d3/greenlet-3.2.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:85f3e248507125bf4af607a26fd6cb8578776197bd4b66e35229cdf5acf1dfbf", size = 1117976 }, + { url = 
"https://files.pythonhosted.org/packages/c3/30/d0e88c1cfcc1b3331d63c2b54a0a3a4a950ef202fb8b92e772ca714a9221/greenlet-3.2.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1e76106b6fc55fa3d6fe1c527f95ee65e324a13b62e243f77b48317346559708", size = 1145509 }, + { url = "https://files.pythonhosted.org/packages/90/2e/59d6491834b6e289051b252cf4776d16da51c7c6ca6a87ff97e3a50aa0cd/greenlet-3.2.2-cp313-cp313-win_amd64.whl", hash = "sha256:fe46d4f8e94e637634d54477b0cfabcf93c53f29eedcbdeecaf2af32029b4421", size = 296023 }, + { url = "https://files.pythonhosted.org/packages/65/66/8a73aace5a5335a1cba56d0da71b7bd93e450f17d372c5b7c5fa547557e9/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba30e88607fb6990544d84caf3c706c4b48f629e18853fc6a646f82db9629418", size = 629911 }, + { url = "https://files.pythonhosted.org/packages/48/08/c8b8ebac4e0c95dcc68ec99198842e7db53eda4ab3fb0a4e785690883991/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:055916fafad3e3388d27dd68517478933a97edc2fc54ae79d3bec827de2c64c4", size = 635251 }, + { url = "https://files.pythonhosted.org/packages/37/26/7db30868f73e86b9125264d2959acabea132b444b88185ba5c462cb8e571/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2593283bf81ca37d27d110956b79e8723f9aa50c4bcdc29d3c0543d4743d2763", size = 632620 }, + { url = "https://files.pythonhosted.org/packages/10/ec/718a3bd56249e729016b0b69bee4adea0dfccf6ca43d147ef3b21edbca16/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89c69e9a10670eb7a66b8cef6354c24671ba241f46152dd3eed447f79c29fb5b", size = 628851 }, + { url = "https://files.pythonhosted.org/packages/9b/9d/d1c79286a76bc62ccdc1387291464af16a4204ea717f24e77b0acd623b99/greenlet-3.2.2-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02a98600899ca1ca5d3a2590974c9e3ec259503b2d6ba6527605fcd74e08e207", size = 593718 }, + { url = "https://files.pythonhosted.org/packages/cd/41/96ba2bf948f67b245784cd294b84e3d17933597dffd3acdb367a210d1949/greenlet-3.2.2-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:b50a8c5c162469c3209e5ec92ee4f95c8231b11db6a04db09bbe338176723bb8", size = 1105752 }, + { url = "https://files.pythonhosted.org/packages/68/3b/3b97f9d33c1f2eb081759da62bd6162159db260f602f048bc2f36b4c453e/greenlet-3.2.2-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:45f9f4853fb4cc46783085261c9ec4706628f3b57de3e68bae03e8f8b3c0de51", size = 1125170 }, + { url = "https://files.pythonhosted.org/packages/31/df/b7d17d66c8d0f578d2885a3d8f565e9e4725eacc9d3fdc946d0031c055c4/greenlet-3.2.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:9ea5231428af34226c05f927e16fc7f6fa5e39e3ad3cd24ffa48ba53a47f4240", size = 269899 }, +] + +[[package]] +name = "gunicorn" +version = "23.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry 
= "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + +[[package]] +name = "htmldate" +version = "1.9.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "dateparser" }, + { name = "lxml" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/26/aaae4cab984f0b7dd0f5f1b823fa2ed2fd4a2bb50acd5bd2f0d217562678/htmldate-1.9.3.tar.gz", hash = "sha256:ac0caf4628c3ded4042011e2d60dc68dfb314c77b106587dd307a80d77e708e9", size = 44913 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/49/8872130016209c20436ce0c1067de1cf630755d0443d068a5bc17fa95015/htmldate-1.9.3-py3-none-any.whl", hash = "sha256:3fadc422cf3c10a5cdb5e1b914daf37ec7270400a80a1b37e2673ff84faaaff8", size = 31565 }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "itsdangerous" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234 }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, +] + +[[package]] +name = "jiter" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/c2/e4562507f52f0af7036da125bb699602ead37a2332af0788f8e0a3417f36/jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893", size = 162604 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/23/44/e241a043f114299254e44d7e777ead311da400517f179665e59611ab0ee4/jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af", size = 314654 }, + { url = "https://files.pythonhosted.org/packages/fb/1b/a7e5e42db9fa262baaa9489d8d14ca93f8663e7f164ed5e9acc9f467fc00/jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58", size = 320909 }, + { url = "https://files.pythonhosted.org/packages/60/bf/8ebdfce77bc04b81abf2ea316e9c03b4a866a7d739cf355eae4d6fd9f6fe/jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b", size = 341733 }, + { url = "https://files.pythonhosted.org/packages/a8/4e/754ebce77cff9ab34d1d0fa0fe98f5d42590fd33622509a3ba6ec37ff466/jiter-0.9.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b", size = 365097 }, + { url = "https://files.pythonhosted.org/packages/32/2c/6019587e6f5844c612ae18ca892f4cd7b3d8bbf49461ed29e384a0f13d98/jiter-0.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5", size = 406603 }, + { url = "https://files.pythonhosted.org/packages/da/e9/c9e6546c817ab75a1a7dab6dcc698e62e375e1017113e8e983fccbd56115/jiter-0.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572", size = 396625 }, + { url = "https://files.pythonhosted.org/packages/be/bd/976b458add04271ebb5a255e992bd008546ea04bb4dcadc042a16279b4b4/jiter-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15", size = 351832 }, + { url = 
"https://files.pythonhosted.org/packages/07/51/fe59e307aaebec9265dbad44d9d4381d030947e47b0f23531579b9a7c2df/jiter-0.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419", size = 384590 }, + { url = "https://files.pythonhosted.org/packages/db/55/5dcd2693794d8e6f4889389ff66ef3be557a77f8aeeca8973a97a7c00557/jiter-0.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043", size = 520690 }, + { url = "https://files.pythonhosted.org/packages/54/d5/9f51dc90985e9eb251fbbb747ab2b13b26601f16c595a7b8baba964043bd/jiter-0.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965", size = 512649 }, + { url = "https://files.pythonhosted.org/packages/a6/e5/4e385945179bcf128fa10ad8dca9053d717cbe09e258110e39045c881fe5/jiter-0.9.0-cp311-cp311-win32.whl", hash = "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2", size = 206920 }, + { url = "https://files.pythonhosted.org/packages/4c/47/5e0b94c603d8e54dd1faab439b40b832c277d3b90743e7835879ab663757/jiter-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd", size = 210119 }, + { url = "https://files.pythonhosted.org/packages/af/d7/c55086103d6f29b694ec79156242304adf521577530d9031317ce5338c59/jiter-0.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11", size = 309203 }, + { url = "https://files.pythonhosted.org/packages/b0/01/f775dfee50beb420adfd6baf58d1c4d437de41c9b666ddf127c065e5a488/jiter-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e", size = 319678 }, + { url = "https://files.pythonhosted.org/packages/ab/b8/09b73a793714726893e5d46d5c534a63709261af3d24444ad07885ce87cb/jiter-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2", size = 341816 }, + { url = "https://files.pythonhosted.org/packages/35/6f/b8f89ec5398b2b0d344257138182cc090302854ed63ed9c9051e9c673441/jiter-0.9.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75", size = 364152 }, + { url = "https://files.pythonhosted.org/packages/9b/ca/978cc3183113b8e4484cc7e210a9ad3c6614396e7abd5407ea8aa1458eef/jiter-0.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d", size = 406991 }, + { url = "https://files.pythonhosted.org/packages/13/3a/72861883e11a36d6aa314b4922125f6ae90bdccc225cd96d24cc78a66385/jiter-0.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42", size = 395824 }, + { url = "https://files.pythonhosted.org/packages/87/67/22728a86ef53589c3720225778f7c5fdb617080e3deaed58b04789418212/jiter-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc", size = 351318 }, + { url = "https://files.pythonhosted.org/packages/69/b9/f39728e2e2007276806d7a6609cda7fac44ffa28ca0d02c49a4f397cc0d9/jiter-0.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc", size = 384591 }, + { url = "https://files.pythonhosted.org/packages/eb/8f/8a708bc7fd87b8a5d861f1c118a995eccbe6d672fe10c9753e67362d0dd0/jiter-0.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e", size = 520746 }, + { url = "https://files.pythonhosted.org/packages/95/1e/65680c7488bd2365dbd2980adaf63c562d3d41d3faac192ebc7ef5b4ae25/jiter-0.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d", size = 512754 }, + { url = "https://files.pythonhosted.org/packages/78/f3/fdc43547a9ee6e93c837685da704fb6da7dba311fc022e2766d5277dfde5/jiter-0.9.0-cp312-cp312-win32.whl", hash = "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06", size = 207075 }, + { url = "https://files.pythonhosted.org/packages/cd/9d/742b289016d155f49028fe1bfbeb935c9bf0ffeefdf77daf4a63a42bb72b/jiter-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0", size = 207999 }, + { url = "https://files.pythonhosted.org/packages/e7/1b/4cd165c362e8f2f520fdb43245e2b414f42a255921248b4f8b9c8d871ff1/jiter-0.9.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7", size = 308197 }, + { url = "https://files.pythonhosted.org/packages/13/aa/7a890dfe29c84c9a82064a9fe36079c7c0309c91b70c380dc138f9bea44a/jiter-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b", size = 318160 }, + { url = "https://files.pythonhosted.org/packages/6a/38/5888b43fc01102f733f085673c4f0be5a298f69808ec63de55051754e390/jiter-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69", size = 341259 }, + { url = "https://files.pythonhosted.org/packages/3d/5e/bbdbb63305bcc01006de683b6228cd061458b9b7bb9b8d9bc348a58e5dc2/jiter-0.9.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103", size = 363730 }, + { url = "https://files.pythonhosted.org/packages/75/85/53a3edc616992fe4af6814c25f91ee3b1e22f7678e979b6ea82d3bc0667e/jiter-0.9.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635", size = 405126 }, + { url = "https://files.pythonhosted.org/packages/ae/b3/1ee26b12b2693bd3f0b71d3188e4e5d817b12e3c630a09e099e0a89e28fa/jiter-0.9.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4", size = 393668 }, + { url = "https://files.pythonhosted.org/packages/11/87/e084ce261950c1861773ab534d49127d1517b629478304d328493f980791/jiter-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d", size = 352350 }, + { url = "https://files.pythonhosted.org/packages/f0/06/7dca84b04987e9df563610aa0bc154ea176e50358af532ab40ffb87434df/jiter-0.9.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3", size = 384204 }, + { url = 
"https://files.pythonhosted.org/packages/16/2f/82e1c6020db72f397dd070eec0c85ebc4df7c88967bc86d3ce9864148f28/jiter-0.9.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5", size = 520322 }, + { url = "https://files.pythonhosted.org/packages/36/fd/4f0cd3abe83ce208991ca61e7e5df915aa35b67f1c0633eb7cf2f2e88ec7/jiter-0.9.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d", size = 512184 }, + { url = "https://files.pythonhosted.org/packages/a0/3c/8a56f6d547731a0b4410a2d9d16bf39c861046f91f57c98f7cab3d2aa9ce/jiter-0.9.0-cp313-cp313-win32.whl", hash = "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53", size = 206504 }, + { url = "https://files.pythonhosted.org/packages/f4/1c/0c996fd90639acda75ed7fa698ee5fd7d80243057185dc2f63d4c1c9f6b9/jiter-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7", size = 204943 }, + { url = "https://files.pythonhosted.org/packages/78/0f/77a63ca7aa5fed9a1b9135af57e190d905bcd3702b36aca46a01090d39ad/jiter-0.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001", size = 317281 }, + { url = "https://files.pythonhosted.org/packages/f9/39/a3a1571712c2bf6ec4c657f0d66da114a63a2e32b7e4eb8e0b83295ee034/jiter-0.9.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a", size = 350273 }, + { url = "https://files.pythonhosted.org/packages/ee/47/3729f00f35a696e68da15d64eb9283c330e776f3b5789bac7f2c0c4df209/jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", size = 206867 }, +] + +[[package]] +name = "justext" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml", extra = ["html-clean"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/f3/45890c1b314f0d04e19c1c83d534e611513150939a7cf039664d9ab1e649/justext-3.0.2.tar.gz", hash = "sha256:13496a450c44c4cd5b5a75a5efcd9996066d2a189794ea99a49949685a0beb05", size = 828521 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/ac/52f4e86d1924a7fc05af3aeb34488570eccc39b4af90530dd6acecdf16b5/justext-3.0.2-py2.py3-none-any.whl", hash = "sha256:62b1c562b15c3c6265e121cc070874243a443bfd53060e869393f09d6b6cc9a7", size = 837940 }, +] + +[[package]] +name = "lxml" +version = "5.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/2d/67693cc8a605a12e5975380d7ff83020dcc759351b5a066e1cced04f797b/lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:98a3912194c079ef37e716ed228ae0dcb960992100461b704aea4e93af6b0bb9", size = 8083240 }, + { url = "https://files.pythonhosted.org/packages/73/53/b5a05ab300a808b72e848efd152fe9c022c0181b0a70b8bca1199f1bed26/lxml-5.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ea0252b51d296a75f6118ed0d8696888e7403408ad42345d7dfd0d1e93309a7", size = 4387685 }, + { url = 
"https://files.pythonhosted.org/packages/d8/cb/1a3879c5f512bdcd32995c301886fe082b2edd83c87d41b6d42d89b4ea4d/lxml-5.4.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92b69441d1bd39f4940f9eadfa417a25862242ca2c396b406f9272ef09cdcaa", size = 4991164 }, + { url = "https://files.pythonhosted.org/packages/f9/94/bbc66e42559f9d04857071e3b3d0c9abd88579367fd2588a4042f641f57e/lxml-5.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20e16c08254b9b6466526bc1828d9370ee6c0d60a4b64836bc3ac2917d1e16df", size = 4746206 }, + { url = "https://files.pythonhosted.org/packages/66/95/34b0679bee435da2d7cae895731700e519a8dfcab499c21662ebe671603e/lxml-5.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7605c1c32c3d6e8c990dd28a0970a3cbbf1429d5b92279e37fda05fb0c92190e", size = 5342144 }, + { url = "https://files.pythonhosted.org/packages/e0/5d/abfcc6ab2fa0be72b2ba938abdae1f7cad4c632f8d552683ea295d55adfb/lxml-5.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecf4c4b83f1ab3d5a7ace10bafcb6f11df6156857a3c418244cef41ca9fa3e44", size = 4825124 }, + { url = "https://files.pythonhosted.org/packages/5a/78/6bd33186c8863b36e084f294fc0a5e5eefe77af95f0663ef33809cc1c8aa/lxml-5.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cef4feae82709eed352cd7e97ae062ef6ae9c7b5dbe3663f104cd2c0e8d94ba", size = 4876520 }, + { url = "https://files.pythonhosted.org/packages/3b/74/4d7ad4839bd0fc64e3d12da74fc9a193febb0fae0ba6ebd5149d4c23176a/lxml-5.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:df53330a3bff250f10472ce96a9af28628ff1f4efc51ccba351a8820bca2a8ba", size = 4765016 }, + { url = "https://files.pythonhosted.org/packages/24/0d/0a98ed1f2471911dadfc541003ac6dd6879fc87b15e1143743ca20f3e973/lxml-5.4.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:aefe1a7cb852fa61150fcb21a8c8fcea7b58c4cb11fbe59c97a0a4b31cae3c8c", size = 5362884 }, + { url = "https://files.pythonhosted.org/packages/48/de/d4f7e4c39740a6610f0f6959052b547478107967362e8424e1163ec37ae8/lxml-5.4.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ef5a7178fcc73b7d8c07229e89f8eb45b2908a9238eb90dcfc46571ccf0383b8", size = 4902690 }, + { url = "https://files.pythonhosted.org/packages/07/8c/61763abd242af84f355ca4ef1ee096d3c1b7514819564cce70fd18c22e9a/lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d2ed1b3cb9ff1c10e6e8b00941bb2e5bb568b307bfc6b17dffbbe8be5eecba86", size = 4944418 }, + { url = "https://files.pythonhosted.org/packages/f9/c5/6d7e3b63e7e282619193961a570c0a4c8a57fe820f07ca3fe2f6bd86608a/lxml-5.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:72ac9762a9f8ce74c9eed4a4e74306f2f18613a6b71fa065495a67ac227b3056", size = 4827092 }, + { url = "https://files.pythonhosted.org/packages/71/4a/e60a306df54680b103348545706a98a7514a42c8b4fbfdcaa608567bb065/lxml-5.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f5cb182f6396706dc6cc1896dd02b1c889d644c081b0cdec38747573db88a7d7", size = 5418231 }, + { url = "https://files.pythonhosted.org/packages/27/f2/9754aacd6016c930875854f08ac4b192a47fe19565f776a64004aa167521/lxml-5.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3a3178b4873df8ef9457a4875703488eb1622632a9cee6d76464b60e90adbfcd", size = 5261798 }, + { url = "https://files.pythonhosted.org/packages/38/a2/0c49ec6941428b1bd4f280650d7b11a0f91ace9db7de32eb7aa23bcb39ff/lxml-5.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:e094ec83694b59d263802ed03a8384594fcce477ce484b0cbcd0008a211ca751", size = 4988195 }, + { url = "https://files.pythonhosted.org/packages/7a/75/87a3963a08eafc46a86c1131c6e28a4de103ba30b5ae903114177352a3d7/lxml-5.4.0-cp311-cp311-win32.whl", hash = "sha256:4329422de653cdb2b72afa39b0aa04252fca9071550044904b2e7036d9d97fe4", size = 3474243 }, + { url = "https://files.pythonhosted.org/packages/fa/f9/1f0964c4f6c2be861c50db380c554fb8befbea98c6404744ce243a3c87ef/lxml-5.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd3be6481ef54b8cfd0e1e953323b7aa9d9789b94842d0e5b142ef4bb7999539", size = 3815197 }, + { url = "https://files.pythonhosted.org/packages/f8/4c/d101ace719ca6a4ec043eb516fcfcb1b396a9fccc4fcd9ef593df34ba0d5/lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4", size = 8127392 }, + { url = "https://files.pythonhosted.org/packages/11/84/beddae0cec4dd9ddf46abf156f0af451c13019a0fa25d7445b655ba5ccb7/lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d", size = 4415103 }, + { url = "https://files.pythonhosted.org/packages/d0/25/d0d93a4e763f0462cccd2b8a665bf1e4343dd788c76dcfefa289d46a38a9/lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779", size = 5024224 }, + { url = "https://files.pythonhosted.org/packages/31/ce/1df18fb8f7946e7f3388af378b1f34fcf253b94b9feedb2cec5969da8012/lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e", size = 4769913 }, + { url = "https://files.pythonhosted.org/packages/4e/62/f4a6c60ae7c40d43657f552f3045df05118636be1165b906d3423790447f/lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9", size = 5290441 }, + { url = "https://files.pythonhosted.org/packages/9e/aa/04f00009e1e3a77838c7fc948f161b5d2d5de1136b2b81c712a263829ea4/lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5", size = 4820165 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/e0b2f61fa2404bf0f1fdf1898377e5bd1b74cc9b2cf2c6ba8509b8f27990/lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5", size = 4932580 }, + { url = "https://files.pythonhosted.org/packages/24/a2/8263f351b4ffe0ed3e32ea7b7830f845c795349034f912f490180d88a877/lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4", size = 4759493 }, + { url = "https://files.pythonhosted.org/packages/05/00/41db052f279995c0e35c79d0f0fc9f8122d5b5e9630139c592a0b58c71b4/lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e", size = 5324679 }, + { url = "https://files.pythonhosted.org/packages/1d/be/ee99e6314cdef4587617d3b3b745f9356d9b7dd12a9663c5f3b5734b64ba/lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7", size = 4890691 }, + { url = 
"https://files.pythonhosted.org/packages/ad/36/239820114bf1d71f38f12208b9c58dec033cbcf80101cde006b9bde5cffd/lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079", size = 4955075 }, + { url = "https://files.pythonhosted.org/packages/d4/e1/1b795cc0b174efc9e13dbd078a9ff79a58728a033142bc6d70a1ee8fc34d/lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20", size = 4838680 }, + { url = "https://files.pythonhosted.org/packages/72/48/3c198455ca108cec5ae3662ae8acd7fd99476812fd712bb17f1b39a0b589/lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8", size = 5391253 }, + { url = "https://files.pythonhosted.org/packages/d6/10/5bf51858971c51ec96cfc13e800a9951f3fd501686f4c18d7d84fe2d6352/lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f", size = 5261651 }, + { url = "https://files.pythonhosted.org/packages/2b/11/06710dd809205377da380546f91d2ac94bad9ff735a72b64ec029f706c85/lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc", size = 5024315 }, + { url = "https://files.pythonhosted.org/packages/f5/b0/15b6217834b5e3a59ebf7f53125e08e318030e8cc0d7310355e6edac98ef/lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f", size = 3486149 }, + { url = "https://files.pythonhosted.org/packages/91/1e/05ddcb57ad2f3069101611bd5f5084157d90861a2ef460bf42f45cced944/lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2", size = 3817095 }, + { url = "https://files.pythonhosted.org/packages/87/cb/2ba1e9dd953415f58548506fa5549a7f373ae55e80c61c9041b7fd09a38a/lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0", size = 8110086 }, + { url = "https://files.pythonhosted.org/packages/b5/3e/6602a4dca3ae344e8609914d6ab22e52ce42e3e1638c10967568c5c1450d/lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de", size = 4404613 }, + { url = "https://files.pythonhosted.org/packages/4c/72/bf00988477d3bb452bef9436e45aeea82bb40cdfb4684b83c967c53909c7/lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76", size = 5012008 }, + { url = "https://files.pythonhosted.org/packages/92/1f/93e42d93e9e7a44b2d3354c462cd784dbaaf350f7976b5d7c3f85d68d1b1/lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d", size = 4760915 }, + { url = "https://files.pythonhosted.org/packages/45/0b/363009390d0b461cf9976a499e83b68f792e4c32ecef092f3f9ef9c4ba54/lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422", size = 5283890 }, + { url = "https://files.pythonhosted.org/packages/19/dc/6056c332f9378ab476c88e301e6549a0454dbee8f0ae16847414f0eccb74/lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551", size = 4812644 }, + { url = "https://files.pythonhosted.org/packages/ee/8a/f8c66bbb23ecb9048a46a5ef9b495fd23f7543df642dabeebcb2eeb66592/lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c", size = 4921817 }, + { url = "https://files.pythonhosted.org/packages/04/57/2e537083c3f381f83d05d9b176f0d838a9e8961f7ed8ddce3f0217179ce3/lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff", size = 4753916 }, + { url = "https://files.pythonhosted.org/packages/d8/80/ea8c4072109a350848f1157ce83ccd9439601274035cd045ac31f47f3417/lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60", size = 5289274 }, + { url = "https://files.pythonhosted.org/packages/b3/47/c4be287c48cdc304483457878a3f22999098b9a95f455e3c4bda7ec7fc72/lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8", size = 4874757 }, + { url = "https://files.pythonhosted.org/packages/2f/04/6ef935dc74e729932e39478e44d8cfe6a83550552eaa072b7c05f6f22488/lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982", size = 4947028 }, + { url = "https://files.pythonhosted.org/packages/cb/f9/c33fc8daa373ef8a7daddb53175289024512b6619bc9de36d77dca3df44b/lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61", size = 4834487 }, + { url = "https://files.pythonhosted.org/packages/8d/30/fc92bb595bcb878311e01b418b57d13900f84c2b94f6eca9e5073ea756e6/lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54", size = 5381688 }, + { url = "https://files.pythonhosted.org/packages/43/d1/3ba7bd978ce28bba8e3da2c2e9d5ae3f8f521ad3f0ca6ea4788d086ba00d/lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b", size = 5242043 }, + { url = "https://files.pythonhosted.org/packages/ee/cd/95fa2201041a610c4d08ddaf31d43b98ecc4b1d74b1e7245b1abdab443cb/lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a", size = 5021569 }, + { url = "https://files.pythonhosted.org/packages/2d/a6/31da006fead660b9512d08d23d31e93ad3477dd47cc42e3285f143443176/lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82", size = 3485270 }, + { url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606 }, +] + +[package.optional-dependencies] +html-clean = [ + { name = "lxml-html-clean" }, +] + +[[package]] +name = "lxml-html-clean" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/b6/466e71db127950fb8d172026a8f0a9f0dc6f64c8e78e2ca79f252e5790b8/lxml_html_clean-0.4.2.tar.gz", hash = "sha256:91291e7b5db95430abf461bc53440964d58e06cc468950f9e47db64976cebcb3", size = 21622 } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/4e/0b/942cb7278d6caad79343ad2ddd636ed204a47909b969d19114a3097f5aa3/lxml_html_clean-0.4.2-py3-none-any.whl", hash = "sha256:74ccfba277adcfea87a1e9294f47dd86b05d65b4da7c5b07966e3d5f3be8a505", size = 14184 }, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 }, + { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 }, + { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120 }, + { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032 }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057 }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359 }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306 }, + { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094 }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521 }, + { url = 
"https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118 }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993 }, + { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178 }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319 }, + { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352 }, + { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097 }, + { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601 }, + { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352 }, + { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122 }, + { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085 }, + { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978 }, + { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208 }, + { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357 }, + { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344 }, + { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101 }, + { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603 }, + { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510 }, + { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486 }, + { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480 }, + { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914 }, + { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796 }, + { url = 
"https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473 }, + { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114 }, + { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098 }, + { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208 }, + { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 }, +] + +[[package]] +name = "openai" +version = "1.78.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/3f/4e5e7b0548a15eabc4a755c93cd5f9564887e3d2fd45b6ff531352e5859d/openai-1.78.1.tar.gz", hash = "sha256:8b26b364531b100df1b961d03560042e5f5be11301d7d49a6cd1a2b9af824dca", size = 442985 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/4c/3889bc332a6c743751eb78a4bada5761e50a8a847ff0e46c1bd23ce12362/openai-1.78.1-py3-none-any.whl", hash = "sha256:7368bf147ca499804cc408fe68cdb6866a060f38dec961bbc97b04f9d917907e", size = 680917 }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, +] + +[[package]] +name = "pydantic" +version = "2.11.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/ab/5250d56ad03884ab5efd07f734203943c8a8ab40d551e208af81d0257bf2/pydantic-2.11.4.tar.gz", hash = "sha256:32738d19d63a226a52eed76645a98ee07c1f410ee41d93b4afbfa85ed8111c2d", size = 786540 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/12/46b65f3534d099349e38ef6ec98b1a5a81f42536d17e0ba382c28c67ba67/pydantic-2.11.4-py3-none-any.whl", hash = "sha256:d9615eaa9ac5a063471da949c8fc16376a84afb5024688b3ff885693506764eb", size = 443900 }, +] + 
+[[package]] +name = "pydantic-core" +version = "2.33.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584 }, + { url = "https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071 }, + { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823 }, + { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792 }, + { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338 }, + { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998 }, + { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200 }, + { url = "https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890 }, + { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359 }, + { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883 }, + { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074 }, + { url = "https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538 }, + { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909 }, + { url = "https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786 }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000 }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996 }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957 }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199 }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296 }, + { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109 }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028 }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044 }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881 }, + { url = 
"https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034 }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187 }, + { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628 }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866 }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894 }, + { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688 }, + { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808 }, + { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580 }, + { url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859 }, + { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810 }, + { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498 }, + { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611 }, + { url = 
"https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924 }, + { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196 }, + { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389 }, + { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223 }, + { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473 }, + { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269 }, + { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921 }, + { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162 }, + { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560 }, + { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777 }, + { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200 }, + { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123 }, + { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852 }, + { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484 }, + { url = "https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896 }, + { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475 }, + { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013 }, + { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715 }, + { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, +] + +[[package]] +name = "regex" +version = "2024.11.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/58/58/7e4d9493a66c88a7da6d205768119f51af0f684fe7be7bac8328e217a52c/regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638", size = 482669 }, + { url = "https://files.pythonhosted.org/packages/34/4c/8f8e631fcdc2ff978609eaeef1d6994bf2f028b59d9ac67640ed051f1218/regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7", size = 287684 }, + { url = "https://files.pythonhosted.org/packages/c5/1b/f0e4d13e6adf866ce9b069e191f303a30ab1277e037037a365c3aad5cc9c/regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20", size = 284589 }, + { url = "https://files.pythonhosted.org/packages/25/4d/ab21047f446693887f25510887e6820b93f791992994f6498b0318904d4a/regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114", size = 792121 }, + { url = "https://files.pythonhosted.org/packages/45/ee/c867e15cd894985cb32b731d89576c41a4642a57850c162490ea34b78c3b/regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3", size = 831275 }, + { url = "https://files.pythonhosted.org/packages/b3/12/b0f480726cf1c60f6536fa5e1c95275a77624f3ac8fdccf79e6727499e28/regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f", size = 818257 }, + { url = "https://files.pythonhosted.org/packages/bf/ce/0d0e61429f603bac433910d99ef1a02ce45a8967ffbe3cbee48599e62d88/regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0", size = 792727 }, + { url = "https://files.pythonhosted.org/packages/e4/c1/243c83c53d4a419c1556f43777ccb552bccdf79d08fda3980e4e77dd9137/regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55", size = 780667 }, + { url = "https://files.pythonhosted.org/packages/c5/f4/75eb0dd4ce4b37f04928987f1d22547ddaf6c4bae697623c1b05da67a8aa/regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89", size = 776963 }, + { url = "https://files.pythonhosted.org/packages/16/5d/95c568574e630e141a69ff8a254c2f188b4398e813c40d49228c9bbd9875/regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d", size = 784700 }, + { url = "https://files.pythonhosted.org/packages/8e/b5/f8495c7917f15cc6fee1e7f395e324ec3e00ab3c665a7dc9d27562fd5290/regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34", size = 848592 }, + { url = "https://files.pythonhosted.org/packages/1c/80/6dd7118e8cb212c3c60b191b932dc57db93fb2e36fb9e0e92f72a5909af9/regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d", size = 852929 }, + { url = 
"https://files.pythonhosted.org/packages/11/9b/5a05d2040297d2d254baf95eeeb6df83554e5e1df03bc1a6687fc4ba1f66/regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45", size = 781213 }, + { url = "https://files.pythonhosted.org/packages/26/b7/b14e2440156ab39e0177506c08c18accaf2b8932e39fb092074de733d868/regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9", size = 261734 }, + { url = "https://files.pythonhosted.org/packages/80/32/763a6cc01d21fb3819227a1cc3f60fd251c13c37c27a73b8ff4315433a8e/regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60", size = 274052 }, + { url = "https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781 }, + { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455 }, + { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759 }, + { url = "https://files.pythonhosted.org/packages/94/2b/701a4b0585cb05472a4da28ee28fdfe155f3638f5e1ec92306d924e5faf0/regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", size = 794976 }, + { url = "https://files.pythonhosted.org/packages/4b/bf/fa87e563bf5fee75db8915f7352e1887b1249126a1be4813837f5dbec965/regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", size = 833077 }, + { url = "https://files.pythonhosted.org/packages/a1/56/7295e6bad94b047f4d0834e4779491b81216583c00c288252ef625c01d23/regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", size = 823160 }, + { url = "https://files.pythonhosted.org/packages/fb/13/e3b075031a738c9598c51cfbc4c7879e26729c53aa9cca59211c44235314/regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", size = 796896 }, + { url = "https://files.pythonhosted.org/packages/24/56/0b3f1b66d592be6efec23a795b37732682520b47c53da5a32c33ed7d84e3/regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", size = 783997 }, + { url = "https://files.pythonhosted.org/packages/f9/a1/eb378dada8b91c0e4c5f08ffb56f25fcae47bf52ad18f9b2f33b83e6d498/regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", size = 781725 }, + { url = "https://files.pythonhosted.org/packages/83/f2/033e7dec0cfd6dda93390089864732a3409246ffe8b042e9554afa9bff4e/regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", size = 789481 }, + { url = "https://files.pythonhosted.org/packages/83/23/15d4552ea28990a74e7696780c438aadd73a20318c47e527b47a4a5a596d/regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", size = 852896 }, + { url = "https://files.pythonhosted.org/packages/e3/39/ed4416bc90deedbfdada2568b2cb0bc1fdb98efe11f5378d9892b2a88f8f/regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", size = 860138 }, + { url = "https://files.pythonhosted.org/packages/93/2d/dd56bb76bd8e95bbce684326302f287455b56242a4f9c61f1bc76e28360e/regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", size = 787692 }, + { url = "https://files.pythonhosted.org/packages/0b/55/31877a249ab7a5156758246b9c59539abbeba22461b7d8adc9e8475ff73e/regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", size = 262135 }, + { url = "https://files.pythonhosted.org/packages/38/ec/ad2d7de49a600cdb8dd78434a1aeffe28b9d6fc42eb36afab4a27ad23384/regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", size = 273567 }, + { url = "https://files.pythonhosted.org/packages/90/73/bcb0e36614601016552fa9344544a3a2ae1809dc1401b100eab02e772e1f/regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84", size = 483525 }, + { url = "https://files.pythonhosted.org/packages/0f/3f/f1a082a46b31e25291d830b369b6b0c5576a6f7fb89d3053a354c24b8a83/regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4", size = 288324 }, + { url = "https://files.pythonhosted.org/packages/09/c9/4e68181a4a652fb3ef5099e077faf4fd2a694ea6e0f806a7737aff9e758a/regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0", size = 284617 }, + { url = "https://files.pythonhosted.org/packages/fc/fd/37868b75eaf63843165f1d2122ca6cb94bfc0271e4428cf58c0616786dce/regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0", size = 795023 }, + { url = "https://files.pythonhosted.org/packages/c4/7c/d4cd9c528502a3dedb5c13c146e7a7a539a3853dc20209c8e75d9ba9d1b2/regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7", size = 833072 }, + { url = "https://files.pythonhosted.org/packages/4f/db/46f563a08f969159c5a0f0e722260568425363bea43bb7ae370becb66a67/regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7", size = 823130 }, + { url = "https://files.pythonhosted.org/packages/db/60/1eeca2074f5b87df394fccaa432ae3fc06c9c9bfa97c5051aed70e6e00c2/regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c", size = 796857 }, + { url = 
"https://files.pythonhosted.org/packages/10/db/ac718a08fcee981554d2f7bb8402f1faa7e868c1345c16ab1ebec54b0d7b/regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3", size = 784006 }, + { url = "https://files.pythonhosted.org/packages/c2/41/7da3fe70216cea93144bf12da2b87367590bcf07db97604edeea55dac9ad/regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07", size = 781650 }, + { url = "https://files.pythonhosted.org/packages/a7/d5/880921ee4eec393a4752e6ab9f0fe28009435417c3102fc413f3fe81c4e5/regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e", size = 789545 }, + { url = "https://files.pythonhosted.org/packages/dc/96/53770115e507081122beca8899ab7f5ae28ae790bfcc82b5e38976df6a77/regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6", size = 853045 }, + { url = "https://files.pythonhosted.org/packages/31/d3/1372add5251cc2d44b451bd94f43b2ec78e15a6e82bff6a290ef9fd8f00a/regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4", size = 860182 }, + { url = "https://files.pythonhosted.org/packages/ed/e3/c446a64984ea9f69982ba1a69d4658d5014bc7a0ea468a07e1a1265db6e2/regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d", size = 787733 }, + { url = "https://files.pythonhosted.org/packages/2b/f1/e40c8373e3480e4f29f2692bd21b3e05f296d3afebc7e5dcf21b9756ca1c/regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff", size = 262122 }, + { url = "https://files.pythonhosted.org/packages/45/94/bc295babb3062a731f52621cdc992d123111282e291abaf23faa413443ea/regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a", size = 273545 }, +] + +[[package]] +name = "repl-nix-workspace" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "beautifulsoup4" }, + { name = "email-validator" }, + { name = "flask" }, + { name = "flask-cors" }, + { name = "flask-sqlalchemy" }, + { name = "gunicorn" }, + { name = "openai" }, + { name = "requests" }, + { name = "schedule" }, + { name = "trafilatura" }, +] + +[package.metadata] +requires-dist = [ + { name = "beautifulsoup4", specifier = ">=4.13.4" }, + { name = "email-validator", specifier = ">=2.2.0" }, + { name = "flask", specifier = ">=3.1.1" }, + { name = "flask-cors", specifier = ">=5.0.1" }, + { name = "flask-sqlalchemy", specifier = ">=3.1.1" }, + { name = "gunicorn", specifier = ">=23.0.0" }, + { name = "openai", specifier = ">=1.78.1" }, + { name = "requests", specifier = ">=2.32.3" }, + { name = "schedule", specifier = ">=1.2.2" }, + { name = "trafilatura", specifier = ">=2.0.0" }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "schedule" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0c/91/b525790063015759f34447d4cf9d2ccb52cdee0f1dd6ff8764e863bcb74c/schedule-1.2.2.tar.gz", hash = "sha256:15fe9c75fe5fd9b9627f3f19cc0ef1420508f9f9a46f45cd0769ef75ede5f0b7", size = 26452 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/a7/84c96b61fd13205f2cafbe263cdb2745965974bdf3e0078f121dfeca5f02/schedule-1.2.2-py3-none-any.whl", hash = "sha256:5bef4a2a0183abf44046ae0d164cadcac21b1db011bdd8102e4a0c1e91e06a7d", size = 12220 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "soupsieve" +version = "2.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3f/f4/4a80cd6ef364b2e8b65b15816a843c0980f7a5a2b4dc701fc574952aa19f/soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a", size = 103418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677 }, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.40" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/c3/3f2bfa5e4dcd9938405fe2fab5b6ab94a9248a4f9536ea2fd497da20525f/sqlalchemy-2.0.40.tar.gz", hash = "sha256:d827099289c64589418ebbcaead0145cd19f4e3e8a93919a0100247af245fa00", size = 9664299 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/7e/55044a9ec48c3249bb38d5faae93f09579c35e862bb318ebd1ed7a1994a5/sqlalchemy-2.0.40-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6bacab7514de6146a1976bc56e1545bee247242fab030b89e5f70336fc0003e", size = 2114025 }, + { url = "https://files.pythonhosted.org/packages/77/0f/dcf7bba95f847aec72f638750747b12d37914f71c8cc7c133cf326ab945c/sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5654d1ac34e922b6c5711631f2da497d3a7bffd6f9f87ac23b35feea56098011", size = 2104419 }, + { url = "https://files.pythonhosted.org/packages/75/70/c86a5c20715e4fe903dde4c2fd44fc7e7a0d5fb52c1b954d98526f65a3ea/sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35904d63412db21088739510216e9349e335f142ce4a04b69e2528020ee19ed4", size = 3222720 }, + { url = "https://files.pythonhosted.org/packages/12/cf/b891a8c1d0c27ce9163361664c2128c7a57de3f35000ea5202eb3a2917b7/sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c7a80ed86d6aaacb8160a1caef6680d4ddd03c944d985aecee940d168c411d1", size = 3222682 }, + { url = "https://files.pythonhosted.org/packages/15/3f/7709d8c8266953d945435a96b7f425ae4172a336963756b58e996fbef7f3/sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:519624685a51525ddaa7d8ba8265a1540442a2ec71476f0e75241eb8263d6f51", size = 3159542 }, + { url = "https://files.pythonhosted.org/packages/85/7e/717eaabaf0f80a0132dc2032ea8f745b7a0914451c984821a7c8737fb75a/sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2ee5f9999a5b0e9689bed96e60ee53c3384f1a05c2dd8068cc2e8361b0df5b7a", size = 3179864 }, + { url = "https://files.pythonhosted.org/packages/e4/cc/03eb5dfcdb575cbecd2bd82487b9848f250a4b6ecfb4707e834b4ce4ec07/sqlalchemy-2.0.40-cp311-cp311-win32.whl", hash = "sha256:c0cae71e20e3c02c52f6b9e9722bca70e4a90a466d59477822739dc31ac18b4b", size = 2084675 }, + { url = "https://files.pythonhosted.org/packages/9a/48/440946bf9dc4dc231f4f31ef0d316f7135bf41d4b86aaba0c0655150d370/sqlalchemy-2.0.40-cp311-cp311-win_amd64.whl", hash = 
"sha256:574aea2c54d8f1dd1699449f332c7d9b71c339e04ae50163a3eb5ce4c4325ee4", size = 2110099 }, + { url = "https://files.pythonhosted.org/packages/92/06/552c1f92e880b57d8b92ce6619bd569b25cead492389b1d84904b55989d8/sqlalchemy-2.0.40-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9d3b31d0a1c44b74d3ae27a3de422dfccd2b8f0b75e51ecb2faa2bf65ab1ba0d", size = 2112620 }, + { url = "https://files.pythonhosted.org/packages/01/72/a5bc6e76c34cebc071f758161dbe1453de8815ae6e662393910d3be6d70d/sqlalchemy-2.0.40-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f7a0f506cf78c80450ed1e816978643d3969f99c4ac6b01104a6fe95c5490a", size = 2103004 }, + { url = "https://files.pythonhosted.org/packages/bf/fd/0e96c8e6767618ed1a06e4d7a167fe13734c2f8113c4cb704443e6783038/sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb933a650323e476a2e4fbef8997a10d0003d4da996aad3fd7873e962fdde4d", size = 3252440 }, + { url = "https://files.pythonhosted.org/packages/cd/6a/eb82e45b15a64266a2917a6833b51a334ea3c1991728fd905bfccbf5cf63/sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959738971b4745eea16f818a2cd086fb35081383b078272c35ece2b07012716", size = 3263277 }, + { url = "https://files.pythonhosted.org/packages/45/97/ebe41ab4530f50af99e3995ebd4e0204bf1b0dc0930f32250dde19c389fe/sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:110179728e442dae85dd39591beb74072ae4ad55a44eda2acc6ec98ead80d5f2", size = 3198591 }, + { url = "https://files.pythonhosted.org/packages/e6/1c/a569c1b2b2f5ac20ba6846a1321a2bf52e9a4061001f282bf1c5528dcd69/sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8040680eaacdce4d635f12c55c714f3d4c7f57da2bc47a01229d115bd319191", size = 3225199 }, + { url = "https://files.pythonhosted.org/packages/8f/91/87cc71a6b10065ca0209d19a4bb575378abda6085e72fa0b61ffb2201b84/sqlalchemy-2.0.40-cp312-cp312-win32.whl", hash = "sha256:650490653b110905c10adac69408380688cefc1f536a137d0d69aca1069dc1d1", size = 2082959 }, + { url = "https://files.pythonhosted.org/packages/2a/9f/14c511cda174aa1ad9b0e42b64ff5a71db35d08b0d80dc044dae958921e5/sqlalchemy-2.0.40-cp312-cp312-win_amd64.whl", hash = "sha256:2be94d75ee06548d2fc591a3513422b873490efb124048f50556369a834853b0", size = 2108526 }, + { url = "https://files.pythonhosted.org/packages/8c/18/4e3a86cc0232377bc48c373a9ba6a1b3fb79ba32dbb4eda0b357f5a2c59d/sqlalchemy-2.0.40-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:915866fd50dd868fdcc18d61d8258db1bf9ed7fbd6dfec960ba43365952f3b01", size = 2107887 }, + { url = "https://files.pythonhosted.org/packages/cb/60/9fa692b1d2ffc4cbd5f47753731fd332afed30137115d862d6e9a1e962c7/sqlalchemy-2.0.40-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a4c5a2905a9ccdc67a8963e24abd2f7afcd4348829412483695c59e0af9a705", size = 2098367 }, + { url = "https://files.pythonhosted.org/packages/4c/9f/84b78357ca641714a439eb3fbbddb17297dacfa05d951dbf24f28d7b5c08/sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55028d7a3ebdf7ace492fab9895cbc5270153f75442a0472d8516e03159ab364", size = 3184806 }, + { url = "https://files.pythonhosted.org/packages/4b/7d/e06164161b6bfce04c01bfa01518a20cccbd4100d5c951e5a7422189191a/sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cfedff6878b0e0d1d0a50666a817ecd85051d12d56b43d9d425455e608b5ba0", size = 3198131 }, + { url = 
"https://files.pythonhosted.org/packages/6d/51/354af20da42d7ec7b5c9de99edafbb7663a1d75686d1999ceb2c15811302/sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bb19e30fdae77d357ce92192a3504579abe48a66877f476880238a962e5b96db", size = 3131364 }, + { url = "https://files.pythonhosted.org/packages/7a/2f/48a41ff4e6e10549d83fcc551ab85c268bde7c03cf77afb36303c6594d11/sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:16d325ea898f74b26ffcd1cf8c593b0beed8714f0317df2bed0d8d1de05a8f26", size = 3159482 }, + { url = "https://files.pythonhosted.org/packages/33/ac/e5e0a807163652a35be878c0ad5cfd8b1d29605edcadfb5df3c512cdf9f3/sqlalchemy-2.0.40-cp313-cp313-win32.whl", hash = "sha256:a669cbe5be3c63f75bcbee0b266779706f1a54bcb1000f302685b87d1b8c1500", size = 2080704 }, + { url = "https://files.pythonhosted.org/packages/1c/cb/f38c61f7f2fd4d10494c1c135ff6a6ddb63508d0b47bccccd93670637309/sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl", hash = "sha256:641ee2e0834812d657862f3a7de95e0048bdcb6c55496f39c6fa3d435f6ac6ad", size = 2104564 }, + { url = "https://files.pythonhosted.org/packages/d1/7c/5fc8e802e7506fe8b55a03a2e1dab156eae205c91bee46305755e086d2e2/sqlalchemy-2.0.40-py3-none-any.whl", hash = "sha256:32587e2e1e359276957e6fe5dad089758bc042a971a8a09ae8ecf7a8fe23d07a", size = 1903894 }, +] + +[[package]] +name = "tld" +version = "0.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/2b/678082222bc1d2823ea8384c6806085b85226ff73885c703fe0c7143ef64/tld-0.13.tar.gz", hash = "sha256:93dde5e1c04bdf1844976eae440706379d21f4ab235b73c05d7483e074fb5629", size = 446824 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/75/779ddeaf4d847ba0021ad99d1b615a853f2a5762bd5d118273c7f7673c38/tld-0.13-py2.py3-none-any.whl", hash = "sha256:f75b2be080f767ed17c2338a339eaa4fab5792586319ca819119da252f9f3749", size = 263789 }, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, +] + +[[package]] +name = "trafilatura" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "courlan" }, + { name = "htmldate" }, + { name = "justext" }, + { name = "lxml" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/25/e3ebeefdebfdfae8c4a4396f5a6ea51fc6fa0831d63ce338e5090a8003dc/trafilatura-2.0.0.tar.gz", hash = "sha256:ceb7094a6ecc97e72fea73c7dba36714c5c5b577b6470e4520dca893706d6247", size = 253404 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/b6/097367f180b6383a3581ca1b86fcae284e52075fa941d1232df35293363c/trafilatura-2.0.0-py3-none-any.whl", hash = "sha256:77eb5d1e993747f6f20938e1de2d840020719735690c840b9a1024803a4cd51d", size = 132557 }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125 }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, +] + +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026 }, +] + +[[package]] +name = "urllib3" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 }, +] + +[[package]] +name = "werkzeug" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/69/83029f1f6300c5fb2471d621ab06f6ec6b3324685a2ce0f9777fd4a8b71e/werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746", size = 806925 } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498 }, +]