diff --git "a/README.md" "b/README.md" new file mode 100644--- /dev/null +++ "b/README.md" @@ -0,0 +1,354 @@ +--- +tags: +- setfit +- sentence-transformers +- text-classification +- generated_from_setfit_trainer +widget: +- text: '{"@id": "hub:69f82e716acd4d5fae6e985152e8a192", "@type": "print:PrintRecord", + "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, + "print:hasColourDetails": "4 CP, UV Fade resistant inks", "print:hasCreatedDate": + {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-03-14"}, "print:hasCurrencyCode": + "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": + 31180, "print:hasCustomerName": "Scotts Miracle-Gro(SMG-POP)", "print:hasCutting": + "Trim to size", "print:hasElementID": 3175875, "print:hasElementTitle": "W198251 + SMG FY24 Lowes BOAS POP 17.5 x 3.85", "print:hasFinishedQuantity": 9600, "print:hasFinishedSizeHeight": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "3.85"}, "print:hasFinishedSizeWidth": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "17.5"}, "print:hasFlatSizeHeight": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "3.85"}, "print:hasFlatSizeWidth": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "17.5"}, "print:hasFscPaperBeenSpecified": + "No", "print:hasInternalID": "77ea840f-5190-4464-b5ff-f70a2c9904a4", "print:hasMaterialCategory": + "Plastic", "print:hasMaterialDescription": "20 mil white styrene", "print:hasMaterialRecycledPercentage": + "0%", "print:hasMaterialThicknessOrWeight": 20, "print:hasMaterialType": "Polystyrene", + "print:hasMaterialUnitOfMeasure": "Millimetres (mm)", "print:hasNumberOfVersions": + 1, "print:hasPackingRequirements": "Bundle in sets of 4, bulk pack and ship to + Temecula", "print:hasPermutationID": "69f82e71-6acd-4d5f-ae6e-985152e8a192", "print:hasPrice": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "6144.0"}, 
"print:hasPrintedSides": + "Single sided", "print:hasProductCategory": "Indoor/Outdoor Signage", "print:hasProofType": + "PDF digital proof", "print:hasQuantity": 9600, "print:hasRecycledContentBeenOffered": + "No", "print:hasSendToDetails": "karen.wessel@hhglobal.com", "print:hasSupplierName": + "M&M Displays Inc-HHG Strategic Partner(M&M Displays Inc - 33682 - HHGSP - US + Only)", "print:hasTotalColours": 4, "print:hasUnitOfMeasure": "Inches (in)"}' +- text: '{"@id": "hub:dbe34397419449cd99154d8ce74f83d4", "@type": "print:PrintRecord", + "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, + "print:hasCreatedDate": {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": + "2024-04-30"}, "print:hasCurrencyCode": "USD", "print:hasCustomerHomeCountry": + "United States", "print:hasCustomerID": 31736, "print:hasCustomerName": "AutoZone(AutoZone)", + "print:hasCutting": "Trim to size", "print:hasElementID": 3262919, "print:hasElementTitle": + "6x4 APCs", "print:hasFinishedQuantity": 6396, "print:hasFinishedSizeHeight": + 4, "print:hasFinishedSizeWidth": 6, "print:hasFscPaperBeenSpecified": "No", "print:hasInternalID": + "1ce94b3f-83a2-42c3-90c8-9eeb08342d1f", "print:hasMaterialCategory": "Other", + "print:hasMaterialDescription": "APCs", "print:hasMaterialType": "Other", "print:hasNumberOfVersions": + 551024, "print:hasPermutationID": "dbe34397-4194-49cd-9915-4d8ce74f83d4", "print:hasPrice": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "95878.18"}, "print:hasPrintedSides": + "Single sided", "print:hasProofType": "PDF digital proof", "print:hasQuantity": + 6396, "print:hasRecycledContentBeenOffered": "N/A", "print:hasSupplierName": "Earth + Thebault/Mittera New Jersey(Mittera Group, Inc. 
- 47568 - HHGSP - ISR)", "print:hasTotalColours": + 4, "print:hasUnitOfMeasure": "Inches (in)"}' +- text: '{"@id": "hub:d7ae357afcee48c5bc042a2270c264bb", "@type": "print:PrintRecord", + "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, + "print:hasAdditionalInformation": "Color Wall Accents (deep orange, bright green, + sky blue, midtone orange); branded purpose statement, 8 versions", "print:hasCreatedDate": + {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-05-13"}, "print:hasCurrencyCode": + "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": + 38262, "print:hasCustomerName": "Architecture Products Group (APG)(Architecture + Products Group (APG) (USA))", "print:hasCutting": "Trim to size", "print:hasElementID": + 3284126, "print:hasElementTitle": "Photo paper prints Color Wall Accents (deep + orange, bright green, sky blue, midtone orange); bran...", "print:hasFinishedQuantity": + 12, "print:hasFinishedSizeHeight": 1, "print:hasFinishedSizeWidth": 1, "print:hasFscPaperBeenSpecified": + "No", "print:hasInternalID": "140d4809-bbf1-4b5f-b341-1b7501243aad", "print:hasMaterialCategory": + "Plastic", "print:hasMaterialDescription": "Vinyl", "print:hasMaterialRecycledPercentage": + "0%", "print:hasMaterialThicknessOrWeight": 1, "print:hasMaterialType": "PVC", + "print:hasMaterialUnitOfMeasure": "Pounds (lbs)", "print:hasNumberOfVersions": + 12, "print:hasPermutationID": "d7ae357a-fcee-48c5-bc04-2a2270c264bb", "print:hasPrice": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "122.4"}, "print:hasPrintedSides": + "Single sided", "print:hasProductCategory": "Indoor/Outdoor Signage", "print:hasProofType": + "PDF digital proof", "print:hasQuantity": 12, "print:hasRecycledContentBeenOffered": + "No", "print:hasSupplierName": "Firehouse Image Center(Firehouse Image Center - + 12168 - HHGSP)", "print:hasTotalColours": 4, "print:hasUnitOfMeasure": "Inches + (in)"}' +- text: 
'{"@id": "hub:8671421678e8439f9c4c15fe6dcd5342", "@type": "print:PrintRecord", + "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, + "print:hasCreatedDate": {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": + "2024-06-03"}, "print:hasCurrencyCode": "USD", "print:hasCustomerHomeCountry": + "United States", "print:hasCustomerID": 39317, "print:hasCustomerName": "Gannett, + Inc(Consumer Marketing Services)", "print:hasCutting": "Trim to size", "print:hasElementID": + 3325140, "print:hasElementTitle": "with remit", "print:hasFinishedQuantity": 17971, + "print:hasFscPaperBeenSpecified": "No", "print:hasInternalID": "cbf2a0fd-4443-47b7-90cd-79e268477cd4", + "print:hasMaterialCategory": "Paper", "print:hasMaterialDescription": "Paper", + "print:hasMaterialType": "Paper", "print:hasMaterialUnitOfMeasure": "Pounds (lbs)", + "print:hasPermutationID": "86714216-78e8-439f-9c4c-15fe6dcd5342", "print:hasPrice": + {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "7500.4"}, "print:hasPrintedSides": + "Double sided", "print:hasProofType": "No proof required", "print:hasQuantity": + 17971, "print:hasRecycledContentBeenOffered": "N/A", "print:hasSupplierName": + "TrueSense Marketing Inc(TrueSense Marketing Inc - HHGSP - PI - ISR)", "print:hasUnitOfMeasure": + "Inches (in)"}' +- text: '{"@id": "hub:5c18ef55967a43a281321467fe3452f4", "@type": "print:PrintRecord", + "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, + "print:TotalColoursDoubleSidedSame": 4, "print:coloursToFaceAndReverseAreSame": + "Yes", "print:hasColourDetails": "4 color process", "print:hasCreatedDate": {"@type": + "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-05-14"}, "print:hasCurrencyCode": + "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": + 30642, "print:hasCustomerName": "Station Casinos LLC(Station Casinos)", "print:hasCutting": + "Trim to size", "print:hasElementID": 
3286957, "print:hasElementTitle": "208101 + - 10\" x 10\" DS Tile - NO VELCRO", "print:hasFinishedQuantity": 10, "print:hasFinishedSizeHeight": + 10, "print:hasFinishedSizeWidth": 10, "print:hasFscPaperBeenSpecified": "No", + "print:hasHandFinishing": "Yes", "print:hasInternalID": "5439740f-0593-484a-8f03-cd67e125d36a", + "print:hasMaterialCategory": "Other", "print:hasMaterialDescription": "3/16\" + W/W/W Foamcore", "print:hasMaterialType": "Other", "print:hasNumberOfVersions": + 1, "print:hasPackingRequirements": "DELIVER TO PALACE", "print:hasPermutationID": + "5c18ef55-967a-43a2-8132-1467fe3452f4", "print:hasPrice": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", + "@value": "26.0"}, "print:hasPrintedSides": "Double sided", "print:hasProofType": + "No proof required", "print:hasQuantity": 10, "print:hasRecycledContentBeenOffered": + "No", "print:hasSupplierName": "Quick Change Display(Quick Change Display - HHGSP)", + "print:hasTotalColours": 4, "print:hasUnitOfMeasure": "Inches (in)"}' +metrics: +- accuracy +pipeline_tag: text-classification +library_name: setfit +inference: true +base_model: intfloat/e5-base +--- + +# SetFit with intfloat/e5-base + +This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [intfloat/e5-base](https://huggingface.co/intfloat/e5-base) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification. + +The model has been trained using an efficient few-shot learning technique that involves: + +1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning. +2. Training a classification head with features from the fine-tuned Sentence Transformer. 
+ +## Model Details + +### Model Description +- **Model Type:** SetFit +- **Sentence Transformer body:** [intfloat/e5-base](https://huggingface.co/intfloat/e5-base) +- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance +- **Maximum Sequence Length:** 512 tokens +- **Number of Classes:** 37 classes + + + + +### Model Sources + +- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit) +- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055) +- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit) + +### Model Labels +| Label | Examples | +|:------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1 | | +| 2 | | +| 3 | | +| 0 | | +| 4 | | +| 5 | | +| 6 | | +| 7 | | +| 8 | | +| 9 | | +| 10 | | +| 11 | | +| 12 | | +| 13 | | +| 14 | | +| 15 | | +| 16 | | +| 17 | | +| 18 | | +| 19 | | +| 20 | | +| 21 | | +| 22 | | +| 23 | | +| 24 | | +| 25 | | +| 26 | | +| 27 | | +| 28 | | +| 29 | | +| 30 | | +| 31 | | +| 32 | | +| 33 | | +| 34 | | +| 35 | | +| 36 | | + +## Uses + +### Direct Use for Inference + +First install the SetFit library: + +```bash +pip install setfit +``` + +Then you can load this model and run inference. 
+ +```python +from setfit import SetFitModel + +# Download from the 🤗 Hub +model = SetFitModel.from_pretrained("Northell/ros-classifiers") +# Run inference +preds = model("{\"@id\": \"hub:8671421678e8439f9c4c15fe6dcd5342\", \"@type\": \"print:PrintRecord\", \"extraction:hasActivity\": {\"@id\": \"extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9\"}, \"print:hasCreatedDate\": {\"@type\": \"http://www.w3.org/2001/XMLSchema#date\", \"@value\": \"2024-06-03\"}, \"print:hasCurrencyCode\": \"USD\", \"print:hasCustomerHomeCountry\": \"United States\", \"print:hasCustomerID\": 39317, \"print:hasCustomerName\": \"Gannett, Inc(Consumer Marketing Services)\", \"print:hasCutting\": \"Trim to size\", \"print:hasElementID\": 3325140, \"print:hasElementTitle\": \"with remit\", \"print:hasFinishedQuantity\": 17971, \"print:hasFscPaperBeenSpecified\": \"No\", \"print:hasInternalID\": \"cbf2a0fd-4443-47b7-90cd-79e268477cd4\", \"print:hasMaterialCategory\": \"Paper\", \"print:hasMaterialDescription\": \"Paper\", \"print:hasMaterialType\": \"Paper\", \"print:hasMaterialUnitOfMeasure\": \"Pounds (lbs)\", \"print:hasPermutationID\": \"86714216-78e8-439f-9c4c-15fe6dcd5342\", \"print:hasPrice\": {\"@type\": \"http://www.w3.org/2001/XMLSchema#decimal\", \"@value\": \"7500.4\"}, \"print:hasPrintedSides\": \"Double sided\", \"print:hasProofType\": \"No proof required\", \"print:hasQuantity\": 17971, \"print:hasRecycledContentBeenOffered\": \"N/A\", \"print:hasSupplierName\": \"TrueSense Marketing Inc(TrueSense Marketing Inc - HHGSP - PI - ISR)\", \"print:hasUnitOfMeasure\": \"Inches (in)\"}") +``` + + + + + + + + + +## Training Details + +### Training Set Metrics +| Training set | Min | Mean | Max | +|:-------------|:----|:---------|:----| +| Word count | 81 | 133.9019 | 289 | + +| Label | Training Sample Count | +|:------|:----------------------| +| 0 | 110 | +| 1 | 1 | +| 2 | 39 | +| 3 | 1 | +| 4 | 3 | +| 5 | 2 | +| 6 | 7 | +| 7 | 2 | +| 8 | 1 | +| 9 | 1 | +| 10 | 2 | +| 11 | 4 | +| 12 | 8 
| +| 13 | 1 | +| 14 | 2 | +| 15 | 3 | +| 16 | 1 | +| 17 | 1 | +| 18 | 1 | +| 19 | 1 | +| 20 | 1 | +| 21 | 1 | +| 22 | 1 | +| 23 | 1 | +| 24 | 7 | +| 25 | 1 | +| 26 | 1 | +| 27 | 1 | +| 28 | 1 | +| 29 | 1 | +| 30 | 1 | +| 31 | 1 | +| 32 | 1 | +| 33 | 1 | +| 34 | 1 | +| 35 | 1 | +| 36 | 1 | + +### Training Hyperparameters +- batch_size: (16, 2) +- num_epochs: (1, 64) +- max_steps: -1 +- sampling_strategy: undersampling +- body_learning_rate: (2e-05, 1e-05) +- head_learning_rate: 0.01 +- loss: CosineSimilarityLoss +- distance_metric: cosine_distance +- margin: 0.25 +- end_to_end: False +- use_amp: False +- warmup_proportion: 0.1 +- l2_weight: 0.01 +- seed: 42 +- eval_max_steps: -1 +- load_best_model_at_end: False + +### Training Results +| Epoch | Step | Training Loss | Validation Loss | +|:------:|:----:|:-------------:|:---------------:| +| 0.0011 | 1 | 0.2809 | - | +| 0.0568 | 50 | 0.3179 | - | +| 0.1136 | 100 | 0.1994 | - | +| 0.1705 | 150 | 0.1105 | - | +| 0.2273 | 200 | 0.0543 | - | +| 0.2841 | 250 | 0.0416 | - | +| 0.3409 | 300 | 0.0398 | - | +| 0.3977 | 350 | 0.0252 | - | +| 0.4545 | 400 | 0.0328 | - | +| 0.5114 | 450 | 0.0211 | - | +| 0.5682 | 500 | 0.0202 | - | +| 0.625 | 550 | 0.0139 | - | +| 0.6818 | 600 | 0.0132 | - | +| 0.7386 | 650 | 0.0105 | - | +| 0.7955 | 700 | 0.0074 | - | +| 0.8523 | 750 | 0.01 | - | +| 0.9091 | 800 | 0.0104 | - | +| 0.9659 | 850 | 0.0193 | - | + +### Framework Versions +- Python: 3.10.12 +- SetFit: 1.1.2 +- Sentence Transformers: 4.1.0 +- Transformers: 4.52.4 +- PyTorch: 2.7.1+cu126 +- Datasets: 3.4.1 +- Tokenizers: 0.21.1 + +## Citation + +### BibTeX +```bibtex +@article{https://doi.org/10.48550/arxiv.2209.11055, + doi = {10.48550/ARXIV.2209.11055}, + url = {https://arxiv.org/abs/2209.11055}, + author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren}, + keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: 
Computer and information sciences}, + title = {Efficient Few-Shot Learning Without Prompts}, + publisher = {arXiv}, + year = {2022}, + copyright = {Creative Commons Attribution 4.0 International} +} +``` + + + + + + \ No newline at end of file