---
# metadata: descriptive fields for the model (looks like Hugging Face model
# card front matter — confirm against the consumer).

# Dataset ids associated with the model.
datasets:
- jondurbin/gutenberg-dpo-v0.1
# NOTE(review): this id is also listed under base_model below and looks like a
# model repository rather than a dataset — confirm it belongs in this list.
- Qwen/Qwen2.5-14B-Instruct
- HuggingFaceH4/ultrafeedback_binarized

# Model(s) this one is derived from.
base_model:
- Qwen/Qwen2.5-14B-Instruct

library_name: transformers

# Free-form tags.
tags:
- qwen
- qwen2.5
- finetune
- dpo
- qwen2
- chat
- conversational
- instruct
- storywriting
- roleplay

license: apache-2.0

language:
- en

pipeline_tag: text-generation
---
# Merge recipe (keys match a mergekit-style configuration — TODO confirm the
# consumer). Kept as its own YAML document: its top-level `base_model` would
# otherwise duplicate the `base_model` key of the metadata mapping earlier in
# the file, and most parsers silently keep only the last value.

# Models fed into the merge.
# NOTE(review): entries written as `A+B` presumably mean "model A with adapter
# B applied" — verify against the merge tool's documentation.
models:
- model: v000000/Qwen2.5-14B-Gutenberg-1e
- model: v000000/Qwen2.5-14B-Gutenberg-0.25e-Early-STACKED-0.37e
- model: v000000/Qwen2.5-14B-Gutenberg-0.25e-Early
- model: v000000/Qwen2.5-14B-Gutenberg-1e+v000000/qwen_results37
- model: Qwen/Qwen2.5-14B-Instruct+v000000/qwen_results37
- model: v000000/Qwen2.5-14B-Gutenberg-0.25e-Early-STACKED-0.37e+v000000/qwen_results100
- model: Qwen/Qwen2.5-14B-Instruct+v000000/qwen_results100
# NOTE(review): the next entry is listed twice. Possibly deliberate (to give
# it more influence in the merge); confirm, or drop one copy.
- model: tanliboy/lambda-qwen2.5-14b-dpo-test
- model: tanliboy/lambda-qwen2.5-14b-dpo-test

# Reference model for the merge; also appears as the first entry in `models`.
base_model: v000000/Qwen2.5-14B-Gutenberg-1e
merge_method: model_stock
dtype: bfloat16