
mlx-lm taskfile

a Taskfile (https://taskfile.dev) collecting common mlx-lm workflows on Apple silicon: model download and management, text generation, an OpenAI-compatible server, conversion and quantization, LoRA/DoRA fine-tuning, fusing adapters, and GGUF export.

mlx-lm.yaml
version: '3'

vars:
  DEFAULT_MODEL: mistralai/Mistral-7B-Instruct-v0.3
  DEFAULT_QUANT_MODEL: mlx-community/Llama-3.2-3B-Instruct-4bit
  OUTPUT_DIR: ./outputs
  DATA_DIR: ./data
  ADAPTERS_DIR: ./adapters
  MAX_TOKENS: 500
  TEMP: 0.7
  TOP_P: 0.9
  BATCH_SIZE: 1
  LORA_LAYERS: 4
  ITERS: 1000
  HF_USERNAME: your-username

env:
  HF_HUB_ENABLE_HF_TRANSFER: 1
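  # NOTE: this only takes effect if the hf_transfer package is installed:
  #   pip install hf_transfer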

tasks:
  # Environment Setup
  install:
    desc: Install MLX and MLX-LM packages
    cmds:
      - pip install mlx mlx-lm
  
  install-dev:
    desc: Install MLX from source with development dependencies (run clone-repos first)
    dir: mlx
    cmds:
      - pip install -e ".[dev]"

  clone-repos:
    desc: Clone the MLX repositories
    cmds:
      - git clone https://github.com/ml-explore/mlx.git
      - git clone https://github.com/ml-explore/mlx-examples.git
      - git clone https://github.com/ml-explore/mlx-lm.git
  
  # Model Management
  download-model:
    desc: Download a model from Hugging Face
    cmds:
      - huggingface-cli download --local-dir {{.LOCAL_DIR | default "./models"}} {{.MODEL | default .DEFAULT_MODEL}}
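  # Example (model and target dir are placeholders):
  #   task download-model MODEL=mlx-community/Llama-3.2-3B-Instruct-4bit LOCAL_DIR=./models/llama-3.2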
    
  list-models:
    desc: Scan and list all locally cached models
    cmds:
      - mlx_lm.manage --scan
  
  delete-model:
    desc: Delete models matching a pattern
    cmds:
      - mlx_lm.manage --delete --pattern "{{.PATTERN}}"
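  # Example: remove cached models whose name matches a substring
  #   task delete-model PATTERN=mlx-community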
    
  # Text Generation
  generate:
    desc: Generate text from a model
    cmds:
      - mkdir -p {{.OUTPUT_DIR}}
      - |
        mlx_lm.generate \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --prompt "{{.PROMPT | default "Hello, how are you?"}}" \
          --max-tokens {{.MAX_TOKENS}} \
          --temp {{.TEMP}} \
          --top-p {{.TOP_P}}
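  # Example invocation, overriding the defaults per run:
  #   task generate PROMPT="Write a haiku about Apple silicon" MAX_TOKENS=100 TEMP=0.2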
  
  generate-stream:
    desc: Generate text from a model (mlx_lm.generate streams tokens to the terminal as it generates; there is no separate --stream flag)
    cmds:
      - |
        mlx_lm.generate \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --prompt "{{.PROMPT | default "Hello, how are you?"}}" \
          --max-tokens {{.MAX_TOKENS}} \
          --temp {{.TEMP}} \
          --top-p {{.TOP_P}}
  
  chat:
    desc: Start an interactive chat session with a model
    cmds:
      - mlx_lm.chat --model {{.MODEL | default .DEFAULT_MODEL}}
  
  server:
    desc: Start an OpenAI-compatible API server
    cmds:
      - mlx_lm.server
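  # The server listens on 127.0.0.1:8080 by default. A minimal smoke test
  # (model name here is a placeholder; pass --model to the server instead
  # if you want a fixed model):
  #   curl localhost:8080/v1/chat/completions \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "mlx-community/Llama-3.2-3B-Instruct-4bit",
  #          "messages": [{"role": "user", "content": "Say hello"}],
  #          "max_tokens": 50}'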
  
  # Model Conversion
  convert:
    desc: Convert a Hugging Face model to MLX format
    cmds:
      - |
        mlx_lm.convert \
          --hf-path {{.MODEL | default .DEFAULT_MODEL}} \
          {{if eq (.QUANTIZE | default "false") "true"}}--quantize{{end}} \
          {{if .UPLOAD_REPO}}--upload-repo {{.UPLOAD_REPO}}{{end}} \
          {{if .OUTPUT}}--mlx-path {{.OUTPUT}}{{end}}
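  # Examples:
  #   task convert                                              # plain fp16 conversion of the default model
  #   task convert QUANTIZE=true OUTPUT=./models/mistral-4bit   # 4-bit quantized copy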
  
  convert-quantize:
    desc: Convert and quantize a Hugging Face model
    cmds:
      - task: convert
        vars:
          QUANTIZE: "true"
  
  upload-model:
    desc: Convert, quantize, and upload a model to Hugging Face
    cmds:
      - task: convert
        vars:
          QUANTIZE: "true"
          UPLOAD_REPO: "{{.HF_USERNAME}}/{{.REPO_NAME}}"
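  # Example (REPO_NAME has no default and must be supplied):
  #   task upload-model REPO_NAME=Mistral-7B-Instruct-v0.3-4bit HF_USERNAME=my-hf-user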
  
  # Fine-tuning
  prepare-data:
    desc: Create directories for fine-tuning data
    cmds:
      - mkdir -p {{.DATA_DIR}}
      - mkdir -p {{.ADAPTERS_DIR}}
      - echo "Place your train.jsonl and valid.jsonl files in {{.DATA_DIR}}"
  
  finetune-lora:
    desc: Fine-tune a model using LoRA
    cmds:
      - mkdir -p {{.ADAPTERS_DIR}}
      - |
        mlx_lm.lora \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --train \
          --data {{.DATA_DIR}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --batch-size {{.BATCH_SIZE}} \
          --num-layers {{.LORA_LAYERS}} \
          --iters {{.ITERS}} \
          {{if .GRAD_CHECKPOINT}}--grad-checkpoint{{end}}
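  # Example: a shorter run on a quantized model, with gradient checkpointing
  # enabled to trade compute for memory:
  #   task finetune-lora MODEL=mlx-community/Llama-3.2-3B-Instruct-4bit ITERS=600 GRAD_CHECKPOINT=1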
  
  finetune-dora:
    desc: Fine-tune a model using DoRA
    cmds:
      - |
        mlx_lm.lora \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --train \
          --fine-tune-type dora \
          --data {{.DATA_DIR}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --batch-size {{.BATCH_SIZE}} \
          --num-layers {{.LORA_LAYERS}} \
          --iters {{.ITERS}}
  
  finetune-full:
    desc: Fine-tune the full model weights
    cmds:
      - |
        mlx_lm.lora \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --train \
          --fine-tune-type full \
          --data {{.DATA_DIR}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --batch-size {{.BATCH_SIZE}} \
          --iters {{.ITERS}}
  
  test-finetune:
    desc: Test a fine-tuned model
    cmds:
      - |
        mlx_lm.lora \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --data {{.DATA_DIR}} \
          --test
  
  generate-finetune:
    desc: Generate text using a fine-tuned model
    cmds:
      - |
        mlx_lm.generate \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --prompt "{{.PROMPT | default "Hello, how are you?"}}" \
          --max-tokens {{.MAX_TOKENS}}
  
  fuse-model:
    desc: Fuse LoRA adapters with the original model
    cmds:
      - |
        mlx_lm.fuse \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --save-path {{.OUTPUT_DIR}}/fused_model
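  # The fused output is a regular MLX model directory and can be used directly:
  #   mlx_lm.generate --model ./outputs/fused_model --prompt "..."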

  fuse-upload:
    desc: Fuse adapters and upload the model to Hugging Face
    cmds:
      - |
        mlx_lm.fuse \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --save-path {{.OUTPUT_DIR}}/fused_model \
          --upload-repo {{.HF_USERNAME}}/{{.REPO_NAME}}
  
  export-gguf:
    desc: Export a fused model to GGUF format
    cmds:
      - |
        mlx_lm.fuse \
          --model {{.MODEL | default .DEFAULT_MODEL}} \
          --adapter-path {{.ADAPTERS_DIR}} \
          --save-path {{.OUTPUT_DIR}}/fused_model \
          --export-gguf
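  # NOTE: as of writing, GGUF export in mlx-lm covers only a subset of
  # architectures (Llama/Mistral-style) with fp16 weights; quantized MLX
  # models need to be fused with --de-quantize first.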
  
  # Development and examples
  run-mnist:
    desc: Run the MNIST example from MLX examples
    dir: mlx-examples/mnist
    cmds:
      - python main.py

  run-transformer:
    desc: Run the transformer language model example
    dir: mlx-examples/transformer_lm
    cmds:
      - python main.py --tiny

  run-whisper:
    desc: Run the Whisper speech recognition example
    dir: mlx-examples/whisper
    cmds:
      - python main.py {{.AUDIO_FILE}}
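  # Example (assumes the example script accepts an audio file path):
  #   task run-whisper AUDIO_FILE=./recordings/meeting.wav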

  # Pipeline tasks
  quickstart:
    desc: Quick setup and test of MLX
    cmds:
      - task: install
      - task: download-model
        vars:
          MODEL: "{{.DEFAULT_QUANT_MODEL}}"
      - task: generate
        vars:
          MODEL: "{{.DEFAULT_QUANT_MODEL}}"
          PROMPT: "Explain what the MLX framework is in one paragraph."

  complete-finetune:
    desc: Complete pipeline for fine-tuning a model
    cmds:
      - task: prepare-data
      - task: finetune-lora
      - task: test-finetune
      - task: fuse-model
      - echo "Fine-tuning complete. Fused model saved to {{.OUTPUT_DIR}}/fused_model"