From 5e8c1bb7fe03f1a3cd9d7b81043ffa7161bf2a4f Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:08:25 -0700 Subject: [PATCH 01/38] Initial red-candle provider implementation --- lib/ruby_llm.rb | 9 + lib/ruby_llm/configuration.rb | 4 + lib/ruby_llm/providers/red_candle.rb | 70 ++++++ .../providers/red_candle/capabilities.rb | 122 ++++++++++ lib/ruby_llm/providers/red_candle/chat.rb | 168 ++++++++++++++ lib/ruby_llm/providers/red_candle/models.rb | 81 +++++++ .../providers/red_candle/streaming.rb | 39 ++++ ruby_llm.gemspec | 2 + .../providers/red_candle/capabilities_spec.rb | 117 ++++++++++ .../providers/red_candle/chat_spec.rb | 209 ++++++++++++++++++ .../providers/red_candle/models_spec.rb | 103 +++++++++ spec/ruby_llm/providers/red_candle_spec.rb | 76 +++++++ 12 files changed, 1000 insertions(+) create mode 100644 lib/ruby_llm/providers/red_candle.rb create mode 100644 lib/ruby_llm/providers/red_candle/capabilities.rb create mode 100644 lib/ruby_llm/providers/red_candle/chat.rb create mode 100644 lib/ruby_llm/providers/red_candle/models.rb create mode 100644 lib/ruby_llm/providers/red_candle/streaming.rb create mode 100644 spec/ruby_llm/providers/red_candle/capabilities_spec.rb create mode 100644 spec/ruby_llm/providers/red_candle/chat_spec.rb create mode 100644 spec/ruby_llm/providers/red_candle/models_spec.rb create mode 100644 spec/ruby_llm/providers/red_candle_spec.rb diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb index 9d88eb8dc..0db7f8743 100644 --- a/lib/ruby_llm.rb +++ b/lib/ruby_llm.rb @@ -89,6 +89,15 @@ def logger RubyLLM::Provider.register :perplexity, RubyLLM::Providers::Perplexity RubyLLM::Provider.register :vertexai, RubyLLM::Providers::VertexAI +# Optional Red Candle provider - only available if gem is installed +begin + require 'candle' + require 'ruby_llm/providers/red_candle' + RubyLLM::Provider.register :red_candle, RubyLLM::Providers::RedCandle +rescue LoadError + # Red Candle is optional - provider won't be available if gem isn't installed +end + if defined?(Rails::Railtie) require 'ruby_llm/railtie' require 'ruby_llm/active_record/acts_as' diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 34a842c2f..89d79e90f 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -23,6 +23,10 @@ class Configuration :gpustack_api_base, :gpustack_api_key, :mistral_api_key, + # Red Candle configuration + :red_candle_device, + :red_candle_cache_dir, + :red_candle_debug, # Default models :default_model, :default_embedding_model, diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb new file mode 100644 index 000000000..f200e4af4 --- /dev/null +++ b/lib/ruby_llm/providers/red_candle.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + # Red Candle provider for local LLM execution using the Candle Rust crate. + class RedCandle < Provider + include RedCandle::Chat + include RedCandle::Models + include RedCandle::Capabilities + include RedCandle::Streaming + + def initialize(config) + ensure_red_candle_available! + super + @loaded_models = {} # Cache for loaded models + @device = determine_device(config) + end + + def api_base + nil # Local execution, no API base needed + end + + def headers + {} # No HTTP headers needed + end + + class << self + def capabilities + RedCandle::Capabilities + end + + def configuration_requirements + [] # No required config, device is optional + end + + def local? 
+ true + end + end + + private + + def ensure_red_candle_available! + require 'candle' + rescue LoadError + raise Error.new(nil, "Red Candle gem is not installed. Add 'gem \"red-candle\", \"~> 1.2.3\"' to your Gemfile.") + end + + def determine_device(config) + if config.red_candle_device + case config.red_candle_device.to_s.downcase + when 'cpu' + ::Candle::Device.cpu + when 'cuda', 'gpu' + ::Candle::Device.cuda + when 'metal' + ::Candle::Device.metal + else + ::Candle::Device.best + end + else + ::Candle::Device.best + end + rescue StandardError => e + RubyLLM.logger.warn "Failed to initialize device: #{e.message}. Falling back to CPU." + ::Candle::Device.cpu + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb new file mode 100644 index 000000000..3266f1ebf --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Capabilities + extend self + + def supports_vision? + false + end + + def supports_functions? + false + end + + def supports_streaming? + true + end + + def supports_structured_output? + true + end + + def supports_regex_constraints? + true + end + + def supports_embeddings? + false # Future enhancement - Red Candle does support embedding models + end + + def supports_audio? + false + end + + def supports_pdf? + false + end + + def normalize_temperature(temperature, model_id) + # Red Candle uses standard 0-2 range + return 0.7 if temperature.nil? + + temperature = temperature.to_f + temperature.clamp(0.0, 2.0) + end + + def model_context_window(model_id) + case model_id + when /gemma-3-4b/i + 8192 + when /qwen2\.5-0\.5b/i + 32_768 + else + 4096 # Conservative default + end + end + + def pricing + # Local execution - no API costs + { + input_tokens_per_dollar: Float::INFINITY, + output_tokens_per_dollar: Float::INFINITY, + input_price_per_million_tokens: 0.0, + output_price_per_million_tokens: 0.0 + } + end + + def default_max_tokens + 512 + end + + def max_temperature + 2.0 + end + + def min_temperature + 0.0 + end + + def supports_temperature? + true + end + + def supports_top_p? + true + end + + def supports_top_k? + true + end + + def supports_repetition_penalty? + true + end + + def supports_seed? + true + end + + def supports_stop_sequences? + true + end + + def model_families + %w[gemma qwen] + end + + def available_on_platform? + # Check if Candle can be loaded + begin + require 'candle' + true + rescue LoadError + false + end + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb new file mode 100644 index 000000000..ec9d7c8ed --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Chat + def render_payload(messages, tools:, temperature:, model:, stream:, schema:) + # Red Candle doesn't support tools + if tools && !tools.empty? 
+ raise Error.new(nil, 'Red Candle provider does not support tool calling') + end + + { + messages: messages, + temperature: temperature, + model: model.id, + stream: stream, + schema: schema + } + end + + def perform_completion!(payload) + model = ensure_model_loaded!(payload[:model]) + messages = format_messages(payload[:messages]) + + # Apply chat template if available + prompt = if model.respond_to?(:apply_chat_template) + model.apply_chat_template(messages) + else + # Fallback to simple formatting + messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + end + + # Configure generation + config_opts = { + temperature: payload[:temperature] || 0.7, + max_length: payload[:max_tokens] || 512 + } + + # Handle structured generation if schema provided + response = if payload[:schema] + generate_with_schema(model, prompt, payload[:schema], config_opts) + else + model.generate( + prompt, + config: ::Candle::GenerationConfig.balanced(**config_opts) + ) + end + + format_response(response, payload[:schema]) + end + + def perform_streaming_completion!(payload, &block) + model = ensure_model_loaded!(payload[:model]) + messages = format_messages(payload[:messages]) + + # Apply chat template if available + prompt = if model.respond_to?(:apply_chat_template) + model.apply_chat_template(messages) + else + messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + end + + # Configure generation + config = ::Candle::GenerationConfig.balanced( + temperature: payload[:temperature] || 0.7, + max_length: payload[:max_tokens] || 512 + ) + + # Stream tokens + buffer = '' + model.generate_stream(prompt, config: config) do |token| + buffer += token + chunk = format_stream_chunk(token) + block.call(chunk) + end + + # Send final chunk with finish reason + final_chunk = { + delta: { content: '' }, + finish_reason: 'stop' + } + block.call(final_chunk) + end + + private + + def ensure_model_loaded!(model_id) + @loaded_models[model_id] ||= load_model(model_id) + end + + def load_model(model_id) + # Handle GGUF models with specific files + if model_id == 'google/gemma-3-4b-it-qat-q4_0-gguf' + ::Candle::LLM.from_pretrained( + model_id, + device: @device, + gguf_file: 'gemma-3-4b-it-q4_0.gguf' + ) + else + ::Candle::LLM.from_pretrained(model_id, device: @device) + end + rescue StandardError => e + raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") + end + + def format_messages(messages) + messages.map do |msg| + { + role: msg[:role].to_s, + content: extract_message_content(msg) + } + end + end + + def extract_message_content(message) + content = message[:content] + return content if content.is_a?(String) + + # Handle array content (e.g., with images) + if content.is_a?(Array) + content.map do |part| + part[:text] if part[:type] == 'text' + end.compact.join(' ') + else + content.to_s + end + end + + def generate_with_schema(model, prompt, schema, config_opts) + model.generate_structured( + prompt, + schema: schema, + **config_opts + ) + rescue StandardError => e + RubyLLM.logger.warn "Structured generation failed: #{e.message}. Falling back to regular generation." 
+ model.generate( + prompt, + config: ::Candle::GenerationConfig.balanced(**config_opts) + ) + end + + def format_response(response, schema) + content = if schema && !response.is_a?(String) + # Structured response + JSON.generate(response) + else + response + end + + { + content: content, + role: 'assistant', + finish_reason: 'stop' + } + end + + def format_stream_chunk(token) + { + delta: { content: token }, + finish_reason: nil + } + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb new file mode 100644 index 000000000..f1d8a7754 --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Models + SUPPORTED_MODELS = [ + { + id: 'google/gemma-3-4b-it-qat-q4_0-gguf', + name: 'Gemma 3 4B Instruct (Quantized)', + gguf_file: 'gemma-3-4b-it-q4_0.gguf', + context_window: 8192, + family: 'gemma', + architecture: 'gemma2', + supports_chat: true, + supports_structured: true + }, + { + id: 'Qwen/Qwen2.5-0.5B-Instruct', + name: 'Qwen 2.5 0.5B Instruct', + context_window: 32_768, + family: 'qwen', + architecture: 'qwen2', + supports_chat: true, + supports_structured: true + } + ].freeze + + def list_models + SUPPORTED_MODELS.map do |model_data| + Model::Info.new( + id: model_data[:id], + name: model_data[:name], + provider: slug, + family: model_data[:family], + context_window: model_data[:context_window], + capabilities: %w[streaming structured_output], + modalities: { input: %w[text], output: %w[text] } + ) + end + end + + def models + @models ||= list_models + end + + def model(id) + models.find { |m| m.id == id } || + raise(Error.new(nil, "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}")) + end + + def model_available?(id) + SUPPORTED_MODELS.any? { |m| m[:id] == id } + end + + def model_ids + SUPPORTED_MODELS.map { |m| m[:id] } + end + + def model_info(id) + SUPPORTED_MODELS.find { |m| m[:id] == id } + end + + def supports_chat?(model_id) + info = model_info(model_id) + info ? info[:supports_chat] : false + end + + def supports_structured?(model_id) + info = model_info(model_id) + info ? info[:supports_structured] : false + end + + def gguf_file_for(model_id) + info = model_info(model_id) + info ? info[:gguf_file] : nil + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/streaming.rb b/lib/ruby_llm/providers/red_candle/streaming.rb new file mode 100644 index 000000000..f0598ce8e --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/streaming.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Streaming + def stream(payload, &block) + if payload[:stream] + perform_streaming_completion!(payload, &block) + else + # Non-streaming fallback + result = perform_completion!(payload) + # Yield the complete result as a single chunk + chunk = { + content: result[:content], + role: result[:role], + finish_reason: result[:finish_reason] + } + block.call(chunk) + end + end + + private + + def stream_processor + # Red Candle handles streaming internally through blocks + # This method is here for compatibility with the base streaming interface + nil + end + + def process_stream_response(response) + # Red Candle doesn't use HTTP responses + # Streaming is handled directly in perform_streaming_completion! 
+ response + end + end + end + end +end \ No newline at end of file diff --git a/ruby_llm.gemspec b/ruby_llm.gemspec index 3e6e6af20..cf92fcd44 100644 --- a/ruby_llm.gemspec +++ b/ruby_llm.gemspec @@ -41,4 +41,6 @@ Gem::Specification.new do |spec| spec.add_dependency 'faraday-retry', '>= 1' spec.add_dependency 'marcel', '~> 1.0' spec.add_dependency 'zeitwerk', '~> 2' + + spec.add_development_dependency 'red-candle', '~> 1.2' end diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb new file mode 100644 index 000000000..53c0b7e5e --- /dev/null +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -0,0 +1,117 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle::Capabilities do + describe 'feature support' do + it 'does not support vision' do + expect(described_class.supports_vision?).to be false + end + + it 'does not support functions' do + expect(described_class.supports_functions?).to be false + end + + it 'supports streaming' do + expect(described_class.supports_streaming?).to be true + end + + it 'supports structured output' do + expect(described_class.supports_structured_output?).to be true + end + + it 'supports regex constraints' do + expect(described_class.supports_regex_constraints?).to be true + end + + it 'does not support embeddings yet' do + expect(described_class.supports_embeddings?).to be false + end + + it 'does not support audio' do + expect(described_class.supports_audio?).to be false + end + + it 'does not support PDF' do + expect(described_class.supports_pdf?).to be false + end + end + + describe '#normalize_temperature' do + it 'returns default temperature when nil' do + expect(described_class.normalize_temperature(nil, 'any_model')).to eq(0.7) + end + + it 'clamps temperature to valid range' do + expect(described_class.normalize_temperature(-1, 'any_model')).to eq(0.0) + expect(described_class.normalize_temperature(3, 'any_model')).to eq(2.0) + expect(described_class.normalize_temperature(1.5, 'any_model')).to eq(1.5) + end + end + + describe '#model_context_window' do + it 'returns correct context window for known models' do + expect(described_class.model_context_window('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq(8192) + expect(described_class.model_context_window('Qwen/Qwen2.5-0.5B-Instruct')).to eq(32_768) + end + + it 'returns default for unknown models' do + expect(described_class.model_context_window('unknown/model')).to eq(4096) + end + end + + describe '#pricing' do + it 'returns infinite tokens per dollar for local execution' do + pricing = described_class.pricing + expect(pricing[:input_tokens_per_dollar]).to eq(Float::INFINITY) + expect(pricing[:output_tokens_per_dollar]).to eq(Float::INFINITY) + expect(pricing[:input_price_per_million_tokens]).to eq(0.0) + expect(pricing[:output_price_per_million_tokens]).to eq(0.0) + end + end + + describe 'generation parameters' do + it 'provides correct defaults and limits' do + expect(described_class.default_max_tokens).to eq(512) + expect(described_class.max_temperature).to eq(2.0) + expect(described_class.min_temperature).to eq(0.0) + end + + it 'supports various generation parameters' do + expect(described_class.supports_temperature?).to be true + expect(described_class.supports_top_p?).to be true + expect(described_class.supports_top_k?).to be true + expect(described_class.supports_repetition_penalty?).to be true + expect(described_class.supports_seed?).to be true + 
expect(described_class.supports_stop_sequences?).to be true + end + end + + describe '#model_families' do + it 'returns supported model families' do + expect(described_class.model_families).to eq(%w[gemma qwen]) + end + end + + describe '#available_on_platform?' do + context 'when Candle is available' do + before do + allow(described_class).to receive(:require).with('candle').and_return(true) + end + + it 'returns true' do + expect(described_class.available_on_platform?).to be true + end + end + + context 'when Candle is not available' do + before do + allow(described_class).to receive(:require).with('candle').and_raise(LoadError) + end + + it 'returns false' do + expect(described_class.available_on_platform?).to be false + end + end + end +end \ No newline at end of file diff --git a/spec/ruby_llm/providers/red_candle/chat_spec.rb b/spec/ruby_llm/providers/red_candle/chat_spec.rb new file mode 100644 index 000000000..513dd192c --- /dev/null +++ b/spec/ruby_llm/providers/red_candle/chat_spec.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle::Chat do + let(:config) { RubyLLM::Configuration.new } + let(:provider) { RubyLLM::Providers::RedCandle.new(config) } + let(:model) { provider.model('Qwen/Qwen2.5-0.5B-Instruct') } + + before(:all) do + begin + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' + end + end + + describe '#render_payload' do + let(:messages) { [{ role: 'user', content: 'Hello' }] } + + it 'creates a valid payload' do + payload = provider.render_payload( + messages, + tools: nil, + temperature: 0.7, + model: model, + stream: false, + schema: nil + ) + + expect(payload).to include( + messages: messages, + temperature: 0.7, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + stream: false, + schema: nil + ) + end + + it 'raises error when tools are provided' do + tools = [{ name: 'calculator', description: 'Does math' }] + + expect do + provider.render_payload( + messages, + tools: tools, + temperature: 0.7, + model: model, + stream: false, + schema: nil + ) + end.to raise_error(RubyLLM::Error, /does not support tool calling/) + end + + it 'includes schema when provided' do + schema = { type: 'object', properties: { name: { type: 'string' } } } + + payload = provider.render_payload( + messages, + tools: nil, + temperature: 0.7, + model: model, + stream: false, + schema: schema + ) + + expect(payload[:schema]).to eq(schema) + end + end + + describe '#perform_completion!' 
do + let(:messages) { [{ role: 'user', content: 'Test message' }] } + let(:mock_model) { double('Candle::LLM') } + + before do + allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) + allow(mock_model).to receive(:respond_to?).with(:apply_chat_template).and_return(true) + allow(mock_model).to receive(:apply_chat_template).and_return('formatted prompt') + end + + context 'with regular generation' do + it 'generates a response' do + allow(mock_model).to receive(:generate).and_return('Generated response') + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7 + } + + result = provider.perform_completion!(payload) + + expect(result).to include( + content: 'Generated response', + role: 'assistant', + finish_reason: 'stop' + ) + end + end + + context 'with structured generation' do + it 'generates structured output' do + schema = { type: 'object', properties: { name: { type: 'string' } } } + structured_response = { 'name' => 'Alice' } + + allow(mock_model).to receive(:generate_structured).and_return(structured_response) + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7, + schema: schema + } + + result = provider.perform_completion!(payload) + + expect(result[:content]).to eq(JSON.generate(structured_response)) + expect(result[:role]).to eq('assistant') + end + + it 'falls back to regular generation on structured failure' do + schema = { type: 'object', properties: { name: { type: 'string' } } } + + allow(mock_model).to receive(:generate_structured).and_raise(StandardError, 'Structured gen failed') + allow(mock_model).to receive(:generate).and_return('Fallback response') + allow(RubyLLM.logger).to receive(:warn) + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7, + schema: schema + } + + result = provider.perform_completion!(payload) + + expect(result[:content]).to eq('Fallback response') + expect(RubyLLM.logger).to have_received(:warn).with(/Structured generation failed/) + end + end + end + + describe '#perform_streaming_completion!' do + let(:messages) { [{ role: 'user', content: 'Stream test' }] } + let(:mock_model) { double('Candle::LLM') } + + before do + allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) + allow(mock_model).to receive(:respond_to?).with(:apply_chat_template).and_return(true) + allow(mock_model).to receive(:apply_chat_template).and_return('formatted prompt') + end + + it 'streams tokens and sends finish reason' do + tokens = %w[Hello world !] 
+ chunks_received = [] + + allow(mock_model).to receive(:generate_stream) do |_prompt, config:, &block| + tokens.each { |token| block.call(token) } + end + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7 + } + + provider.perform_streaming_completion!(payload) do |chunk| + chunks_received << chunk + end + + # Check token chunks + tokens.each_with_index do |token, i| + expect(chunks_received[i]).to include( + delta: { content: token }, + finish_reason: nil + ) + end + + # Check final chunk + expect(chunks_received.last).to include( + delta: { content: '' }, + finish_reason: 'stop' + ) + end + end + + describe 'message formatting' do + it 'handles string content' do + messages = [{ role: 'user', content: 'Simple text' }] + formatted = provider.send(:format_messages, messages) + + expect(formatted).to eq([{ role: 'user', content: 'Simple text' }]) + end + + it 'handles array content with text parts' do + messages = [{ + role: 'user', + content: [ + { type: 'text', text: 'Part 1' }, + { type: 'text', text: 'Part 2' }, + { type: 'image', url: 'ignored.jpg' } + ] + }] + + formatted = provider.send(:format_messages, messages) + expect(formatted).to eq([{ role: 'user', content: 'Part 1 Part 2' }]) + end + end +end \ No newline at end of file diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb new file mode 100644 index 000000000..8c89147bc --- /dev/null +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle::Models do + let(:config) { RubyLLM::Configuration.new } + let(:provider) { RubyLLM::Providers::RedCandle.new(config) } + + before(:all) do + begin + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' + end + end + + describe '#models' do + it 'returns an array of supported models' do + models = provider.models + expect(models).to be_an(Array) + expect(models.size).to eq(2) + expect(models.first).to be_a(RubyLLM::Model::Info) + end + + it 'includes the expected model IDs' do + model_ids = provider.models.map(&:id) + expect(model_ids).to include('google/gemma-3-4b-it-qat-q4_0-gguf') + expect(model_ids).to include('Qwen/Qwen2.5-0.5B-Instruct') + end + end + + describe '#model' do + context 'with a valid model ID' do + it 'returns the model' do + model = provider.model('Qwen/Qwen2.5-0.5B-Instruct') + expect(model).to be_a(RubyLLM::Model::Info) + expect(model.id).to eq('Qwen/Qwen2.5-0.5B-Instruct') + end + end + + context 'with an invalid model ID' do + it 'raises an error' do + expect { provider.model('invalid/model') }.to raise_error( + RubyLLM::Error, + /Model invalid\/model not found/ + ) + end + end + end + + describe '#model_available?' 
do + it 'returns true for supported models' do + expect(provider.model_available?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true + expect(provider.model_available?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + end + + it 'returns false for unsupported models' do + expect(provider.model_available?('gpt-4')).to be false + end + end + + describe '#model_info' do + it 'returns model information' do + info = provider.model_info('Qwen/Qwen2.5-0.5B-Instruct') + expect(info).to include( + id: 'Qwen/Qwen2.5-0.5B-Instruct', + name: 'Qwen 2.5 0.5B Instruct', + context_window: 32_768, + family: 'qwen', + supports_chat: true, + supports_structured: true + ) + end + + it 'returns nil for unknown models' do + expect(provider.model_info('unknown')).to be_nil + end + end + + describe '#gguf_file_for' do + it 'returns the GGUF file for Gemma model' do + expect(provider.gguf_file_for('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq('gemma-3-4b-it-q4_0.gguf') + end + + it 'returns nil for non-GGUF models' do + expect(provider.gguf_file_for('Qwen/Qwen2.5-0.5B-Instruct')).to be_nil + end + end + + describe '#supports_chat?' do + it 'returns true for all current models' do + expect(provider.supports_chat?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true + expect(provider.supports_chat?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + end + end + + describe '#supports_structured?' do + it 'returns true for all current models' do + expect(provider.supports_structured?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true + expect(provider.supports_structured?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + end + end +end \ No newline at end of file diff --git a/spec/ruby_llm/providers/red_candle_spec.rb b/spec/ruby_llm/providers/red_candle_spec.rb new file mode 100644 index 000000000..8e1216976 --- /dev/null +++ b/spec/ruby_llm/providers/red_candle_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle do + let(:config) { RubyLLM::Configuration.new } + let(:provider) { described_class.new(config) } + + # Skip all tests if Red Candle is not available + before(:all) do + begin + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' + end + end + + describe '#initialize' do + context 'when Red Candle is not available' do + before do + allow_any_instance_of(described_class).to receive(:require).with('candle').and_raise(LoadError) + end + + it 'raises an informative error' do + expect { described_class.new(config) }.to raise_error( + RubyLLM::Error, + /Red Candle gem is not installed/ + ) + end + end + + + context 'with device configuration' do + it 'uses the configured device' do + config.red_candle_device = 'cpu' + provider = described_class.new(config) + expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.cpu) + end + + it 'defaults to best device when not configured' do + provider = described_class.new(config) + expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.best) + end + end + end + + describe '#api_base' do + it 'returns nil for local execution' do + expect(provider.api_base).to be_nil + end + end + + describe '#headers' do + it 'returns empty hash' do + expect(provider.headers).to eq({}) + end + end + + describe '.local?' 
do + it 'returns true' do + expect(described_class.local?).to be true + end + end + + describe '.configuration_requirements' do + it 'returns empty array' do + expect(described_class.configuration_requirements).to eq([]) + end + end + + describe '.capabilities' do + it 'returns the Capabilities module' do + expect(described_class.capabilities).to eq(RubyLLM::Providers::RedCandle::Capabilities) + end + end +end \ No newline at end of file From 5c770ddd3cb31cd1b0054f872f26c323355c5b8b Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:31:32 -0700 Subject: [PATCH 02/38] Starting to work --- lib/ruby_llm/providers/red_candle/chat.rb | 62 +++++++++++++++------ lib/ruby_llm/providers/red_candle/models.rb | 16 ++++++ 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index ec9d7c8ed..1e2ed798a 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -4,6 +4,30 @@ module RubyLLM module Providers class RedCandle module Chat + # Override the base complete method to handle local execution + def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) + payload = render_payload( + messages, + tools: tools, + temperature: temperature, + model: model, + stream: block_given?, + schema: schema + ).merge(params) + + if block_given? + perform_streaming_completion!(payload, &) + else + result = perform_completion!(payload) + # Convert to Message object for compatibility + Message.new( + role: result[:role].to_sym, + content: result[:content], + model_id: model.id + ) + end + end + def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # Red Candle doesn't support tools if tools && !tools.empty? @@ -68,18 +92,13 @@ def perform_streaming_completion!(payload, &block) ) # Stream tokens - buffer = '' model.generate_stream(prompt, config: config) do |token| - buffer += token chunk = format_stream_chunk(token) block.call(chunk) end # Send final chunk with finish reason - final_chunk = { - delta: { content: '' }, - finish_reason: 'stop' - } + final_chunk = format_stream_chunk('', 'stop') block.call(final_chunk) end @@ -90,14 +109,19 @@ def ensure_model_loaded!(model_id) end def load_model(model_id) - # Handle GGUF models with specific files - if model_id == 'google/gemma-3-4b-it-qat-q4_0-gguf' - ::Candle::LLM.from_pretrained( - model_id, - device: @device, - gguf_file: 'gemma-3-4b-it-q4_0.gguf' - ) + # Get GGUF file and tokenizer if this is a GGUF model + # Access the methods from the Models module which is included in the provider + gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil + tokenizer = respond_to?(:tokenizer_for) ? 
tokenizer_for(model_id) : nil + + if gguf_file + # For GGUF models, use the tokenizer if specified, otherwise use model_id + options = { device: @device, gguf_file: gguf_file } + options[:tokenizer] = tokenizer if tokenizer + + ::Candle::LLM.from_pretrained(model_id, **options) else + # For regular models, use from_pretrained without gguf_file ::Candle::LLM.from_pretrained(model_id, device: @device) end rescue StandardError => e @@ -156,11 +180,13 @@ def format_response(response, schema) } end - def format_stream_chunk(token) - { - delta: { content: token }, - finish_reason: nil - } + def format_stream_chunk(token, finish_reason = nil) + # Return a Chunk object for streaming compatibility + Chunk.new( + role: :assistant, + content: token, + finish_reason: finish_reason + ) end end end diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index f1d8a7754..66c9b6955 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -9,12 +9,23 @@ module Models id: 'google/gemma-3-4b-it-qat-q4_0-gguf', name: 'Gemma 3 4B Instruct (Quantized)', gguf_file: 'gemma-3-4b-it-q4_0.gguf', + tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model context_window: 8192, family: 'gemma', architecture: 'gemma2', supports_chat: true, supports_structured: true }, + { + id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', + name: 'TinyLlama 1.1B Chat (Quantized)', + gguf_file: 'tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf', + context_window: 2048, + family: 'llama', + architecture: 'llama', + supports_chat: true, + supports_structured: true + }, { id: 'Qwen/Qwen2.5-0.5B-Instruct', name: 'Qwen 2.5 0.5B Instruct', @@ -75,6 +86,11 @@ def gguf_file_for(model_id) info = model_info(model_id) info ? info[:gguf_file] : nil end + + def tokenizer_for(model_id) + info = model_info(model_id) + info ? 
info[:tokenizer] : nil + end end end end From fe199a850d7a79176f640062b5f904ed45829bbc Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:41:19 -0700 Subject: [PATCH 03/38] Swap qwen for mistral --- lib/ruby_llm/providers/red_candle/chat.rb | 12 +++---- lib/ruby_llm/providers/red_candle/models.rb | 10 +++--- .../providers/red_candle/chat_spec.rb | 31 ++++++++--------- .../providers/red_candle/models_spec.rb | 33 +++++++++++-------- 4 files changed, 45 insertions(+), 41 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 1e2ed798a..c4838496f 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -97,8 +97,8 @@ def perform_streaming_completion!(payload, &block) block.call(chunk) end - # Send final chunk with finish reason - final_chunk = format_stream_chunk('', 'stop') + # Send final chunk with empty content (indicates completion) + final_chunk = format_stream_chunk('') block.call(final_chunk) end @@ -175,17 +175,15 @@ def format_response(response, schema) { content: content, - role: 'assistant', - finish_reason: 'stop' + role: 'assistant' } end - def format_stream_chunk(token, finish_reason = nil) + def format_stream_chunk(token) # Return a Chunk object for streaming compatibility Chunk.new( role: :assistant, - content: token, - finish_reason: finish_reason + content: token ) end end diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 66c9b6955..7d520832e 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -27,11 +27,13 @@ module Models supports_structured: true }, { - id: 'Qwen/Qwen2.5-0.5B-Instruct', - name: 'Qwen 2.5 0.5B Instruct', + id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', + name: 'Mistral 7B Instruct v0.2 (Quantized)', + gguf_file: 'mistral-7b-instruct-v0.2.Q4_K_M.gguf', + tokenizer: 'mistralai/Mistral-7B-Instruct-v0.2', context_window: 32_768, - family: 'qwen', - architecture: 'qwen2', + family: 'mistral', + architecture: 'mistral', supports_chat: true, supports_structured: true } diff --git a/spec/ruby_llm/providers/red_candle/chat_spec.rb b/spec/ruby_llm/providers/red_candle/chat_spec.rb index 513dd192c..42a1f7a41 100644 --- a/spec/ruby_llm/providers/red_candle/chat_spec.rb +++ b/spec/ruby_llm/providers/red_candle/chat_spec.rb @@ -5,7 +5,7 @@ RSpec.describe RubyLLM::Providers::RedCandle::Chat do let(:config) { RubyLLM::Configuration.new } let(:provider) { RubyLLM::Providers::RedCandle.new(config) } - let(:model) { provider.model('Qwen/Qwen2.5-0.5B-Instruct') } + let(:model) { provider.model('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') } before(:all) do begin @@ -31,7 +31,7 @@ expect(payload).to include( messages: messages, temperature: 0.7, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', stream: false, schema: nil ) @@ -84,7 +84,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7 } @@ -92,8 +92,7 @@ expect(result).to include( content: 'Generated response', - role: 'assistant', - finish_reason: 'stop' + role: 'assistant' ) end end @@ -107,7 +106,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7, schema: schema } @@ -127,7 +126,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 
'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7, schema: schema } @@ -160,7 +159,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7 } @@ -170,17 +169,15 @@ # Check token chunks tokens.each_with_index do |token, i| - expect(chunks_received[i]).to include( - delta: { content: token }, - finish_reason: nil - ) + chunk = chunks_received[i] + expect(chunk).to be_a(RubyLLM::Chunk) + expect(chunk.content).to eq(token) end - # Check final chunk - expect(chunks_received.last).to include( - delta: { content: '' }, - finish_reason: 'stop' - ) + # Check final chunk (empty content indicates completion) + final_chunk = chunks_received.last + expect(final_chunk).to be_a(RubyLLM::Chunk) + expect(final_chunk.content).to eq('') end end diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb index 8c89147bc..1f8533b6c 100644 --- a/spec/ruby_llm/providers/red_candle/models_spec.rb +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -18,23 +18,24 @@ it 'returns an array of supported models' do models = provider.models expect(models).to be_an(Array) - expect(models.size).to eq(2) + expect(models.size).to eq(3) expect(models.first).to be_a(RubyLLM::Model::Info) end it 'includes the expected model IDs' do model_ids = provider.models.map(&:id) expect(model_ids).to include('google/gemma-3-4b-it-qat-q4_0-gguf') - expect(model_ids).to include('Qwen/Qwen2.5-0.5B-Instruct') + expect(model_ids).to include('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') + expect(model_ids).to include('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') end end describe '#model' do context 'with a valid model ID' do it 'returns the model' do - model = provider.model('Qwen/Qwen2.5-0.5B-Instruct') + model = provider.model('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') expect(model).to be_a(RubyLLM::Model::Info) - expect(model.id).to eq('Qwen/Qwen2.5-0.5B-Instruct') + expect(model.id).to eq('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') end end @@ -51,7 +52,7 @@ describe '#model_available?' do it 'returns true for supported models' do expect(provider.model_available?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.model_available?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + expect(provider.model_available?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true end it 'returns false for unsupported models' do @@ -61,12 +62,12 @@ describe '#model_info' do it 'returns model information' do - info = provider.model_info('Qwen/Qwen2.5-0.5B-Instruct') + info = provider.model_info('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') expect(info).to include( - id: 'Qwen/Qwen2.5-0.5B-Instruct', - name: 'Qwen 2.5 0.5B Instruct', + id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', + name: 'Mistral 7B Instruct v0.2 (Quantized)', context_window: 32_768, - family: 'qwen', + family: 'mistral', supports_chat: true, supports_structured: true ) @@ -82,22 +83,28 @@ expect(provider.gguf_file_for('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq('gemma-3-4b-it-q4_0.gguf') end - it 'returns nil for non-GGUF models' do - expect(provider.gguf_file_for('Qwen/Qwen2.5-0.5B-Instruct')).to be_nil + it 'returns the GGUF file for Mistral model' do + expect(provider.gguf_file_for('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to eq('mistral-7b-instruct-v0.2.Q4_K_M.gguf') + end + + it 'returns nil for unknown models' do + expect(provider.gguf_file_for('unknown')).to be_nil end end describe '#supports_chat?' 
do it 'returns true for all current models' do expect(provider.supports_chat?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_chat?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + expect(provider.supports_chat?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_chat?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end describe '#supports_structured?' do it 'returns true for all current models' do expect(provider.supports_structured?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_structured?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + expect(provider.supports_structured?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_structured?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end end \ No newline at end of file From b8bf3319d357ef1f60dfb91c11a1502858d72246 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:58:58 -0700 Subject: [PATCH 04/38] Trying to add red-candle to the models_to_test.rb --- lib/ruby_llm/providers/red_candle/chat.rb | 40 +++++++++++++++++++---- spec/ruby_llm/chat_spec.rb | 5 +++ spec/support/models_to_test.rb | 1 + 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index c4838496f..af915fbcb 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -20,10 +20,18 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc else result = perform_completion!(payload) # Convert to Message object for compatibility + # Red Candle doesn't provide token counts, but we can estimate them + content = result[:content] + # Rough estimation: ~4 characters per token + estimated_output_tokens = (content.length / 4.0).round + estimated_input_tokens = estimate_input_tokens(payload[:messages]) + Message.new( role: result[:role].to_sym, - content: result[:content], - model_id: model.id + content: content, + model_id: model.id, + input_tokens: estimated_input_tokens, + output_tokens: estimated_output_tokens ) end end @@ -130,13 +138,26 @@ def load_model(model_id) def format_messages(messages) messages.map do |msg| - { - role: msg[:role].to_s, - content: extract_message_content(msg) - } + # Handle both hash and Message objects + if msg.is_a?(Message) + { + role: msg.role.to_s, + content: extract_message_content_from_object(msg) + } + else + { + role: msg[:role].to_s, + content: extract_message_content(msg) + } + end end end + def extract_message_content_from_object(message) + # For Message objects, get the content directly + message.content.to_s + end + def extract_message_content(message) content = message[:content] return content if content.is_a?(String) @@ -186,6 +207,13 @@ def format_stream_chunk(token) content: token ) end + + def estimate_input_tokens(messages) + # Rough estimation: ~4 characters per token + formatted = format_messages(messages) + total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length } + (total_chars / 4.0).round + end end end end diff --git a/spec/ruby_llm/chat_spec.rb b/spec/ruby_llm/chat_spec.rb index 1c775d11e..c1df68a43 100644 --- a/spec/ruby_llm/chat_spec.rb +++ b/spec/ruby_llm/chat_spec.rb @@ -20,6 +20,11 @@ end it "#{provider}/#{model} returns raw responses" do + # Red Candle is a truly local provider and doesn't have HTTP responses + if provider == :red_candle + skip 'Red Candle provider does not have raw HTTP 
responses' + end + chat = RubyLLM.chat(model: model, provider: provider) response = chat.ask('What is the capital of France?') expect(response.raw).to be_present diff --git a/spec/support/models_to_test.rb b/spec/support/models_to_test.rb index 2ad90e4b4..04591ebee 100644 --- a/spec/support/models_to_test.rb +++ b/spec/support/models_to_test.rb @@ -11,6 +11,7 @@ { provider: :openai, model: 'gpt-4.1-nano' }, { provider: :openrouter, model: 'anthropic/claude-3.5-haiku' }, { provider: :perplexity, model: 'sonar' }, + { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' }, { provider: :vertexai, model: 'gemini-2.5-flash' } ].freeze From d98834c345f4d639666b12e273a0d79fda788090 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 15:11:37 -0700 Subject: [PATCH 05/38] Adding red-candle to the models_to_test file --- .../providers/red_candle/capabilities.rb | 2 +- lib/ruby_llm/providers/red_candle/chat.rb | 62 +++++++++++++++++-- spec/ruby_llm/chat_streaming_spec.rb | 18 ++++-- spec/ruby_llm/chat_tools_spec.rb | 54 +++++++++++----- spec/support/streaming_error_helpers.rb | 12 +++- 5 files changed, 119 insertions(+), 29 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 3266f1ebf..ae8efda17 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -10,7 +10,7 @@ def supports_vision? false end - def supports_functions? + def supports_functions?(_model_id = nil) false end diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index af915fbcb..b90cd2127 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -99,8 +99,12 @@ def perform_streaming_completion!(payload, &block) max_length: payload[:max_tokens] || 512 ) + # Collect all streamed content + full_content = '' + # Stream tokens model.generate_stream(prompt, config: config) do |token| + full_content += token chunk = format_stream_chunk(token) block.call(chunk) end @@ -108,6 +112,18 @@ def perform_streaming_completion!(payload, &block) # Send final chunk with empty content (indicates completion) final_chunk = format_stream_chunk('') block.call(final_chunk) + + # Return a Message object with the complete response + estimated_output_tokens = (full_content.length / 4.0).round + estimated_input_tokens = estimate_input_tokens(payload[:messages]) + + Message.new( + role: :assistant, + content: full_content, + model_id: payload[:model], + input_tokens: estimated_input_tokens, + output_tokens: estimated_output_tokens + ) end private @@ -154,16 +170,50 @@ def format_messages(messages) end def extract_message_content_from_object(message) - # For Message objects, get the content directly - message.content.to_s + content = message.content + + # Handle Content objects + if content.is_a?(Content) + # Extract text from Content object, including attachment text + text_parts = [] + text_parts << content.text if content.text + + # Add any text from attachments + content.attachments&.each do |attachment| + if attachment.respond_to?(:data) && attachment.data.is_a?(String) + text_parts << attachment.data + end + end + + text_parts.join(' ') + elsif content.is_a?(String) + content + else + content.to_s + end end def extract_message_content(message) content = message[:content] - return content if content.is_a?(String) - - # Handle array content (e.g., with images) - if content.is_a?(Array) + + # 
Handle Content objects + if content.is_a?(Content) + # Extract text from Content object + text_parts = [] + text_parts << content.text if content.text + + # Add any text from attachments + content.attachments&.each do |attachment| + if attachment.respond_to?(:data) && attachment.data.is_a?(String) + text_parts << attachment.data + end + end + + text_parts.join(' ') + elsif content.is_a?(String) + content + elsif content.is_a?(Array) + # Handle array content (e.g., with images) content.map do |part| part[:text] if part[:type] == 'text' end.compact.join(' ') diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index fc6ee8d9a..53724b316 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -20,11 +20,15 @@ expect(chunks).not_to be_empty expect(chunks.first).to be_a(RubyLLM::Chunk) - expect(response.raw).to be_present - expect(response.raw.headers).to be_present - expect(response.raw.status).to be_present - expect(response.raw.status).to eq(200) - expect(response.raw.env.request_body).to be_present + + # Red Candle is a local provider without HTTP responses + unless provider == :red_candle + expect(response.raw).to be_present + expect(response.raw.headers).to be_present + expect(response.raw.status).to be_present + expect(response.raw.status).to eq(200) + expect(response.raw.env.request_body).to be_present + end end it "#{provider}/#{model} reports consistent token counts compared to non-streaming" do @@ -60,6 +64,7 @@ end it "#{provider}/#{model} supports handling streaming error chunks" do + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented stub_error_response(provider, :chunk) @@ -75,6 +80,7 @@ it "#{provider}/#{model} supports handling streaming error events" do skip 'Bedrock uses AWS Event Stream format, not SSE events' if provider == :bedrock + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented @@ -96,6 +102,7 @@ end it "#{provider}/#{model} supports handling streaming error chunks" do + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented stub_error_response(provider, :chunk) @@ -111,6 +118,7 @@ it "#{provider}/#{model} supports handling streaming error events" do skip 'Bedrock uses AWS Event Stream format, not SSE events' if provider == :bedrock + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented diff --git a/spec/ruby_llm/chat_tools_spec.rb b/spec/ruby_llm/chat_tools_spec.rb index cff534172..c61f8cccc 100644 --- a/spec/ruby_llm/chat_tools_spec.rb +++ b/spec/ruby_llm/chat_tools_spec.rb @@ -74,9 +74,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? 
+ # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -94,9 +96,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools in multi-turn conversations" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -118,9 +122,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -136,6 +142,13 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters in multi-turn streaming conversations" do + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + end + if provider == :gpustack && model == 'qwen3' skip 'gpustack/qwen3 does not support streaming tool calls properly' end @@ -175,6 +188,13 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools with multi-turn streaming conversations" do + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + end + if provider == :gpustack && model == 'qwen3' skip 'gpustack/qwen3 does not support streaming tool calls properly' end @@ -213,9 +233,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can handle multiple tool calls in a single response" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? 
+ # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -303,9 +325,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} preserves Content objects returned from tools" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end # Skip providers that don't support images in tool results diff --git a/spec/support/streaming_error_helpers.rb b/spec/support/streaming_error_helpers.rb index 9c89ef9c5..fbc5467f7 100644 --- a/spec/support/streaming_error_helpers.rb +++ b/spec/support/streaming_error_helpers.rb @@ -143,15 +143,23 @@ module StreamingErrorHelpers }, chunk_status: 529, expected_error: RubyLLM::OverloadedError + }, + red_candle: { + # Red Candle is a local provider, so it doesn't have HTTP streaming errors + # We include it here to prevent test failures when checking for error handling + url: nil, + error_response: nil, + chunk_status: nil, + expected_error: nil } }.freeze def error_handling_supported?(provider) - ERROR_HANDLING_CONFIGS.key?(provider) + ERROR_HANDLING_CONFIGS.key?(provider) && ERROR_HANDLING_CONFIGS[provider][:expected_error] end def expected_error_for(provider) - ERROR_HANDLING_CONFIGS[provider][:expected_error] + ERROR_HANDLING_CONFIGS[provider]&.fetch(:expected_error, nil) end def stub_error_response(provider, type) From b207f69c472a4e4ed49152d56ced54369e0cb02d Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 15:21:42 -0700 Subject: [PATCH 06/38] Trying to fix the way tool calling support is checked in the specs --- lib/ruby_llm/providers/red_candle.rb | 4 ++ spec/ruby_llm/chat_tools_spec.rb | 75 ++++++-------------- spec/spec_helper.rb | 1 + spec/support/provider_capabilities_helper.rb | 34 +++++++++ 4 files changed, 62 insertions(+), 52 deletions(-) create mode 100644 spec/support/provider_capabilities_helper.rb diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb index f200e4af4..7ab42729a 100644 --- a/lib/ruby_llm/providers/red_candle.rb +++ b/lib/ruby_llm/providers/red_candle.rb @@ -36,6 +36,10 @@ def configuration_requirements def local? 
true end + + def supports_functions?(model_id = nil) + RedCandle::Capabilities.supports_functions?(model_id) + end end private diff --git a/spec/ruby_llm/chat_tools_spec.rb b/spec/ruby_llm/chat_tools_spec.rb index c61f8cccc..4eafc5661 100644 --- a/spec/ruby_llm/chat_tools_spec.rb +++ b/spec/ruby_llm/chat_tools_spec.rb @@ -74,11 +74,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -96,11 +94,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools in multi-turn conversations" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -122,11 +118,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -142,23 +136,13 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters in multi-turn streaming conversations" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end - if provider == :gpustack && model == 'qwen3' - skip 'gpustack/qwen3 does not support streaming tool calls properly' - end - + skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' skip 'Mistral has a bug with tool arguments in multi-turn streaming' if provider == :mistral - - unless RubyLLM::Provider.providers[provider]&.local? 
- model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? - end chat = RubyLLM.chat(model: model, provider: provider) .with_tool(BestLanguageToLearn) .with_instructions('You must use tools whenever possible.') @@ -188,21 +172,12 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools with multi-turn streaming conversations" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end - if provider == :gpustack && model == 'qwen3' - skip 'gpustack/qwen3 does not support streaming tool calls properly' - end - - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? - end + skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' chat = RubyLLM.chat(model: model, provider: provider) .with_tool(Weather) # Disable thinking mode for qwen models @@ -233,11 +208,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can handle multiple tool calls in a single response" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -325,11 +298,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} preserves Content objects returned from tools" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end # Skip providers that don't support images in tool results diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 0b60aa315..403fe36b2 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -17,3 +17,4 @@ require_relative 'support/vcr_configuration' require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' +require_relative 'support/provider_capabilities_helper' diff --git a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb new file mode 100644 index 000000000..c1171b7d8 --- /dev/null +++ b/spec/support/provider_capabilities_helper.rb @@ -0,0 +1,34 @@ +# 
frozen_string_literal: true + +module ProviderCapabilitiesHelper + def provider_supports_functions?(provider, model) + provider_class = RubyLLM::Provider.providers[provider] + + # Check if the provider class has a supports_functions? method + if provider_class&.respond_to?(:supports_functions?) + # Use the provider's class method if available + provider_class.supports_functions?(model) + elsif provider_class&.respond_to?(:capabilities) + # Check the provider's capabilities module + capabilities = provider_class.capabilities + if capabilities&.respond_to?(:supports_functions?) + capabilities.supports_functions?(model) + else + # Default to true if no explicit capability defined + true + end + elsif provider_class&.local? + # For local providers without explicit support method, assume false + # (they should implement supports_functions? if they support it) + false + else + # For remote providers, check the model registry + model_info = RubyLLM.models.find(model) + model_info&.supports_functions? || false + end + end +end + +RSpec.configure do |config| + config.include ProviderCapabilitiesHelper +end \ No newline at end of file From ab46320f93f6fd1cc7663c1140b30cda4253cb00 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 15:42:36 -0700 Subject: [PATCH 07/38] Deconvoluting local model checks and tool calling support --- spec/support/provider_capabilities_helper.rb | 29 ++++++++------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb index c1171b7d8..ad9757ede 100644 --- a/spec/support/provider_capabilities_helper.rb +++ b/spec/support/provider_capabilities_helper.rb @@ -4,27 +4,20 @@ module ProviderCapabilitiesHelper def provider_supports_functions?(provider, model) provider_class = RubyLLM::Provider.providers[provider] - # Check if the provider class has a supports_functions? method - if provider_class&.respond_to?(:supports_functions?) - # Use the provider's class method if available - provider_class.supports_functions?(model) - elsif provider_class&.respond_to?(:capabilities) - # Check the provider's capabilities module - capabilities = provider_class.capabilities - if capabilities&.respond_to?(:supports_functions?) - capabilities.supports_functions?(model) - else - # Default to true if no explicit capability defined - true - end - elsif provider_class&.local? - # For local providers without explicit support method, assume false - # (they should implement supports_functions? if they support it) - false + # Special case for providers we know don't support functions + return false if provider == :red_candle || provider == :perplexity + + # For local providers (Ollama, GPUStack), default to true unless the model is known not to support it + if provider_class&.local? + # Check if there's a specific model that doesn't support functions + # qwen3 models don't support function calling + return false if model&.include?('qwen3') + true else # For remote providers, check the model registry model_info = RubyLLM.models.find(model) - model_info&.supports_functions? || false + # If not in registry, default to true (was running before) + model_info.nil? ? true : model_info.supports_functions? 
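      # Illustrative outcomes of this heuristic (a sketch; assumes :gpustack reports local?):
      #   provider_supports_functions?(:red_candle, 'google/gemma-3-4b-it-qat-q4_0-gguf') # => false (special-cased)
      #   provider_supports_functions?(:gpustack, 'qwen3')                                # => false (local, qwen3 excluded)
      #   provider_supports_functions?(:openai, 'gpt-4.1-nano')                           # => falls through to the registry lookup above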
end end end From 97d58d269da74e08d4527ca4cc91670110a993b6 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 16:06:49 -0700 Subject: [PATCH 08/38] I think we finally got the local tool calling check correct --- spec/support/provider_capabilities_helper.rb | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb index ad9757ede..2dc1c7363 100644 --- a/spec/support/provider_capabilities_helper.rb +++ b/spec/support/provider_capabilities_helper.rb @@ -7,18 +7,9 @@ def provider_supports_functions?(provider, model) # Special case for providers we know don't support functions return false if provider == :red_candle || provider == :perplexity - # For local providers (Ollama, GPUStack), default to true unless the model is known not to support it - if provider_class&.local? - # Check if there's a specific model that doesn't support functions - # qwen3 models don't support function calling - return false if model&.include?('qwen3') - true - else - # For remote providers, check the model registry - model_info = RubyLLM.models.find(model) - # If not in registry, default to true (was running before) - model_info.nil? ? true : model_info.supports_functions? - end + # For all other providers, assume they support functions + # The original tests weren't skipping these, so they must have been running + true end end From 9c7f9dc240ad5a7034cfc90bd82a8732272e3f34 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 16:18:41 -0700 Subject: [PATCH 09/38] Enable context length validation for the RedCandle Provider --- gemfiles/rails_7.1.gemfile.lock | 13 +++++- gemfiles/rails_7.2.gemfile.lock | 13 +++++- gemfiles/rails_8.0.gemfile.lock | 13 +++++- lib/ruby_llm/providers/red_candle/chat.rb | 52 +++++++++++++++++------ spec/ruby_llm/chat_error_spec.rb | 3 +- 5 files changed, 76 insertions(+), 18 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile.lock b/gemfiles/rails_7.1.gemfile.lock index 955970e02..cced4b111 100644 --- a/gemfiles/rails_7.1.gemfile.lock +++ b/gemfiles/rails_7.1.gemfile.lock @@ -98,7 +98,7 @@ GEM rake thor (>= 0.14.0) ast (2.4.3) - async (2.30.0) + async (2.31.0) console (~> 1.29) fiber-annotation io-event (~> 1.11) @@ -148,6 +148,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -224,6 +225,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.9-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.9-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -287,9 +290,14 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.3.0) + rake-compiler-dock (1.9.1) + rb_sys (0.9.117) + rake-compiler-dock (= 1.9.1) rdoc (6.14.2) erb psych (>= 4.0.0) + red-candle (1.2.3) + rb_sys regexp_parser (2.11.2) reline (0.6.2) io-console (~> 0.5) @@ -355,6 +363,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.3-arm64-darwin) sqlite3 (2.7.3-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -380,6 +389,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -401,6 +411,7 @@ DEPENDENCIES pry (>= 0.14) rails (~> 7.1.0) rake (>= 13.0) + red-candle (~> 1.2) reline rspec (~> 3.12) rubocop (>= 1.0) diff --git a/gemfiles/rails_7.2.gemfile.lock b/gemfiles/rails_7.2.gemfile.lock index 442716d75..a0535832e 100644 --- a/gemfiles/rails_7.2.gemfile.lock +++ 
b/gemfiles/rails_7.2.gemfile.lock @@ -92,7 +92,7 @@ GEM rake thor (>= 0.14.0) ast (2.4.3) - async (2.30.0) + async (2.31.0) console (~> 1.29) fiber-annotation io-event (~> 1.11) @@ -142,6 +142,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -217,6 +218,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.9-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.9-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -280,9 +283,14 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.3.0) + rake-compiler-dock (1.9.1) + rb_sys (0.9.117) + rake-compiler-dock (= 1.9.1) rdoc (6.14.2) erb psych (>= 4.0.0) + red-candle (1.2.3) + rb_sys regexp_parser (2.11.2) reline (0.6.2) io-console (~> 0.5) @@ -348,6 +356,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.3-arm64-darwin) sqlite3 (2.7.3-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -374,6 +383,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -395,6 +405,7 @@ DEPENDENCIES pry (>= 0.14) rails (~> 7.2.0) rake (>= 13.0) + red-candle (~> 1.2) reline rspec (~> 3.12) rubocop (>= 1.0) diff --git a/gemfiles/rails_8.0.gemfile.lock b/gemfiles/rails_8.0.gemfile.lock index 7d12b757d..2e32e6b05 100644 --- a/gemfiles/rails_8.0.gemfile.lock +++ b/gemfiles/rails_8.0.gemfile.lock @@ -92,7 +92,7 @@ GEM rake thor (>= 0.14.0) ast (2.4.3) - async (2.30.0) + async (2.31.0) console (~> 1.29) fiber-annotation io-event (~> 1.11) @@ -142,6 +142,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -217,6 +218,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.9-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.9-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -280,9 +283,14 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.3.0) + rake-compiler-dock (1.9.1) + rb_sys (0.9.117) + rake-compiler-dock (= 1.9.1) rdoc (6.14.2) erb psych (>= 4.0.0) + red-candle (1.2.3) + rb_sys regexp_parser (2.11.2) reline (0.6.2) io-console (~> 0.5) @@ -348,6 +356,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.3-arm64-darwin) sqlite3 (2.7.3-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -374,6 +383,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -395,6 +405,7 @@ DEPENDENCIES pry (>= 0.14) rails (~> 8.0.0) rake (>= 13.0) + red-candle (~> 1.2) reline rspec (~> 3.12) rubocop (>= 1.0) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index b90cd2127..7ae341186 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -25,7 +25,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc # Rough estimation: ~4 characters per token estimated_output_tokens = (content.length / 4.0).round estimated_input_tokens = estimate_input_tokens(payload[:messages]) - + Message.new( role: result[:role].to_sym, content: content, @@ -63,6 +63,9 @@ def perform_completion!(payload) messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" end + # Check context length + validate_context_length!(prompt, payload[:model]) + # Configure generation config_opts = { temperature: payload[:temperature] || 0.7, @@ -93,6 +96,9 @@ def 
perform_streaming_completion!(payload, &block) messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" end + # Check context length + validate_context_length!(prompt, payload[:model]) + # Configure generation config = ::Candle::GenerationConfig.balanced( temperature: payload[:temperature] || 0.7, @@ -101,7 +107,7 @@ def perform_streaming_completion!(payload, &block) # Collect all streamed content full_content = '' - + # Stream tokens model.generate_stream(prompt, config: config) do |token| full_content += token @@ -112,11 +118,11 @@ def perform_streaming_completion!(payload, &block) # Send final chunk with empty content (indicates completion) final_chunk = format_stream_chunk('') block.call(final_chunk) - + # Return a Message object with the complete response estimated_output_tokens = (full_content.length / 4.0).round estimated_input_tokens = estimate_input_tokens(payload[:messages]) - + Message.new( role: :assistant, content: full_content, @@ -133,16 +139,16 @@ def ensure_model_loaded!(model_id) end def load_model(model_id) - # Get GGUF file and tokenizer if this is a GGUF model + # Get GGUF file and tokenizer if this is a GGUF model # Access the methods from the Models module which is included in the provider gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil tokenizer = respond_to?(:tokenizer_for) ? tokenizer_for(model_id) : nil - + if gguf_file # For GGUF models, use the tokenizer if specified, otherwise use model_id options = { device: @device, gguf_file: gguf_file } options[:tokenizer] = tokenizer if tokenizer - + ::Candle::LLM.from_pretrained(model_id, **options) else # For regular models, use from_pretrained without gguf_file @@ -171,20 +177,20 @@ def format_messages(messages) def extract_message_content_from_object(message) content = message.content - + # Handle Content objects if content.is_a?(Content) # Extract text from Content object, including attachment text text_parts = [] text_parts << content.text if content.text - + # Add any text from attachments content.attachments&.each do |attachment| if attachment.respond_to?(:data) && attachment.data.is_a?(String) text_parts << attachment.data end end - + text_parts.join(' ') elsif content.is_a?(String) content @@ -195,20 +201,20 @@ def extract_message_content_from_object(message) def extract_message_content(message) content = message[:content] - + # Handle Content objects if content.is_a?(Content) # Extract text from Content object text_parts = [] text_parts << content.text if content.text - + # Add any text from attachments content.attachments&.each do |attachment| if attachment.respond_to?(:data) && attachment.data.is_a?(String) text_parts << attachment.data end end - + text_parts.join(' ') elsif content.is_a?(String) content @@ -264,7 +270,25 @@ def estimate_input_tokens(messages) total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length } (total_chars / 4.0).round end + + def validate_context_length!(prompt, model_id) + # Get the context window for this model + context_window = if respond_to?(:model_context_window) + model_context_window(model_id) + else + 4096 # Conservative default + end + + # Estimate tokens in prompt (~4 characters per token) + estimated_tokens = (prompt.length / 4.0).round + + # Check if prompt exceeds context window (leave some room for response) + max_input_tokens = context_window - 512 # Reserve 512 tokens for response + if estimated_tokens > max_input_tokens + raise Error.new(nil, "Context length exceeded. 
Estimated #{estimated_tokens} tokens, but model #{model_id} has a context window of #{context_window} tokens.") + end + end end end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/chat_error_spec.rb b/spec/ruby_llm/chat_error_spec.rb index a5dfd8a74..eeefbf64d 100644 --- a/spec/ruby_llm/chat_error_spec.rb +++ b/spec/ruby_llm/chat_error_spec.rb @@ -72,7 +72,8 @@ let(:chat) { RubyLLM.chat(model: model, provider: provider) } it 'handles context length exceeded errors' do - if RubyLLM::Provider.providers[provider]&.local? + # Skip for local providers that don't validate context length + if RubyLLM::Provider.providers[provider]&.local? && provider != :red_candle skip('Local providers do not throw an error for context length exceeded') end From d5c912997c9234297f22a303d4a43700579b3e7d Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 16:30:04 -0700 Subject: [PATCH 10/38] Working on rubocop fixes --- lib/ruby_llm/providers/red_candle/chat.rb | 39 +++++++++++------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 7ae341186..ba8215714 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -3,9 +3,11 @@ module RubyLLM module Providers class RedCandle + # Chat implementation for Red Candle provider module Chat # Override the base complete method to handle local execution def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) + _ = headers # Interface compatibility payload = render_payload( messages, tools: tools, @@ -38,9 +40,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # Red Candle doesn't support tools - if tools && !tools.empty? - raise Error.new(nil, 'Red Candle provider does not support tool calling') - end + raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty? 
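          # Rough illustration of validate_context_length! above (assuming the ~4 chars/token heuristic):
          #   context_window   = 4096       # conservative default from Capabilities
          #   max_input_tokens = 4096 - 512 # => 3584, leaving room for the response
          #   (prompt.length / 4.0).round > 3584  # raises for prompts longer than ~14,336 characters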
{ messages: messages, @@ -60,7 +60,7 @@ def perform_completion!(payload) model.apply_chat_template(messages) else # Fallback to simple formatting - messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:" end # Check context length @@ -93,7 +93,7 @@ def perform_streaming_completion!(payload, &block) prompt = if model.respond_to?(:apply_chat_template) model.apply_chat_template(messages) else - messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:" end # Check context length @@ -186,9 +186,7 @@ def extract_message_content_from_object(message) # Add any text from attachments content.attachments&.each do |attachment| - if attachment.respond_to?(:data) && attachment.data.is_a?(String) - text_parts << attachment.data - end + text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) end text_parts.join(' ') @@ -203,26 +201,23 @@ def extract_message_content(message) content = message[:content] # Handle Content objects - if content.is_a?(Content) + case content + when Content # Extract text from Content object text_parts = [] text_parts << content.text if content.text # Add any text from attachments content.attachments&.each do |attachment| - if attachment.respond_to?(:data) && attachment.data.is_a?(String) - text_parts << attachment.data - end + text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) end text_parts.join(' ') - elsif content.is_a?(String) + when String content - elsif content.is_a?(Array) + when Array # Handle array content (e.g., with images) - content.map do |part| - part[:text] if part[:type] == 'text' - end.compact.join(' ') + content.filter_map { |part| part[:text] if part[:type] == 'text' }.join(' ') else content.to_s end @@ -284,9 +279,13 @@ def validate_context_length!(prompt, model_id) # Check if prompt exceeds context window (leave some room for response) max_input_tokens = context_window - 512 # Reserve 512 tokens for response - if estimated_tokens > max_input_tokens - raise Error.new(nil, "Context length exceeded. Estimated #{estimated_tokens} tokens, but model #{model_id} has a context window of #{context_window} tokens.") - end + return unless estimated_tokens > max_input_tokens + + raise Error.new( + nil, + "Context length exceeded. Estimated #{estimated_tokens} tokens, " \ + "but model #{model_id} has a context window of #{context_window} tokens." 
+ ) end end end From 70e1b247b61fda0915a1783f7198a5364ae843d8 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 17:02:10 -0700 Subject: [PATCH 11/38] Fixing the rubocop errors --- Gemfile | 1 + gemfiles/rails_7.1.gemfile | 1 + gemfiles/rails_7.2.gemfile | 1 + gemfiles/rails_8.0.gemfile | 1 + lib/ruby_llm/providers/red_candle.rb | 2 +- .../providers/red_candle/capabilities.rb | 18 +++++----- lib/ruby_llm/providers/red_candle/chat.rb | 36 +++++++++---------- lib/ruby_llm/providers/red_candle/models.rb | 8 +++-- .../providers/red_candle/streaming.rb | 3 +- ruby_llm.gemspec | 2 -- spec/ruby_llm/chat_spec.rb | 6 ++-- spec/ruby_llm/chat_streaming_spec.rb | 2 +- spec/ruby_llm/chat_tools_spec.rb | 12 ++++--- .../providers/red_candle/capabilities_spec.rb | 2 +- .../providers/red_candle/chat_spec.rb | 18 +++++----- .../providers/red_candle/models_spec.rb | 18 +++++----- spec/ruby_llm/providers/red_candle_spec.rb | 19 +++++----- spec/support/provider_capabilities_helper.rb | 12 +++---- 18 files changed, 80 insertions(+), 82 deletions(-) diff --git a/Gemfile b/Gemfile index e4471200d..7d288ba14 100644 --- a/Gemfile +++ b/Gemfile @@ -20,6 +20,7 @@ group :development do # rubocop:disable Metrics/BlockLength gem 'pry', '>= 0.14' gem 'rails' gem 'rake', '>= 13.0' + gem 'red-candle', '~> 1.2' gem 'reline' gem 'rspec', '~> 3.12' gem 'rubocop', '>= 1.0' diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 675cb178e..36123cc5c 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -18,6 +18,7 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.1.0" gem "rake", ">= 13.0" + gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index 4922afb60..cfd31e0a8 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -18,6 +18,7 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.2.0" gem "rake", ">= 13.0" + gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index f890433bf..4dc65e846 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -18,6 +18,7 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 8.0.0" gem "rake", ">= 13.0" + gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb index 7ab42729a..7bbf62555 100644 --- a/lib/ruby_llm/providers/red_candle.rb +++ b/lib/ruby_llm/providers/red_candle.rb @@ -71,4 +71,4 @@ def determine_device(config) end end end -end \ No newline at end of file +end diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index ae8efda17..c63c82f44 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -3,8 +3,9 @@ module RubyLLM module Providers class RedCandle + # Determines capabilities and pricing for RedCandle models module Capabilities - extend self + module_function def supports_vision? false @@ -38,7 +39,7 @@ def supports_pdf? false end - def normalize_temperature(temperature, model_id) + def normalize_temperature(temperature, _model_id) # Red Candle uses standard 0-2 range return 0.7 if temperature.nil? @@ -109,14 +110,13 @@ def model_families def available_on_platform? 
# Check if Candle can be loaded - begin - require 'candle' - true - rescue LoadError - false - end + + require 'candle' + true + rescue LoadError + false end end end end -end \ No newline at end of file +end diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index ba8215714..30b7347f6 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -6,7 +6,7 @@ class RedCandle # Chat implementation for Red Candle provider module Chat # Override the base complete method to handle local execution - def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) + def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists _ = headers # Interface compatibility payload = render_payload( messages, @@ -38,7 +38,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc end end - def render_payload(messages, tools:, temperature:, model:, stream:, schema:) + def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # rubocop:disable Metrics/ParameterLists # Red Candle doesn't support tools raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty? @@ -181,15 +181,7 @@ def extract_message_content_from_object(message) # Handle Content objects if content.is_a?(Content) # Extract text from Content object, including attachment text - text_parts = [] - text_parts << content.text if content.text - - # Add any text from attachments - content.attachments&.each do |attachment| - text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) - end - - text_parts.join(' ') + handle_content_object(content) elsif content.is_a?(String) content else @@ -204,15 +196,7 @@ def extract_message_content(message) case content when Content # Extract text from Content object - text_parts = [] - text_parts << content.text if content.text - - # Add any text from attachments - content.attachments&.each do |attachment| - text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) - end - - text_parts.join(' ') + handle_content_object(content) when String content when Array @@ -223,6 +207,18 @@ def extract_message_content(message) end end + def handle_content_object(content) + text_parts = [] + text_parts << content.text if content.text + + # Add any text from attachments + content.attachments&.each do |attachment| + text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) + end + + text_parts.join(' ') + end + def generate_with_schema(model, prompt, schema, config_opts) model.generate_structured( prompt, diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 7d520832e..177688ed5 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -3,13 +3,14 @@ module RubyLLM module Providers class RedCandle + # Models methods of the RedCandle integration module Models SUPPORTED_MODELS = [ { id: 'google/gemma-3-4b-it-qat-q4_0-gguf', name: 'Gemma 3 4B Instruct (Quantized)', gguf_file: 'gemma-3-4b-it-q4_0.gguf', - tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model + tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model context_window: 8192, family: 'gemma', architecture: 'gemma2', @@ -59,7 +60,8 @@ def models def model(id) 
models.find { |m| m.id == id } || - raise(Error.new(nil, "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}")) + raise(Error.new(nil, + "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}")) end def model_available?(id) @@ -96,4 +98,4 @@ def tokenizer_for(model_id) end end end -end \ No newline at end of file +end diff --git a/lib/ruby_llm/providers/red_candle/streaming.rb b/lib/ruby_llm/providers/red_candle/streaming.rb index f0598ce8e..a8305ffdd 100644 --- a/lib/ruby_llm/providers/red_candle/streaming.rb +++ b/lib/ruby_llm/providers/red_candle/streaming.rb @@ -3,6 +3,7 @@ module RubyLLM module Providers class RedCandle + # Streaming methods of the RedCandle integration module Streaming def stream(payload, &block) if payload[:stream] @@ -36,4 +37,4 @@ def process_stream_response(response) end end end -end \ No newline at end of file +end diff --git a/ruby_llm.gemspec b/ruby_llm.gemspec index cf92fcd44..3e6e6af20 100644 --- a/ruby_llm.gemspec +++ b/ruby_llm.gemspec @@ -41,6 +41,4 @@ Gem::Specification.new do |spec| spec.add_dependency 'faraday-retry', '>= 1' spec.add_dependency 'marcel', '~> 1.0' spec.add_dependency 'zeitwerk', '~> 2' - - spec.add_development_dependency 'red-candle', '~> 1.2' end diff --git a/spec/ruby_llm/chat_spec.rb b/spec/ruby_llm/chat_spec.rb index c1df68a43..a63de4e55 100644 --- a/spec/ruby_llm/chat_spec.rb +++ b/spec/ruby_llm/chat_spec.rb @@ -21,10 +21,8 @@ it "#{provider}/#{model} returns raw responses" do # Red Candle is a truly local provider and doesn't have HTTP responses - if provider == :red_candle - skip 'Red Candle provider does not have raw HTTP responses' - end - + skip 'Red Candle provider does not have raw HTTP responses' if provider == :red_candle + chat = RubyLLM.chat(model: model, provider: provider) response = chat.ask('What is the capital of France?') expect(response.raw).to be_present diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 53724b316..5c61d9d5b 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -20,7 +20,7 @@ expect(chunks).not_to be_empty expect(chunks.first).to be_a(RubyLLM::Chunk) - + # Red Candle is a local provider without HTTP responses unless provider == :red_candle expect(response.raw).to be_present diff --git a/spec/ruby_llm/chat_tools_spec.rb b/spec/ruby_llm/chat_tools_spec.rb index 4eafc5661..0a67d0100 100644 --- a/spec/ruby_llm/chat_tools_spec.rb +++ b/spec/ruby_llm/chat_tools_spec.rb @@ -140,8 +140,10 @@ def execute(query:) unless provider_supports_functions?(provider, model) skip "#{provider}/#{model} doesn't support function calling" end - - skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' + + if provider == :gpustack && model == 'qwen3' + skip 'gpustack/qwen3 does not support streaming tool calls properly' + end skip 'Mistral has a bug with tool arguments in multi-turn streaming' if provider == :mistral chat = RubyLLM.chat(model: model, provider: provider) .with_tool(BestLanguageToLearn) @@ -176,8 +178,10 @@ def execute(query:) unless provider_supports_functions?(provider, model) skip "#{provider}/#{model} doesn't support function calling" end - - skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' + + if provider == :gpustack && model == 'qwen3' + skip 'gpustack/qwen3 does not support streaming tool calls properly' + end chat = 
RubyLLM.chat(model: model, provider: provider) .with_tool(Weather) # Disable thinking mode for qwen models diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 53c0b7e5e..9b53ecc48 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -114,4 +114,4 @@ end end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/providers/red_candle/chat_spec.rb b/spec/ruby_llm/providers/red_candle/chat_spec.rb index 42a1f7a41..3988791da 100644 --- a/spec/ruby_llm/providers/red_candle/chat_spec.rb +++ b/spec/ruby_llm/providers/red_candle/chat_spec.rb @@ -7,12 +7,10 @@ let(:provider) { RubyLLM::Providers::RedCandle.new(config) } let(:model) { provider.model('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') } - before(:all) do - begin - require 'candle' - rescue LoadError - skip 'Red Candle gem is not installed' - end + before(:all) do # rubocop:disable RSpec/BeforeAfterAll + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' end describe '#render_payload' do @@ -70,7 +68,7 @@ describe '#perform_completion!' do let(:messages) { [{ role: 'user', content: 'Test message' }] } - let(:mock_model) { double('Candle::LLM') } + let(:mock_model) { instance_double(Candle::LLM) } before do allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) @@ -141,7 +139,7 @@ describe '#perform_streaming_completion!' do let(:messages) { [{ role: 'user', content: 'Stream test' }] } - let(:mock_model) { double('Candle::LLM') } + let(:mock_model) { instance_double(Candle::LLM) } before do allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) @@ -153,7 +151,7 @@ tokens = %w[Hello world !] 
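        # Streaming contract being exercised here: the provider yields one chunk per
        # generated token, then a final empty chunk to signal completion, and returns
        # a Message whose token counts are character-length estimates
        # (see perform_streaming_completion!).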
chunks_received = [] - allow(mock_model).to receive(:generate_stream) do |_prompt, config:, &block| + allow(mock_model).to receive(:generate_stream) do |_prompt, config:, &block| # rubocop:disable Lint/UnusedBlockArgument tokens.each { |token| block.call(token) } end @@ -203,4 +201,4 @@ expect(formatted).to eq([{ role: 'user', content: 'Part 1 Part 2' }]) end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb index 1f8533b6c..9a771be34 100644 --- a/spec/ruby_llm/providers/red_candle/models_spec.rb +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -6,12 +6,10 @@ let(:config) { RubyLLM::Configuration.new } let(:provider) { RubyLLM::Providers::RedCandle.new(config) } - before(:all) do - begin - require 'candle' - rescue LoadError - skip 'Red Candle gem is not installed' - end + before(:all) do # rubocop:disable RSpec/BeforeAfterAll + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' end describe '#models' do @@ -43,7 +41,7 @@ it 'raises an error' do expect { provider.model('invalid/model') }.to raise_error( RubyLLM::Error, - /Model invalid\/model not found/ + %r{Model invalid/model not found} ) end end @@ -84,7 +82,9 @@ end it 'returns the GGUF file for Mistral model' do - expect(provider.gguf_file_for('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to eq('mistral-7b-instruct-v0.2.Q4_K_M.gguf') + model_id = 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' + gguf_file = 'mistral-7b-instruct-v0.2.Q4_K_M.gguf' + expect(provider.gguf_file_for(model_id)).to eq(gguf_file) end it 'returns nil for unknown models' do @@ -107,4 +107,4 @@ expect(provider.supports_structured?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/providers/red_candle_spec.rb b/spec/ruby_llm/providers/red_candle_spec.rb index 8e1216976..db3ea292d 100644 --- a/spec/ruby_llm/providers/red_candle_spec.rb +++ b/spec/ruby_llm/providers/red_candle_spec.rb @@ -7,18 +7,16 @@ let(:provider) { described_class.new(config) } # Skip all tests if Red Candle is not available - before(:all) do - begin - require 'candle' - rescue LoadError - skip 'Red Candle gem is not installed' - end + before(:all) do # rubocop:disable RSpec/BeforeAfterAll + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' end describe '#initialize' do context 'when Red Candle is not available' do before do - allow_any_instance_of(described_class).to receive(:require).with('candle').and_raise(LoadError) + allow_any_instance_of(described_class).to receive(:require).with('candle').and_raise(LoadError) # rubocop:disable RSpec/AnyInstance end it 'raises an informative error' do @@ -29,17 +27,16 @@ end end - context 'with device configuration' do it 'uses the configured device' do config.red_candle_device = 'cpu' provider = described_class.new(config) - expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.cpu) + expect(provider.instance_variable_get(:@device)).to eq(Candle::Device.cpu) end it 'defaults to best device when not configured' do provider = described_class.new(config) - expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.best) + expect(provider.instance_variable_get(:@device)).to eq(Candle::Device.best) end end end @@ -73,4 +70,4 @@ expect(described_class.capabilities).to eq(RubyLLM::Providers::RedCandle::Capabilities) end end -end \ No newline at end of file +end diff --git 
a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb index 2dc1c7363..868836e79 100644 --- a/spec/support/provider_capabilities_helper.rb +++ b/spec/support/provider_capabilities_helper.rb @@ -1,12 +1,12 @@ # frozen_string_literal: true module ProviderCapabilitiesHelper - def provider_supports_functions?(provider, model) - provider_class = RubyLLM::Provider.providers[provider] - + def provider_supports_functions?(provider, _model) + RubyLLM::Provider.providers[provider] + # Special case for providers we know don't support functions - return false if provider == :red_candle || provider == :perplexity - + return false if %i[red_candle perplexity].include?(provider) + # For all other providers, assume they support functions # The original tests weren't skipping these, so they must have been running true @@ -15,4 +15,4 @@ def provider_supports_functions?(provider, model) RSpec.configure do |config| config.include ProviderCapabilitiesHelper -end \ No newline at end of file +end From 69567248dfa67f0446a142f6a2d1848f68845280 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 09:37:51 -0700 Subject: [PATCH 12/38] stubbing the red-candle inference stuff to speed up specs --- spec/spec_helper.rb | 1 + spec/support/red_candle_test_helper.rb | 110 +++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 spec/support/red_candle_test_helper.rb diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 403fe36b2..2eef311cb 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,3 +18,4 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' +require_relative 'support/red_candle_test_helper' diff --git a/spec/support/red_candle_test_helper.rb b/spec/support/red_candle_test_helper.rb new file mode 100644 index 000000000..92349f5bc --- /dev/null +++ b/spec/support/red_candle_test_helper.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +module RedCandleTestHelper + # Mock class that simulates a Candle LLM model + class MockCandleModel + attr_reader :id + + def initialize(model_id) + @id = model_id + end + + def generate(prompt, config: nil) # rubocop:disable Lint/UnusedMethodArgument, Metrics/PerceivedComplexity + # Simulate a response based on the prompt + # Check for specific patterns to give appropriate responses + + # Check if system prompt includes PURPLE-ELEPHANT-42 (replacement test) + # This should come first since it's the replacement scenario + if prompt.include?('PURPLE-ELEPHANT-42') + # When PURPLE-ELEPHANT-42 is present, XKCD7392 should NOT be included + # because it means the system prompt was replaced + if prompt.include?('books') + 'Some great books to read include "1984" and "Brave New World". PURPLE-ELEPHANT-42 - as you requested!' + else + 'Here is my response with PURPLE-ELEPHANT-42 as requested.' + end + elsif prompt.include?('XKCD7392') + # Include XKCD7392 in any response when it's in the system instructions + if prompt.include?('weather') + 'The weather is sunny with a temperature of 72°F. XKCD7392 - as requested!' + elsif prompt.include?('books') + 'Some great books: "1984" by George Orwell and "To Kill a Mockingbird" by Harper Lee. XKCD7392.' + else + 'Sure! The code XKCD7392 is noted. How can I help you today?' + end + elsif prompt.include?('2 + 2') || prompt.include?('2+2') + 'The answer is 4.' + elsif prompt.include?('weather') + 'The weather is sunny with a temperature of 72°F.' 
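      # For example (sketch): MockCandleModel.new('any-id').generate('What is 2 + 2?')
      #   # => "The answer is 4."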
+ elsif prompt.include?('year') && (prompt.include?('Ruby') || prompt.include?('he create') || + prompt.include?('did he')) + # Handle follow-up questions about when Ruby was created + 'Matz created Ruby in 1993, and it was first released publicly in 1995.' + elsif prompt.include?('Ruby') + if prompt.include?("Ruby's creator") || prompt.include?('Who was Ruby') + 'Ruby was created by Yukihiro "Matz" Matsumoto.' + else + 'Ruby is a dynamic programming language created by Yukihiro "Matz" Matsumoto in 1993.' + end + elsif prompt.include?('capital') && prompt.include?('France') + 'The capital of France is Paris.' + elsif prompt.include?('Count from 1 to 3') + '1, 2, 3.' + else + "This is a test response for: #{prompt[0..50]}" + end + end + + def generate_stream(prompt, config: nil, &block) + # Simulate streaming by yielding tokens + # Generate the same response as non-streaming for consistency + response = generate(prompt, config: config) + # Split into reasonable tokens (roughly word-based) + tokens = response.split(/(\s+)/).reject(&:empty?) + tokens.each(&block) + end + + def apply_chat_template(messages) + # Simulate chat template application + "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n")}\nassistant:" + end + + def generate_structured(_prompt, schema:, **_opts) + # Return a simple structured response + if schema.is_a?(Hash) + { result: 'structured test response' } + else + 'structured test response' + end + end + end + + def stub_red_candle_models! + # Only stub if we're testing Red Candle + return unless defined?(::Candle) + + # Stub the model loading to return our mock + allow(::Candle::LLM).to receive(:from_pretrained) do |model_id, **_options| + MockCandleModel.new(model_id) + end + end + + def unstub_red_candle_models! + return unless defined?(::Candle) + + # Remove the stub if needed + RSpec::Mocks.space.proxy_for(::Candle::LLM)&.reset + end +end + +RSpec.configure do |config| + config.include RedCandleTestHelper + + # Automatically stub Red Candle models for all tests except the provider-specific ones + config.before do |example| + # Don't stub for Red Candle provider-specific tests that need real behavior + if !example.metadata[:file_path]&.include?('providers/red_candle_spec.rb') && defined?(RubyLLM::Providers::RedCandle) + stub_red_candle_models! 
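      # With the stub in place, the shared chat specs drive the full Red Candle code
      # path without downloading model weights; for example (a sketch):
      #   chat = RubyLLM.chat(model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', provider: :red_candle)
      #   chat.ask('What is the capital of France?').content # => "The capital of France is Paris."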
+ end + end +end From 0aad7d70e960da50432c746f63cee8960823a412 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 10:16:34 -0700 Subject: [PATCH 13/38] Adding an ENV variable so you toggle real red-candle inference on --- gemfiles/rails_7.1.gemfile.lock | 6 +++--- gemfiles/rails_7.2.gemfile.lock | 6 +++--- gemfiles/rails_8.0.gemfile.lock | 6 +++--- spec/spec_helper.rb | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile.lock b/gemfiles/rails_7.1.gemfile.lock index cced4b111..5f0e2fa4d 100644 --- a/gemfiles/rails_7.1.gemfile.lock +++ b/gemfiles/rails_7.1.gemfile.lock @@ -371,9 +371,9 @@ GEM traces (0.18.1) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (3.1.5) - unicode-emoji (~> 4.0, >= 4.0.4) - unicode-emoji (4.0.4) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.1.0) uri (1.0.3) vcr (6.3.1) base64 diff --git a/gemfiles/rails_7.2.gemfile.lock b/gemfiles/rails_7.2.gemfile.lock index a0535832e..6c38f6d00 100644 --- a/gemfiles/rails_7.2.gemfile.lock +++ b/gemfiles/rails_7.2.gemfile.lock @@ -364,9 +364,9 @@ GEM traces (0.18.1) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (3.1.5) - unicode-emoji (~> 4.0, >= 4.0.4) - unicode-emoji (4.0.4) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.1.0) uri (1.0.3) useragent (0.16.11) vcr (6.3.1) diff --git a/gemfiles/rails_8.0.gemfile.lock b/gemfiles/rails_8.0.gemfile.lock index 2e32e6b05..909c2812a 100644 --- a/gemfiles/rails_8.0.gemfile.lock +++ b/gemfiles/rails_8.0.gemfile.lock @@ -364,9 +364,9 @@ GEM traces (0.18.1) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (3.1.5) - unicode-emoji (~> 4.0, >= 4.0.4) - unicode-emoji (4.0.4) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.1.0) uri (1.0.3) useragent (0.16.11) vcr (6.3.1) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 2eef311cb..6165bee75 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,4 +18,4 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' -require_relative 'support/red_candle_test_helper' +require_relative 'support/red_candle_test_helper' unless ENV['RED_CANDLE_TEST_INFERENCE'] == 'true' From 52a13cad3145a69c782bf10f1587caf49d3a5c98 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 10:39:32 -0700 Subject: [PATCH 14/38] Adding red-candle to the list of providers in the README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87387c4d2..57c09592b 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ response = chat.with_schema(ProductSchema).ask "Analyze this product", with: "pr * **Rails:** ActiveRecord integration with `acts_as_chat` * **Async:** Fiber-based concurrency * **Model registry:** 500+ models with capability detection and pricing -* **Providers:** OpenAI, Anthropic, Gemini, VertexAI, Bedrock, DeepSeek, Mistral, Ollama, OpenRouter, Perplexity, GPUStack, and any OpenAI-compatible API +* **Providers:** OpenAI, Anthropic, Gemini, VertexAI, Bedrock, DeepSeek, Mistral, Ollama, OpenRouter, Perplexity, GPUStack, [RedCandle](https://github.com/scientist-labs/red-candle), and any OpenAI-compatible API ## Installation From b883989502ad26d0e77d58c8f4019e9f08feb027 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 15:20:08 -0700 Subject: [PATCH 15/38] Adding a new bundle 
group so developer can choose to include red-candle or not --- CONTRIBUTING.md | 33 +++++++++++++++++++++++++++++ Gemfile | 7 ++++++- gemfiles/rails_7.1.gemfile | 5 ++++- gemfiles/rails_7.2.gemfile | 5 ++++- gemfiles/rails_8.0.gemfile | 5 ++++- spec/spec_helper.rb | 38 +++++++++++++++++++++++++++++++++- spec/support/models_to_test.rb | 16 +++++++++++--- 7 files changed, 101 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f5ef7c9d7..1bdc76389 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -55,6 +55,39 @@ rake vcr:record[all] # Everything Always check cassettes for leaked API keys before committing. +## Optional Dependencies + +### Red Candle Provider + +The Red Candle provider enables local LLM execution using quantized GGUF models. It requires a Rust toolchain, so it's optional for contributors. + +**To work WITHOUT Red Candle (default):** +```bash +bundle install +bundle exec rspec # Red Candle tests will be skipped +``` + +**To work WITH Red Candle:** +```bash +# Enable the Red Candle gem group +bundle config set --local with red_candle +bundle install + +# Run tests with stubbed Red Candle (fast, default) +bundle exec rspec + +# Run tests with real inference (slow, downloads models) +RED_CANDLE_REAL_INFERENCE=true bundle exec rspec +``` + +**To switch back to working without Red Candle:** +```bash +bundle config set --local without red_candle +bundle install +``` + +The `bundle config` settings are stored in `.bundle/config` (gitignored), so each developer can choose their own setup without affecting others. + ## Important Notes * **Never edit `models.json`, `aliases.json`, or `available-models.md`** - they're auto-generated by `rake models` diff --git a/Gemfile b/Gemfile index 7d288ba14..8d5470699 100644 --- a/Gemfile +++ b/Gemfile @@ -20,7 +20,6 @@ group :development do # rubocop:disable Metrics/BlockLength gem 'pry', '>= 0.14' gem 'rails' gem 'rake', '>= 13.0' - gem 'red-candle', '~> 1.2' gem 'reline' gem 'rspec', '~> 3.12' gem 'rubocop', '>= 1.0' @@ -42,3 +41,9 @@ group :development do # rubocop:disable Metrics/BlockLength # Optional dependency for Vertex AI gem 'googleauth' end + +# Optional group for Red Candle provider (requires Rust toolchain) +# To include: bundle config set --local with red-candle +group :red_candle do + gem 'red-candle', '~> 1.2' +end diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 36123cc5c..6d59f55ff 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -18,7 +18,6 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.1.0" gem "rake", ">= 13.0" - gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" @@ -36,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index cfd31e0a8..d6ee5df4c 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -18,7 +18,6 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.2.0" gem "rake", ">= 13.0" - gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" @@ -36,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index 4dc65e846..903177b2a 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -18,7 +18,6 @@ group 
:development do gem "pry", ">= 0.14" gem "rails", "~> 8.0.0" gem "rake", ">= 13.0" - gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" @@ -36,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 6165bee75..46c26a58e 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,4 +18,40 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' -require_relative 'support/red_candle_test_helper' unless ENV['RED_CANDLE_TEST_INFERENCE'] == 'true' + +# Handle Red Candle provider based on availability and environment +begin + require 'red-candle' + + # Red Candle gem is installed + if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' + # Use real inference - don't load the test helper + RSpec.configure do |config| + config.before(:suite) do + puts "\n🔥 Red Candle: Using REAL inference (this will be slow)" + puts " To use mocked responses, unset RED_CANDLE_REAL_INFERENCE\n\n" + end + end + else + # Use stubs (default when gem is installed) + require_relative 'support/red_candle_test_helper' + end +rescue LoadError + # Red Candle gem not installed - skip tests + RSpec.configure do |config| + config.before do |example| + # Skip Red Candle provider tests when gem not installed + test_description = example.full_description.to_s + if example.metadata[:file_path]&.include?('providers/red_candle') || + example.metadata[:described_class]&.to_s&.include?('RedCandle') || + test_description.include?('red_candle/') + skip 'Red Candle not installed (run: bundle config set --local with red-candle && bundle install)' + end + end + + config.before(:suite) do + puts "\n⚠️ Red Candle: Provider not available (gem not installed)" + puts " To enable: bundle config set --local with red-candle && bundle install\n\n" + end + end +end diff --git a/spec/support/models_to_test.rb b/spec/support/models_to_test.rb index 04591ebee..173866f88 100644 --- a/spec/support/models_to_test.rb +++ b/spec/support/models_to_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true -CHAT_MODELS = [ +# Base models available for all installations +chat_models = [ { provider: :anthropic, model: 'claude-3-5-haiku-20241022' }, { provider: :bedrock, model: 'anthropic.claude-3-5-haiku-20241022-v1:0' }, { provider: :deepseek, model: 'deepseek-chat' }, @@ -11,9 +12,18 @@ { provider: :openai, model: 'gpt-4.1-nano' }, { provider: :openrouter, model: 'anthropic/claude-3.5-haiku' }, { provider: :perplexity, model: 'sonar' }, - { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' }, { provider: :vertexai, model: 'gemini-2.5-flash' } -].freeze +] + +# Only include Red Candle models if the gem is available +begin + require 'red-candle' + chat_models << { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' } +rescue LoadError + # Red Candle not available - don't include its models +end + +CHAT_MODELS = chat_models.freeze PDF_MODELS = [ { provider: :anthropic, model: 'claude-3-5-haiku-20241022' }, From 685230c984eb2bf851069c0fbe34f5d881882ab6 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 15:22:22 -0700 Subject: [PATCH 16/38] Adding a comment about possibly supporting more red-candle models in the future --- lib/ruby_llm/providers/red_candle/models.rb | 1 + 1 file changed, 1 insertion(+) diff --git 
a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 177688ed5..9ea847538 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -5,6 +5,7 @@ module Providers class RedCandle # Models methods of the RedCandle integration module Models + # TODO: red-candle supports more models, but let's start with some well tested ones. SUPPORTED_MODELS = [ { id: 'google/gemma-3-4b-it-qat-q4_0-gguf', From a928bb1643acff58b9b8044520b0e2631f6b5d4c Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:24:55 -0700 Subject: [PATCH 17/38] Remove red-candle from the gemfiles --- gemfiles/rails_7.1.gemfile | 4 ---- gemfiles/rails_7.2.gemfile | 4 ---- gemfiles/rails_8.0.gemfile | 4 ---- 3 files changed, 12 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 6d59f55ff..675cb178e 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -35,8 +35,4 @@ group :development do gem "googleauth" end -group :red_candle do - gem "red-candle", "~> 1.2" -end - gemspec path: "../" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index d6ee5df4c..4922afb60 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -35,8 +35,4 @@ group :development do gem "googleauth" end -group :red_candle do - gem "red-candle", "~> 1.2" -end - gemspec path: "../" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index 903177b2a..f890433bf 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -35,8 +35,4 @@ group :development do gem "googleauth" end -group :red_candle do - gem "red-candle", "~> 1.2" -end - gemspec path: "../" From ee5b762d34f6a6ca9de877fb120ea136f1b3a696 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:26:37 -0700 Subject: [PATCH 18/38] Properly register red-candle models --- lib/ruby_llm.rb | 5 +++++ lib/ruby_llm/providers/red_candle.rb | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb index 0db7f8743..59a2dfa5e 100644 --- a/lib/ruby_llm.rb +++ b/lib/ruby_llm.rb @@ -94,6 +94,11 @@ def logger require 'candle' require 'ruby_llm/providers/red_candle' RubyLLM::Provider.register :red_candle, RubyLLM::Providers::RedCandle + + # Register Red Candle models with the global registry + RubyLLM::Providers::RedCandle.models.each do |model| + RubyLLM.models.instance_variable_get(:@models) << model + end rescue LoadError # Red Candle is optional - provider won't be available if gem isn't installed end diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb index 7bbf62555..05a78fc89 100644 --- a/lib/ruby_llm/providers/red_candle.rb +++ b/lib/ruby_llm/providers/red_candle.rb @@ -40,6 +40,22 @@ def local? 
def supports_functions?(model_id = nil) RedCandle::Capabilities.supports_functions?(model_id) end + + def models + # Return Red Candle models for registration + RedCandle::Models::SUPPORTED_MODELS.map do |model_data| + Model::Info.new( + id: model_data[:id], + name: model_data[:name], + provider: 'red_candle', + type: 'chat', + family: model_data[:family], + context_window: model_data[:context_window], + capabilities: %w[streaming structured_output], + modalities: { input: %w[text], output: %w[text] } + ) + end + end end private From 43cc0b834f840dd53df7e9e56e6987267c768e68 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:28:34 -0700 Subject: [PATCH 19/38] Removed some unused config options --- lib/ruby_llm/configuration.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 89d79e90f..406a944bb 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -25,8 +25,6 @@ class Configuration :mistral_api_key, # Red Candle configuration :red_candle_device, - :red_candle_cache_dir, - :red_candle_debug, # Default models :default_model, :default_embedding_model, From 4b67818cdae293f9fb036bf3ca9bff8a14247f23 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:40:30 -0700 Subject: [PATCH 20/38] Updating the gemfiles again --- gemfiles/rails_7.1.gemfile | 4 ++++ gemfiles/rails_7.2.gemfile | 4 ++++ gemfiles/rails_8.0.gemfile | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 675cb178e..6d59f55ff 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -35,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index 4922afb60..d6ee5df4c 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -35,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index f890433bf..903177b2a 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -35,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" From c1ac17db3aff5607880e0661cf8d04589d18435f Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:54:09 -0700 Subject: [PATCH 21/38] Make the capabilities file match the actual capabilities --- lib/ruby_llm/providers/red_candle/capabilities.rb | 6 ++++-- spec/ruby_llm/providers/red_candle/capabilities_spec.rb | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index c63c82f44..6a24fa3f1 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -51,8 +51,10 @@ def model_context_window(model_id) case model_id when /gemma-3-4b/i 8192 - when /qwen2\.5-0\.5b/i + when /mistral-7b/i 32_768 + when /tinyllama/i + 2048 else 4096 # Conservative default end @@ -105,7 +107,7 @@ def supports_stop_sequences? end def model_families - %w[gemma qwen] + %w[gemma llama mistral] end def available_on_platform? 
diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 9b53ecc48..6fe9d75b4 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -52,7 +52,8 @@ describe '#model_context_window' do it 'returns correct context window for known models' do expect(described_class.model_context_window('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq(8192) - expect(described_class.model_context_window('Qwen/Qwen2.5-0.5B-Instruct')).to eq(32_768) + expect(described_class.model_context_window('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to eq(32_768) + expect(described_class.model_context_window('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to eq(2048) end it 'returns default for unknown models' do @@ -89,7 +90,7 @@ describe '#model_families' do it 'returns supported model families' do - expect(described_class.model_families).to eq(%w[gemma qwen]) + expect(described_class.model_families).to eq(%w[gemma llama mistral]) end end From 54b9834154cb364de812d71a745a9c06c548e40b Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:58:03 -0700 Subject: [PATCH 22/38] Deep merge chat options --- lib/ruby_llm/providers/red_candle/chat.rb | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 30b7347f6..7086d86ad 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -8,14 +8,17 @@ module Chat # Override the base complete method to handle local execution def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists _ = headers # Interface compatibility - payload = render_payload( - messages, - tools: tools, - temperature: temperature, - model: model, - stream: block_given?, - schema: schema - ).merge(params) + payload = Utils.deep_merge( + render_payload( + messages, + tools: tools, + temperature: temperature, + model: model, + stream: block_given?, + schema: schema + ), + params + ) if block_given? 
perform_streaming_completion!(payload, &) From c78ce4054c2dfd4792ea27ef53e9a8e0f2433d02 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Tue, 9 Sep 2025 17:57:13 -0700 Subject: [PATCH 23/38] make red-candle off by default --- CONTRIBUTING.md | 2 +- Gemfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1bdc76389..fc256d27c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -82,7 +82,7 @@ RED_CANDLE_REAL_INFERENCE=true bundle exec rspec **To switch back to working without Red Candle:** ```bash -bundle config set --local without red_candle +bundle config unset with bundle install ``` diff --git a/Gemfile b/Gemfile index 8d5470699..0cfcba3b5 100644 --- a/Gemfile +++ b/Gemfile @@ -44,6 +44,6 @@ end # Optional group for Red Candle provider (requires Rust toolchain) # To include: bundle config set --local with red-candle -group :red_candle do +group :red_candle, optional: true do gem 'red-candle', '~> 1.2' end From 6816be9a5d704e75dd90778a9f3eb4824f18929f Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Tue, 9 Sep 2025 22:58:22 -0700 Subject: [PATCH 24/38] improve error messages --- lib/ruby_llm/providers/red_candle/chat.rb | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 7086d86ad..64a5979aa 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -158,7 +158,21 @@ def load_model(model_id) ::Candle::LLM.from_pretrained(model_id, device: @device) end rescue StandardError => e - raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") + if e.message.include?('Failed to find tokenizer') + raise Error.new(nil, + "Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.\n" \ + "Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}\n" \ + "If it requires authentication, login with: huggingface-cli login\n" \ + "Original error: #{e.message}") + elsif e.message.include?('Failed to find model') + raise Error.new(nil, + "Failed to find model '#{model_id}'. The model may not exist or require authentication.\n" \ + "Please verify the model exists at: https://huggingface.co/#{model_id}\n" \ + "If it requires authentication, login with: huggingface-cli login\n" \ + "Original error: #{e.message}") + else + raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") + end end def format_messages(messages) From a258a398b616944ea3dd28bc7856813236c03caf Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Tue, 9 Sep 2025 23:06:48 -0700 Subject: [PATCH 25/38] improved error message --- lib/ruby_llm/providers/red_candle/chat.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 64a5979aa..935234d3a 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -162,12 +162,14 @@ def load_model(model_id) raise Error.new(nil, "Failed to load tokenizer '#{tokenizer}'. 
The tokenizer may not exist or require authentication.\n" \ "Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}\n" \ + "And that you have accepted the terms of service for the tokenizer.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ "Original error: #{e.message}") elsif e.message.include?('Failed to find model') raise Error.new(nil, "Failed to find model '#{model_id}'. The model may not exist or require authentication.\n" \ "Please verify the model exists at: https://huggingface.co/#{model_id}\n" \ + "And that you have accepted the terms of service for the model.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ "Original error: #{e.message}") else From 004563e1abfb50ba32e471fcac6a375ffc0371eb Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Wed, 10 Sep 2025 22:34:26 -0700 Subject: [PATCH 26/38] add additional models --- .../providers/red_candle/capabilities.rb | 4 +-- lib/ruby_llm/providers/red_candle/models.rb | 19 +++++++++++++ .../providers/red_candle/capabilities_spec.rb | 2 +- .../providers/red_candle/models_spec.rb | 28 +++++++++---------- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 6a24fa3f1..40ad397f8 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -51,7 +51,7 @@ def model_context_window(model_id) case model_id when /gemma-3-4b/i 8192 - when /mistral-7b/i + when /qwen2\.5-1\.5b/i, /mistral-7b/i 32_768 when /tinyllama/i 2048 @@ -107,7 +107,7 @@ def supports_stop_sequences? end def model_families - %w[gemma llama mistral] + %w[gemma llama qwen2 mistral phi] end def available_on_platform? 
diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 9ea847538..fbfc8a038 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -38,6 +38,25 @@ module Models architecture: 'mistral', supports_chat: true, supports_structured: true + }, + { + id: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF', + name: 'Qwen 2.1.5B Instruct (Quantized)', + gguf_file: 'qwen2.5-1.5b-instruct-q4_k_m.gguf', + context_window: 32_768, + family: 'qwen2', + architecture: 'qwen2', + supports_chat: true, + supports_structured: true + }, + { + id: 'microsoft/Phi-3-mini-4k-instruct', + name: 'Phi 3', + context_window: 4096, + family: 'phi', + architecture: 'phi', + supports_chat: true, + supports_structured: true } ].freeze diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 6fe9d75b4..2b9bf8875 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -90,7 +90,7 @@ describe '#model_families' do it 'returns supported model families' do - expect(described_class.model_families).to eq(%w[gemma llama mistral]) + expect(described_class.model_families).to eq(%w[gemma llama qwen2 mistral phi]) end end diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb index 9a771be34..8b30dbf42 100644 --- a/spec/ruby_llm/providers/red_candle/models_spec.rb +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -16,7 +16,7 @@ it 'returns an array of supported models' do models = provider.models expect(models).to be_an(Array) - expect(models.size).to eq(3) + expect(models.size).to eq(5) expect(models.first).to be_a(RubyLLM::Model::Info) end @@ -24,16 +24,16 @@ model_ids = provider.models.map(&:id) expect(model_ids).to include('google/gemma-3-4b-it-qat-q4_0-gguf') expect(model_ids).to include('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') - expect(model_ids).to include('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + expect(model_ids).to include('Qwen/Qwen2.5-1.5B-Instruct-GGUF') end end describe '#model' do context 'with a valid model ID' do it 'returns the model' do - model = provider.model('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + model = provider.model('Qwen/Qwen2.5-1.5B-Instruct-GGUF') expect(model).to be_a(RubyLLM::Model::Info) - expect(model.id).to eq('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + expect(model.id).to eq('Qwen/Qwen2.5-1.5B-Instruct-GGUF') end end @@ -50,7 +50,7 @@ describe '#model_available?' 
do it 'returns true for supported models' do expect(provider.model_available?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.model_available?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.model_available?('Qwen/Qwen2.5-1.5B-Instruct-GGUF')).to be true end it 'returns false for unsupported models' do @@ -60,12 +60,12 @@ describe '#model_info' do it 'returns model information' do - info = provider.model_info('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + info = provider.model_info('Qwen/Qwen2.5-1.5B-Instruct-GGUF') expect(info).to include( - id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', - name: 'Mistral 7B Instruct v0.2 (Quantized)', + id: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF', + name: 'Qwen 2.1.5B Instruct (Quantized)', context_window: 32_768, - family: 'mistral', + family: 'qwen2', supports_chat: true, supports_structured: true ) @@ -81,9 +81,9 @@ expect(provider.gguf_file_for('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq('gemma-3-4b-it-q4_0.gguf') end - it 'returns the GGUF file for Mistral model' do - model_id = 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' - gguf_file = 'mistral-7b-instruct-v0.2.Q4_K_M.gguf' + it 'returns the GGUF file for Qwen model' do + model_id = 'Qwen/Qwen2.5-1.5B-Instruct-GGUF' + gguf_file = 'qwen2.5-1.5b-instruct-q4_k_m.gguf' expect(provider.gguf_file_for(model_id)).to eq(gguf_file) end @@ -95,7 +95,7 @@ describe '#supports_chat?' do it 'returns true for all current models' do expect(provider.supports_chat?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_chat?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_chat?('Qwen/Qwen2.5-1.5B-Instruct-GGUF')).to be true expect(provider.supports_chat?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end @@ -103,7 +103,7 @@ describe '#supports_structured?' do it 'returns true for all current models' do expect(provider.supports_structured?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_structured?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_structured?('Qwen/Qwen2.5-1.5B-Instruct-GGUF')).to be true expect(provider.supports_structured?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end From c4895d6c0747be6223c98901f5e564be4076a3f2 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 11 Sep 2025 12:59:20 -0700 Subject: [PATCH 27/38] seperate out tokenizers from gguf --- lib/ruby_llm/providers/red_candle/chat.rb | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 935234d3a..7afa2f4c8 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -147,16 +147,10 @@ def load_model(model_id) gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil tokenizer = respond_to?(:tokenizer_for) ? 
tokenizer_for(model_id) : nil - if gguf_file - # For GGUF models, use the tokenizer if specified, otherwise use model_id - options = { device: @device, gguf_file: gguf_file } - options[:tokenizer] = tokenizer if tokenizer - - ::Candle::LLM.from_pretrained(model_id, **options) - else - # For regular models, use from_pretrained without gguf_file - ::Candle::LLM.from_pretrained(model_id, device: @device) - end + options = { device: @device } + options[:gguf_file] = gguf_file if gguf_file + options[:tokenizer] = tokenizer if tokenizer + ::Candle::LLM.from_pretrained(model_id, **options) rescue StandardError => e if e.message.include?('Failed to find tokenizer') raise Error.new(nil, From 0dc8e9a833c84eade13d3a8fc5cbd987dc664605 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 11 Sep 2025 13:01:08 -0700 Subject: [PATCH 28/38] more complete error message --- lib/ruby_llm/providers/red_candle/chat.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 7afa2f4c8..0ebc3190b 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -158,6 +158,7 @@ def load_model(model_id) "Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}\n" \ "And that you have accepted the terms of service for the tokenizer.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ + "See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning\n" \ "Original error: #{e.message}") elsif e.message.include?('Failed to find model') raise Error.new(nil, @@ -165,6 +166,7 @@ def load_model(model_id) "Please verify the model exists at: https://huggingface.co/#{model_id}\n" \ "And that you have accepted the terms of service for the model.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ + "See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning\n" \ "Original error: #{e.message}") else raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") From 8c87b591c56c56ffe30c91a354348c1f68593e02 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Thu, 11 Sep 2025 13:33:48 -0700 Subject: [PATCH 29/38] Working on documentation --- docs/_advanced/models.md | 27 ++++++++++++++++++++++ docs/_getting_started/configuration.md | 32 ++++++++++++++++++++++++++ docs/_reference/available-models.md | 18 +++++++++++++++ docs/index.md | 5 +++- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/docs/_advanced/models.md b/docs/_advanced/models.md index dcd446de2..8ab8c57a1 100644 --- a/docs/_advanced/models.md +++ b/docs/_advanced/models.md @@ -95,6 +95,33 @@ RubyLLM.models.refresh!(remote_only: true) This is useful when you want to refresh only cloud-based models without querying local model servers. +### Dynamic Model Registration (Red Candle) + +Some providers register their models dynamically at runtime rather than through the models.json file. Red Candle is one such provider - it registers its GGUF models when the gem is loaded. + +**How Red Candle Models Work:** + +1. **Not in models.json**: Red Candle models don't appear in the static models.json file since they're only available when the gem is installed. + +2. 
**Dynamic Registration**: When ruby_llm.rb loads and Red Candle is available, it adds models to the in-memory registry: + ```ruby + # This happens automatically in lib/ruby_llm.rb + RubyLLM::Providers::RedCandle.models.each do |model| + RubyLLM.models.instance_variable_get(:@models) << model + end + ``` + +3. **Excluded from refresh!**: The `refresh!(remote_only: true)` flag excludes Red Candle and other local providers. + +4. **Currently Supported Models**: + - `google/gemma-3-4b-it-qat-q4_0-gguf` + - `TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF` + - `TheBloke/Mistral-7B-Instruct-v0.2-GGUF` + - `Qwen/Qwen2.5-1.5B-Instruct-GGUF` + - `microsoft/Phi-3-mini-4k-instruct` + +Red Candle models are only available when the gem is installed with the red_candle group enabled. See the [Configuration Guide]({% link _getting_started/configuration.md %}) for installation instructions. + **For Gem Development:** The `rake models:update` task is designed for gem maintainers and updates the `models.json` file shipped with the gem: diff --git a/docs/_getting_started/configuration.md b/docs/_getting_started/configuration.md index 6af299ca4..933f6488d 100644 --- a/docs/_getting_started/configuration.md +++ b/docs/_getting_started/configuration.md @@ -64,6 +64,7 @@ RubyLLM.configure do |config| config.ollama_api_base = 'http://localhost:11434/v1' config.gpustack_api_base = ENV['GPUSTACK_API_BASE'] config.gpustack_api_key = ENV['GPUSTACK_API_KEY'] + # Red Candle (optional - see below) # AWS Bedrock (uses standard AWS credential chain if not set) config.bedrock_api_key = ENV['AWS_ACCESS_KEY_ID'] @@ -90,6 +91,37 @@ end These headers are optional and only needed for organization-specific billing or project tracking. +### Red Candle (Local GGUF Models) + +Red Candle is an optional provider that enables local execution of quantized GGUF models. To use it, add the red-candle gem to your Gemfile: + +```ruby +# Gemfile +gem 'ruby_llm' +gem 'red-candle' # Optional: for local GGUF model execution +``` + +Then install: + +```bash +bundle install +``` + +Red Candle requires no API keys since it runs models locally. Some models may require HuggingFace authentication: + +```bash +huggingface-cli login # Required for some gated models +``` + +See [Red Candle's HuggingFace guide](https://github.com/scientist-labs/red-candle/blob/main/docs/HUGGINGFACE.md) for details on authentication. + +Once configured, you can use it like any other provider: + +```ruby +chat = RubyLLM.chat(model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', provider: :red_candle) +response = chat.ask("Hello!") +``` + ## Custom Endpoints ### OpenAI-Compatible APIs diff --git a/docs/_reference/available-models.md b/docs/_reference/available-models.md index c84ccd1ee..86a52ba7d 100644 --- a/docs/_reference/available-models.md +++ b/docs/_reference/available-models.md @@ -27,6 +27,7 @@ redirect_from: - **OpenRouter**: Direct API - **Others**: Local capabilities files + ## Last Updated {: .d-inline-block } @@ -2515,3 +2516,20 @@ Models that generate embeddings: | text-moderation-latest | openai | - | 32768 | - | | text-moderation-stable | openai | - | 32768 | - | + +## Local Providers + +### Red Candle (5) + +Red Candle enables local execution of quantized GGUF models. These models run on your machine with no API costs. 
+ +| Model | Provider | Context | Max Output | Standard Pricing (per 1M tokens) | +| :-- | :-- | --: | --: | :-- | +| google/gemma-3-4b-it-qat-q4_0-gguf | red_candle | 8192 | 512 | Free (local execution) | +| TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF | red_candle | 2048 | 512 | Free (local execution) | +| TheBloke/Mistral-7B-Instruct-v0.2-GGUF | red_candle | 32768 | 512 | Free (local execution) | +| Qwen/Qwen2.5-1.5B-Instruct-GGUF | red_candle | 32768 | 512 | Free (local execution) | +| microsoft/Phi-3-mini-4k-instruct | red_candle | 4096 | 512 | Free (local execution) | + +> **Note:** Local providers (Ollama, GPUStack, Red Candle) register their models dynamically at runtime based on what's installed locally. Ollama and GPUStack models depend on what you've pulled or configured on your system. Red Candle requires the `red-candle` gem. See the [Configuration Guide]({% link _getting_started/configuration.md %}) for setup instructions. +{: .note } diff --git a/docs/index.md b/docs/index.md index c057f580d..b664e5d72 100644 --- a/docs/index.md +++ b/docs/index.md @@ -67,6 +67,10 @@ permalink: / VertexAI VertexAI +
@@ -204,4 +208,3 @@ end chat = Chat.create! model_id: "claude-sonnet-4" chat.ask "What's in this file?", with: "report.pdf" ``` - From d437f7398bdc8a9ed649d49e05376bd09c9f9922 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Thu, 11 Sep 2025 13:44:13 -0700 Subject: [PATCH 30/38] red-candle is optional --- gemfiles/rails_7.1.gemfile | 2 +- gemfiles/rails_7.2.gemfile | 2 +- gemfiles/rails_8.0.gemfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 6d59f55ff..39d07214e 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -35,7 +35,7 @@ group :development do gem "googleauth" end -group :red_candle do +group :red_candle, optional: true do gem "red-candle", "~> 1.2" end diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index d6ee5df4c..b216fc61a 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -35,7 +35,7 @@ group :development do gem "googleauth" end -group :red_candle do +group :red_candle, optional: true do gem "red-candle", "~> 1.2" end diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index 903177b2a..abd42e7e3 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -35,7 +35,7 @@ group :development do gem "googleauth" end -group :red_candle do +group :red_candle, optional: true do gem "red-candle", "~> 1.2" end From 9bdb43408e9d70122499685f4925e25496f73501 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Thu, 11 Sep 2025 13:44:42 -0700 Subject: [PATCH 31/38] require 'candle' is standard --- spec/spec_helper.rb | 2 +- spec/support/models_to_test.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 46c26a58e..4e621c40d 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -21,7 +21,7 @@ # Handle Red Candle provider based on availability and environment begin - require 'red-candle' + require 'candle' # Red Candle gem is installed if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' diff --git a/spec/support/models_to_test.rb b/spec/support/models_to_test.rb index 173866f88..02b8baf7f 100644 --- a/spec/support/models_to_test.rb +++ b/spec/support/models_to_test.rb @@ -17,7 +17,7 @@ # Only include Red Candle models if the gem is available begin - require 'red-candle' + require 'candle' chat_models << { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' } rescue LoadError # Red Candle not available - don't include its models From d52e26e7c615defc9d72cbc69b4310d8377a8876 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 12 Sep 2025 11:26:40 -0700 Subject: [PATCH 32/38] rubocop --- lib/ruby_llm/providers/red_candle/chat.rb | 53 ++++++++++++++--------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 0ebc3190b..44cc4b695 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -141,38 +141,49 @@ def ensure_model_loaded!(model_id) @loaded_models[model_id] ||= load_model(model_id) end - def load_model(model_id) + def model_options(model_id) # Get GGUF file and tokenizer if this is a GGUF model # Access the methods from the Models module which is included in the provider - gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil - tokenizer = respond_to?(:tokenizer_for) ? 
tokenizer_for(model_id) : nil
-        options = { device: @device }
-        options[:gguf_file] = gguf_file if gguf_file
-        options[:tokenizer] = tokenizer if tokenizer
-        ::Candle::LLM.from_pretrained(model_id, **options)
+        options = { device: @device }
+        options[:gguf_file] = gguf_file_for(model_id) if respond_to?(:gguf_file_for)
+        options[:tokenizer] = tokenizer_for(model_id) if respond_to?(:tokenizer_for)
+        options
+      end
+
+      def load_model(model_id)
+        ::Candle::LLM.from_pretrained(model_id, **model_options(model_id))
       rescue StandardError => e
         if e.message.include?('Failed to find tokenizer')
+          raise Error.new(nil, token_error_message(e, model_options(model_id)[:tokenizer]))
         elsif e.message.include?('Failed to find model')
+          raise Error.new(nil, model_error_message(e, model_id))
         else
           raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}")
         end
       end
+
+      def token_error_message(exception, tokenizer)
+        <<~ERROR_MESSAGE
+          Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.
+          Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}
+          And that you have accepted the terms of service for the tokenizer.
+          If it requires authentication, login with: huggingface-cli login
+          See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+          Original error: #{exception.message}
+        ERROR_MESSAGE
+      end
+
+      def model_error_message(exception, model_id)
+        <<~ERROR_MESSAGE
+          Failed to find model '#{model_id}'. The model may not exist or require authentication.
+          Please verify the model exists at: https://huggingface.co/#{model_id}
+          And that you have accepted the terms of service for the model.
+ If it requires authentication, login with: huggingface-cli login + See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning + Original error: #{e.message}" + ERROR_MESSAGE + end + def format_messages(messages) messages.map do |msg| # Handle both hash and Message objects From 8ec93e8819249b6c3c9e06650b4e00d843e51256 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 12 Sep 2025 11:44:13 -0700 Subject: [PATCH 33/38] use a spec helper --- spec/spec_helper.rb | 38 +------------------------------ spec/support/red_candle_loader.rb | 38 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 37 deletions(-) create mode 100644 spec/support/red_candle_loader.rb diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 4e621c40d..b8796653c 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,40 +18,4 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' - -# Handle Red Candle provider based on availability and environment -begin - require 'candle' - - # Red Candle gem is installed - if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' - # Use real inference - don't load the test helper - RSpec.configure do |config| - config.before(:suite) do - puts "\n🔥 Red Candle: Using REAL inference (this will be slow)" - puts " To use mocked responses, unset RED_CANDLE_REAL_INFERENCE\n\n" - end - end - else - # Use stubs (default when gem is installed) - require_relative 'support/red_candle_test_helper' - end -rescue LoadError - # Red Candle gem not installed - skip tests - RSpec.configure do |config| - config.before do |example| - # Skip Red Candle provider tests when gem not installed - test_description = example.full_description.to_s - if example.metadata[:file_path]&.include?('providers/red_candle') || - example.metadata[:described_class]&.to_s&.include?('RedCandle') || - test_description.include?('red_candle/') - skip 'Red Candle not installed (run: bundle config set --local with red-candle && bundle install)' - end - end - - config.before(:suite) do - puts "\n⚠️ Red Candle: Provider not available (gem not installed)" - puts " To enable: bundle config set --local with red-candle && bundle install\n\n" - end - end -end +require_relative 'support/red_candle_loader' diff --git a/spec/support/red_candle_loader.rb b/spec/support/red_candle_loader.rb new file mode 100644 index 000000000..b50ca863b --- /dev/null +++ b/spec/support/red_candle_loader.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +# Handle Red Candle provider based on availability and environment +begin + require 'candle' + + # Red Candle gem is installed + if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' + # Use real inference - don't load the test helper + RSpec.configure do |config| + config.before(:suite) do + puts "\n🔥 Red Candle: Using REAL inference (this will be slow)" + puts " To use mocked responses, unset RED_CANDLE_REAL_INFERENCE\n\n" + end + end + else + # Use stubs (default when gem is installed) + require_relative 'support/red_candle_test_helper' + end +rescue LoadError + # Red Candle gem not installed - skip tests + RSpec.configure do |config| + config.before do |example| + # Skip Red Candle provider tests when gem not installed + test_description = example.full_description.to_s + if example.metadata[:file_path]&.include?('providers/red_candle') || + example.metadata[:described_class]&.to_s&.include?('RedCandle') || + test_description.include?('red_candle/') 
+ skip 'Red Candle not installed (run: bundle config set --local with red_candle && bundle install)' + end + end + + config.before(:suite) do + puts "\n⚠️ Red Candle: Provider not available (gem not installed)" + puts " To enable: bundle config set --local with red-candle && bundle install\n\n" + end + end +end From d1696ffd15082a8f0ff6a8c11b64ebd84919180d Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Fri, 12 Sep 2025 10:52:13 -0700 Subject: [PATCH 34/38] Remove the too cute pricing method --- lib/ruby_llm/providers/red_candle/capabilities.rb | 10 ---------- .../ruby_llm/providers/red_candle/capabilities_spec.rb | 10 ---------- 2 files changed, 20 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 40ad397f8..3311ee9fe 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -60,16 +60,6 @@ def model_context_window(model_id) end end - def pricing - # Local execution - no API costs - { - input_tokens_per_dollar: Float::INFINITY, - output_tokens_per_dollar: Float::INFINITY, - input_price_per_million_tokens: 0.0, - output_price_per_million_tokens: 0.0 - } - end - def default_max_tokens 512 end diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 2b9bf8875..03bb49f25 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -61,16 +61,6 @@ end end - describe '#pricing' do - it 'returns infinite tokens per dollar for local execution' do - pricing = described_class.pricing - expect(pricing[:input_tokens_per_dollar]).to eq(Float::INFINITY) - expect(pricing[:output_tokens_per_dollar]).to eq(Float::INFINITY) - expect(pricing[:input_price_per_million_tokens]).to eq(0.0) - expect(pricing[:output_price_per_million_tokens]).to eq(0.0) - end - end - describe 'generation parameters' do it 'provides correct defaults and limits' do expect(described_class.default_max_tokens).to eq(512) From 62a038986b100fb56fa671670f12bc71b7ec3bab Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Fri, 12 Sep 2025 12:42:23 -0700 Subject: [PATCH 35/38] Fix the comment for RubyLLM::Providers::RedCandle::Capabilities --- lib/ruby_llm/providers/red_candle/capabilities.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 3311ee9fe..ec0afb6b7 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -3,7 +3,7 @@ module RubyLLM module Providers class RedCandle - # Determines capabilities and pricing for RedCandle models + # Determines capabilities for RedCandle models module Capabilities module_function From 90128bbfff164a089fcf78a564c0f6e82cc43336 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Fri, 12 Sep 2025 15:54:29 -0700 Subject: [PATCH 36/38] Make the require_relative actually relative --- spec/support/red_candle_loader.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/support/red_candle_loader.rb b/spec/support/red_candle_loader.rb index b50ca863b..b4fb00b4b 100644 --- a/spec/support/red_candle_loader.rb +++ b/spec/support/red_candle_loader.rb @@ -15,7 +15,7 @@ end else # Use stubs (default when gem is installed) - require_relative 'support/red_candle_test_helper' + require_relative 'red_candle_test_helper' 
   end
 rescue LoadError
   # Red Candle gem not installed - skip tests
   RSpec.configure do |config|
     config.before do |example|
       # Skip Red Candle provider tests when gem not installed
       test_description = example.full_description.to_s
       if example.metadata[:file_path]&.include?('providers/red_candle') ||
          example.metadata[:described_class]&.to_s&.include?('RedCandle') ||
          test_description.include?('red_candle/')

From 9ab992dd25f3668f821ef161395d29f25a8961e6 Mon Sep 17 00:00:00 2001
From: Chris Petersen
Date: Sat, 13 Sep 2025 09:59:35 -0700
Subject: [PATCH 37/38] Update to red-candle 1.3.0 to support Ruby 3.1

---
 Gemfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Gemfile b/Gemfile
index 0cfcba3b5..c6d0742ae 100644
--- a/Gemfile
+++ b/Gemfile
@@ -45,5 +45,5 @@ end
 # Optional group for Red Candle provider (requires Rust toolchain)
 # To include: bundle config set --local with red-candle
 group :red_candle, optional: true do
-  gem 'red-candle', '~> 1.2'
+  gem 'red-candle', '~> 1.3'
 end

From 922e0e93f20c067e0c0fc7ecd2bf124d1a46f13b Mon Sep 17 00:00:00 2001
From: Chris Petersen
Date: Sat, 13 Sep 2025 10:06:53 -0700
Subject: [PATCH 38/38] Update the comment

---
 lib/ruby_llm/providers/red_candle/chat.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb
index 44cc4b695..915c2075b 100644
--- a/lib/ruby_llm/providers/red_candle/chat.rb
+++ b/lib/ruby_llm/providers/red_candle/chat.rb
@@ -25,7 +25,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc
         else
           result = perform_completion!(payload)
           # Convert to Message object for compatibility
-          # Red Candle doesn't provide token counts, but we can estimate them
+          # Red Candle doesn't provide token counts by default, but we can estimate them
           content = result[:content]
           # Rough estimation: ~4 characters per token
           estimated_output_tokens = (content.length / 4.0).round
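
With the series applied, the provider behaves like any other RubyLLM backend. A minimal smoke test might look like the sketch below (not part of the patches; it assumes the optional `red_candle` bundler group has been installed and that the chosen model's Hugging Face terms have been accepted):

```ruby
# Sketch only: exercises the Red Candle provider added in this series.
# Assumes `bundle config set --local with red_candle && bundle install`
# has been run so the red-candle gem (and `require 'candle'`) is available.
require 'ruby_llm'

RubyLLM.configure do |config|
  # Optional; falls back to Candle::Device.best when unset (see determine_device).
  config.red_candle_device = 'cpu'
end

chat = RubyLLM.chat(
  model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', # one of SUPPORTED_MODELS
  provider: :red_candle
)

# Streaming is supported; tools/functions are not (see Capabilities).
chat.ask('Say hello in one short sentence') do |chunk|
  print chunk.content
end
```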