From 5e8c1bb7fe03f1a3cd9d7b81043ffa7161bf2a4f Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:08:25 -0700 Subject: [PATCH 01/38] Initial red-candle provider implementation --- lib/ruby_llm.rb | 9 + lib/ruby_llm/configuration.rb | 4 + lib/ruby_llm/providers/red_candle.rb | 70 ++++++ .../providers/red_candle/capabilities.rb | 122 ++++++++++ lib/ruby_llm/providers/red_candle/chat.rb | 168 ++++++++++++++ lib/ruby_llm/providers/red_candle/models.rb | 81 +++++++ .../providers/red_candle/streaming.rb | 39 ++++ ruby_llm.gemspec | 2 + .../providers/red_candle/capabilities_spec.rb | 117 ++++++++++ .../providers/red_candle/chat_spec.rb | 209 ++++++++++++++++++ .../providers/red_candle/models_spec.rb | 103 +++++++++ spec/ruby_llm/providers/red_candle_spec.rb | 76 +++++++ 12 files changed, 1000 insertions(+) create mode 100644 lib/ruby_llm/providers/red_candle.rb create mode 100644 lib/ruby_llm/providers/red_candle/capabilities.rb create mode 100644 lib/ruby_llm/providers/red_candle/chat.rb create mode 100644 lib/ruby_llm/providers/red_candle/models.rb create mode 100644 lib/ruby_llm/providers/red_candle/streaming.rb create mode 100644 spec/ruby_llm/providers/red_candle/capabilities_spec.rb create mode 100644 spec/ruby_llm/providers/red_candle/chat_spec.rb create mode 100644 spec/ruby_llm/providers/red_candle/models_spec.rb create mode 100644 spec/ruby_llm/providers/red_candle_spec.rb diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb index 9d88eb8dc..0db7f8743 100644 --- a/lib/ruby_llm.rb +++ b/lib/ruby_llm.rb @@ -89,6 +89,15 @@ def logger RubyLLM::Provider.register :perplexity, RubyLLM::Providers::Perplexity RubyLLM::Provider.register :vertexai, RubyLLM::Providers::VertexAI +# Optional Red Candle provider - only available if gem is installed +begin + require 'candle' + require 'ruby_llm/providers/red_candle' + RubyLLM::Provider.register :red_candle, RubyLLM::Providers::RedCandle +rescue LoadError + # Red Candle is optional - provider won't be available if gem isn't installed +end + if defined?(Rails::Railtie) require 'ruby_llm/railtie' require 'ruby_llm/active_record/acts_as' diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 34a842c2f..89d79e90f 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -23,6 +23,10 @@ class Configuration :gpustack_api_base, :gpustack_api_key, :mistral_api_key, + # Red Candle configuration + :red_candle_device, + :red_candle_cache_dir, + :red_candle_debug, # Default models :default_model, :default_embedding_model, diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb new file mode 100644 index 000000000..f200e4af4 --- /dev/null +++ b/lib/ruby_llm/providers/red_candle.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + # Red Candle provider for local LLM execution using the Candle Rust crate. + class RedCandle < Provider + include RedCandle::Chat + include RedCandle::Models + include RedCandle::Capabilities + include RedCandle::Streaming + + def initialize(config) + ensure_red_candle_available! + super + @loaded_models = {} # Cache for loaded models + @device = determine_device(config) + end + + def api_base + nil # Local execution, no API base needed + end + + def headers + {} # No HTTP headers needed + end + + class << self + def capabilities + RedCandle::Capabilities + end + + def configuration_requirements + [] # No required config, device is optional + end + + def local? 
+ true + end + end + + private + + def ensure_red_candle_available! + require 'candle' + rescue LoadError + raise Error.new(nil, "Red Candle gem is not installed. Add 'gem \"red-candle\", \"~> 1.2.3\"' to your Gemfile.") + end + + def determine_device(config) + if config.red_candle_device + case config.red_candle_device.to_s.downcase + when 'cpu' + ::Candle::Device.cpu + when 'cuda', 'gpu' + ::Candle::Device.cuda + when 'metal' + ::Candle::Device.metal + else + ::Candle::Device.best + end + else + ::Candle::Device.best + end + rescue StandardError => e + RubyLLM.logger.warn "Failed to initialize device: #{e.message}. Falling back to CPU." + ::Candle::Device.cpu + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb new file mode 100644 index 000000000..3266f1ebf --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Capabilities + extend self + + def supports_vision? + false + end + + def supports_functions? + false + end + + def supports_streaming? + true + end + + def supports_structured_output? + true + end + + def supports_regex_constraints? + true + end + + def supports_embeddings? + false # Future enhancement - Red Candle does support embedding models + end + + def supports_audio? + false + end + + def supports_pdf? + false + end + + def normalize_temperature(temperature, model_id) + # Red Candle uses standard 0-2 range + return 0.7 if temperature.nil? + + temperature = temperature.to_f + temperature.clamp(0.0, 2.0) + end + + def model_context_window(model_id) + case model_id + when /gemma-3-4b/i + 8192 + when /qwen2\.5-0\.5b/i + 32_768 + else + 4096 # Conservative default + end + end + + def pricing + # Local execution - no API costs + { + input_tokens_per_dollar: Float::INFINITY, + output_tokens_per_dollar: Float::INFINITY, + input_price_per_million_tokens: 0.0, + output_price_per_million_tokens: 0.0 + } + end + + def default_max_tokens + 512 + end + + def max_temperature + 2.0 + end + + def min_temperature + 0.0 + end + + def supports_temperature? + true + end + + def supports_top_p? + true + end + + def supports_top_k? + true + end + + def supports_repetition_penalty? + true + end + + def supports_seed? + true + end + + def supports_stop_sequences? + true + end + + def model_families + %w[gemma qwen] + end + + def available_on_platform? + # Check if Candle can be loaded + begin + require 'candle' + true + rescue LoadError + false + end + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb new file mode 100644 index 000000000..ec9d7c8ed --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Chat + def render_payload(messages, tools:, temperature:, model:, stream:, schema:) + # Red Candle doesn't support tools + if tools && !tools.empty? 
+ raise Error.new(nil, 'Red Candle provider does not support tool calling') + end + + { + messages: messages, + temperature: temperature, + model: model.id, + stream: stream, + schema: schema + } + end + + def perform_completion!(payload) + model = ensure_model_loaded!(payload[:model]) + messages = format_messages(payload[:messages]) + + # Apply chat template if available + prompt = if model.respond_to?(:apply_chat_template) + model.apply_chat_template(messages) + else + # Fallback to simple formatting + messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + end + + # Configure generation + config_opts = { + temperature: payload[:temperature] || 0.7, + max_length: payload[:max_tokens] || 512 + } + + # Handle structured generation if schema provided + response = if payload[:schema] + generate_with_schema(model, prompt, payload[:schema], config_opts) + else + model.generate( + prompt, + config: ::Candle::GenerationConfig.balanced(**config_opts) + ) + end + + format_response(response, payload[:schema]) + end + + def perform_streaming_completion!(payload, &block) + model = ensure_model_loaded!(payload[:model]) + messages = format_messages(payload[:messages]) + + # Apply chat template if available + prompt = if model.respond_to?(:apply_chat_template) + model.apply_chat_template(messages) + else + messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + end + + # Configure generation + config = ::Candle::GenerationConfig.balanced( + temperature: payload[:temperature] || 0.7, + max_length: payload[:max_tokens] || 512 + ) + + # Stream tokens + buffer = '' + model.generate_stream(prompt, config: config) do |token| + buffer += token + chunk = format_stream_chunk(token) + block.call(chunk) + end + + # Send final chunk with finish reason + final_chunk = { + delta: { content: '' }, + finish_reason: 'stop' + } + block.call(final_chunk) + end + + private + + def ensure_model_loaded!(model_id) + @loaded_models[model_id] ||= load_model(model_id) + end + + def load_model(model_id) + # Handle GGUF models with specific files + if model_id == 'google/gemma-3-4b-it-qat-q4_0-gguf' + ::Candle::LLM.from_pretrained( + model_id, + device: @device, + gguf_file: 'gemma-3-4b-it-q4_0.gguf' + ) + else + ::Candle::LLM.from_pretrained(model_id, device: @device) + end + rescue StandardError => e + raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") + end + + def format_messages(messages) + messages.map do |msg| + { + role: msg[:role].to_s, + content: extract_message_content(msg) + } + end + end + + def extract_message_content(message) + content = message[:content] + return content if content.is_a?(String) + + # Handle array content (e.g., with images) + if content.is_a?(Array) + content.map do |part| + part[:text] if part[:type] == 'text' + end.compact.join(' ') + else + content.to_s + end + end + + def generate_with_schema(model, prompt, schema, config_opts) + model.generate_structured( + prompt, + schema: schema, + **config_opts + ) + rescue StandardError => e + RubyLLM.logger.warn "Structured generation failed: #{e.message}. Falling back to regular generation." 
+ model.generate( + prompt, + config: ::Candle::GenerationConfig.balanced(**config_opts) + ) + end + + def format_response(response, schema) + content = if schema && !response.is_a?(String) + # Structured response + JSON.generate(response) + else + response + end + + { + content: content, + role: 'assistant', + finish_reason: 'stop' + } + end + + def format_stream_chunk(token) + { + delta: { content: token }, + finish_reason: nil + } + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb new file mode 100644 index 000000000..f1d8a7754 --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Models + SUPPORTED_MODELS = [ + { + id: 'google/gemma-3-4b-it-qat-q4_0-gguf', + name: 'Gemma 3 4B Instruct (Quantized)', + gguf_file: 'gemma-3-4b-it-q4_0.gguf', + context_window: 8192, + family: 'gemma', + architecture: 'gemma2', + supports_chat: true, + supports_structured: true + }, + { + id: 'Qwen/Qwen2.5-0.5B-Instruct', + name: 'Qwen 2.5 0.5B Instruct', + context_window: 32_768, + family: 'qwen', + architecture: 'qwen2', + supports_chat: true, + supports_structured: true + } + ].freeze + + def list_models + SUPPORTED_MODELS.map do |model_data| + Model::Info.new( + id: model_data[:id], + name: model_data[:name], + provider: slug, + family: model_data[:family], + context_window: model_data[:context_window], + capabilities: %w[streaming structured_output], + modalities: { input: %w[text], output: %w[text] } + ) + end + end + + def models + @models ||= list_models + end + + def model(id) + models.find { |m| m.id == id } || + raise(Error.new(nil, "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}")) + end + + def model_available?(id) + SUPPORTED_MODELS.any? { |m| m[:id] == id } + end + + def model_ids + SUPPORTED_MODELS.map { |m| m[:id] } + end + + def model_info(id) + SUPPORTED_MODELS.find { |m| m[:id] == id } + end + + def supports_chat?(model_id) + info = model_info(model_id) + info ? info[:supports_chat] : false + end + + def supports_structured?(model_id) + info = model_info(model_id) + info ? info[:supports_structured] : false + end + + def gguf_file_for(model_id) + info = model_info(model_id) + info ? info[:gguf_file] : nil + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/red_candle/streaming.rb b/lib/ruby_llm/providers/red_candle/streaming.rb new file mode 100644 index 000000000..f0598ce8e --- /dev/null +++ b/lib/ruby_llm/providers/red_candle/streaming.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class RedCandle + module Streaming + def stream(payload, &block) + if payload[:stream] + perform_streaming_completion!(payload, &block) + else + # Non-streaming fallback + result = perform_completion!(payload) + # Yield the complete result as a single chunk + chunk = { + content: result[:content], + role: result[:role], + finish_reason: result[:finish_reason] + } + block.call(chunk) + end + end + + private + + def stream_processor + # Red Candle handles streaming internally through blocks + # This method is here for compatibility with the base streaming interface + nil + end + + def process_stream_response(response) + # Red Candle doesn't use HTTP responses + # Streaming is handled directly in perform_streaming_completion! 
+ response + end + end + end + end +end \ No newline at end of file diff --git a/ruby_llm.gemspec b/ruby_llm.gemspec index 3e6e6af20..cf92fcd44 100644 --- a/ruby_llm.gemspec +++ b/ruby_llm.gemspec @@ -41,4 +41,6 @@ Gem::Specification.new do |spec| spec.add_dependency 'faraday-retry', '>= 1' spec.add_dependency 'marcel', '~> 1.0' spec.add_dependency 'zeitwerk', '~> 2' + + spec.add_development_dependency 'red-candle', '~> 1.2' end diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb new file mode 100644 index 000000000..53c0b7e5e --- /dev/null +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -0,0 +1,117 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle::Capabilities do + describe 'feature support' do + it 'does not support vision' do + expect(described_class.supports_vision?).to be false + end + + it 'does not support functions' do + expect(described_class.supports_functions?).to be false + end + + it 'supports streaming' do + expect(described_class.supports_streaming?).to be true + end + + it 'supports structured output' do + expect(described_class.supports_structured_output?).to be true + end + + it 'supports regex constraints' do + expect(described_class.supports_regex_constraints?).to be true + end + + it 'does not support embeddings yet' do + expect(described_class.supports_embeddings?).to be false + end + + it 'does not support audio' do + expect(described_class.supports_audio?).to be false + end + + it 'does not support PDF' do + expect(described_class.supports_pdf?).to be false + end + end + + describe '#normalize_temperature' do + it 'returns default temperature when nil' do + expect(described_class.normalize_temperature(nil, 'any_model')).to eq(0.7) + end + + it 'clamps temperature to valid range' do + expect(described_class.normalize_temperature(-1, 'any_model')).to eq(0.0) + expect(described_class.normalize_temperature(3, 'any_model')).to eq(2.0) + expect(described_class.normalize_temperature(1.5, 'any_model')).to eq(1.5) + end + end + + describe '#model_context_window' do + it 'returns correct context window for known models' do + expect(described_class.model_context_window('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq(8192) + expect(described_class.model_context_window('Qwen/Qwen2.5-0.5B-Instruct')).to eq(32_768) + end + + it 'returns default for unknown models' do + expect(described_class.model_context_window('unknown/model')).to eq(4096) + end + end + + describe '#pricing' do + it 'returns infinite tokens per dollar for local execution' do + pricing = described_class.pricing + expect(pricing[:input_tokens_per_dollar]).to eq(Float::INFINITY) + expect(pricing[:output_tokens_per_dollar]).to eq(Float::INFINITY) + expect(pricing[:input_price_per_million_tokens]).to eq(0.0) + expect(pricing[:output_price_per_million_tokens]).to eq(0.0) + end + end + + describe 'generation parameters' do + it 'provides correct defaults and limits' do + expect(described_class.default_max_tokens).to eq(512) + expect(described_class.max_temperature).to eq(2.0) + expect(described_class.min_temperature).to eq(0.0) + end + + it 'supports various generation parameters' do + expect(described_class.supports_temperature?).to be true + expect(described_class.supports_top_p?).to be true + expect(described_class.supports_top_k?).to be true + expect(described_class.supports_repetition_penalty?).to be true + expect(described_class.supports_seed?).to be true + 
expect(described_class.supports_stop_sequences?).to be true + end + end + + describe '#model_families' do + it 'returns supported model families' do + expect(described_class.model_families).to eq(%w[gemma qwen]) + end + end + + describe '#available_on_platform?' do + context 'when Candle is available' do + before do + allow(described_class).to receive(:require).with('candle').and_return(true) + end + + it 'returns true' do + expect(described_class.available_on_platform?).to be true + end + end + + context 'when Candle is not available' do + before do + allow(described_class).to receive(:require).with('candle').and_raise(LoadError) + end + + it 'returns false' do + expect(described_class.available_on_platform?).to be false + end + end + end +end \ No newline at end of file diff --git a/spec/ruby_llm/providers/red_candle/chat_spec.rb b/spec/ruby_llm/providers/red_candle/chat_spec.rb new file mode 100644 index 000000000..513dd192c --- /dev/null +++ b/spec/ruby_llm/providers/red_candle/chat_spec.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle::Chat do + let(:config) { RubyLLM::Configuration.new } + let(:provider) { RubyLLM::Providers::RedCandle.new(config) } + let(:model) { provider.model('Qwen/Qwen2.5-0.5B-Instruct') } + + before(:all) do + begin + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' + end + end + + describe '#render_payload' do + let(:messages) { [{ role: 'user', content: 'Hello' }] } + + it 'creates a valid payload' do + payload = provider.render_payload( + messages, + tools: nil, + temperature: 0.7, + model: model, + stream: false, + schema: nil + ) + + expect(payload).to include( + messages: messages, + temperature: 0.7, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + stream: false, + schema: nil + ) + end + + it 'raises error when tools are provided' do + tools = [{ name: 'calculator', description: 'Does math' }] + + expect do + provider.render_payload( + messages, + tools: tools, + temperature: 0.7, + model: model, + stream: false, + schema: nil + ) + end.to raise_error(RubyLLM::Error, /does not support tool calling/) + end + + it 'includes schema when provided' do + schema = { type: 'object', properties: { name: { type: 'string' } } } + + payload = provider.render_payload( + messages, + tools: nil, + temperature: 0.7, + model: model, + stream: false, + schema: schema + ) + + expect(payload[:schema]).to eq(schema) + end + end + + describe '#perform_completion!' 
do + let(:messages) { [{ role: 'user', content: 'Test message' }] } + let(:mock_model) { double('Candle::LLM') } + + before do + allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) + allow(mock_model).to receive(:respond_to?).with(:apply_chat_template).and_return(true) + allow(mock_model).to receive(:apply_chat_template).and_return('formatted prompt') + end + + context 'with regular generation' do + it 'generates a response' do + allow(mock_model).to receive(:generate).and_return('Generated response') + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7 + } + + result = provider.perform_completion!(payload) + + expect(result).to include( + content: 'Generated response', + role: 'assistant', + finish_reason: 'stop' + ) + end + end + + context 'with structured generation' do + it 'generates structured output' do + schema = { type: 'object', properties: { name: { type: 'string' } } } + structured_response = { 'name' => 'Alice' } + + allow(mock_model).to receive(:generate_structured).and_return(structured_response) + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7, + schema: schema + } + + result = provider.perform_completion!(payload) + + expect(result[:content]).to eq(JSON.generate(structured_response)) + expect(result[:role]).to eq('assistant') + end + + it 'falls back to regular generation on structured failure' do + schema = { type: 'object', properties: { name: { type: 'string' } } } + + allow(mock_model).to receive(:generate_structured).and_raise(StandardError, 'Structured gen failed') + allow(mock_model).to receive(:generate).and_return('Fallback response') + allow(RubyLLM.logger).to receive(:warn) + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7, + schema: schema + } + + result = provider.perform_completion!(payload) + + expect(result[:content]).to eq('Fallback response') + expect(RubyLLM.logger).to have_received(:warn).with(/Structured generation failed/) + end + end + end + + describe '#perform_streaming_completion!' do + let(:messages) { [{ role: 'user', content: 'Stream test' }] } + let(:mock_model) { double('Candle::LLM') } + + before do + allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) + allow(mock_model).to receive(:respond_to?).with(:apply_chat_template).and_return(true) + allow(mock_model).to receive(:apply_chat_template).and_return('formatted prompt') + end + + it 'streams tokens and sends finish reason' do + tokens = %w[Hello world !] 
+ chunks_received = [] + + allow(mock_model).to receive(:generate_stream) do |_prompt, config:, &block| + tokens.each { |token| block.call(token) } + end + + payload = { + messages: messages, + model: 'Qwen/Qwen2.5-0.5B-Instruct', + temperature: 0.7 + } + + provider.perform_streaming_completion!(payload) do |chunk| + chunks_received << chunk + end + + # Check token chunks + tokens.each_with_index do |token, i| + expect(chunks_received[i]).to include( + delta: { content: token }, + finish_reason: nil + ) + end + + # Check final chunk + expect(chunks_received.last).to include( + delta: { content: '' }, + finish_reason: 'stop' + ) + end + end + + describe 'message formatting' do + it 'handles string content' do + messages = [{ role: 'user', content: 'Simple text' }] + formatted = provider.send(:format_messages, messages) + + expect(formatted).to eq([{ role: 'user', content: 'Simple text' }]) + end + + it 'handles array content with text parts' do + messages = [{ + role: 'user', + content: [ + { type: 'text', text: 'Part 1' }, + { type: 'text', text: 'Part 2' }, + { type: 'image', url: 'ignored.jpg' } + ] + }] + + formatted = provider.send(:format_messages, messages) + expect(formatted).to eq([{ role: 'user', content: 'Part 1 Part 2' }]) + end + end +end \ No newline at end of file diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb new file mode 100644 index 000000000..8c89147bc --- /dev/null +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle::Models do + let(:config) { RubyLLM::Configuration.new } + let(:provider) { RubyLLM::Providers::RedCandle.new(config) } + + before(:all) do + begin + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' + end + end + + describe '#models' do + it 'returns an array of supported models' do + models = provider.models + expect(models).to be_an(Array) + expect(models.size).to eq(2) + expect(models.first).to be_a(RubyLLM::Model::Info) + end + + it 'includes the expected model IDs' do + model_ids = provider.models.map(&:id) + expect(model_ids).to include('google/gemma-3-4b-it-qat-q4_0-gguf') + expect(model_ids).to include('Qwen/Qwen2.5-0.5B-Instruct') + end + end + + describe '#model' do + context 'with a valid model ID' do + it 'returns the model' do + model = provider.model('Qwen/Qwen2.5-0.5B-Instruct') + expect(model).to be_a(RubyLLM::Model::Info) + expect(model.id).to eq('Qwen/Qwen2.5-0.5B-Instruct') + end + end + + context 'with an invalid model ID' do + it 'raises an error' do + expect { provider.model('invalid/model') }.to raise_error( + RubyLLM::Error, + /Model invalid\/model not found/ + ) + end + end + end + + describe '#model_available?' 
do + it 'returns true for supported models' do + expect(provider.model_available?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true + expect(provider.model_available?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + end + + it 'returns false for unsupported models' do + expect(provider.model_available?('gpt-4')).to be false + end + end + + describe '#model_info' do + it 'returns model information' do + info = provider.model_info('Qwen/Qwen2.5-0.5B-Instruct') + expect(info).to include( + id: 'Qwen/Qwen2.5-0.5B-Instruct', + name: 'Qwen 2.5 0.5B Instruct', + context_window: 32_768, + family: 'qwen', + supports_chat: true, + supports_structured: true + ) + end + + it 'returns nil for unknown models' do + expect(provider.model_info('unknown')).to be_nil + end + end + + describe '#gguf_file_for' do + it 'returns the GGUF file for Gemma model' do + expect(provider.gguf_file_for('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq('gemma-3-4b-it-q4_0.gguf') + end + + it 'returns nil for non-GGUF models' do + expect(provider.gguf_file_for('Qwen/Qwen2.5-0.5B-Instruct')).to be_nil + end + end + + describe '#supports_chat?' do + it 'returns true for all current models' do + expect(provider.supports_chat?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true + expect(provider.supports_chat?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + end + end + + describe '#supports_structured?' do + it 'returns true for all current models' do + expect(provider.supports_structured?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true + expect(provider.supports_structured?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + end + end +end \ No newline at end of file diff --git a/spec/ruby_llm/providers/red_candle_spec.rb b/spec/ruby_llm/providers/red_candle_spec.rb new file mode 100644 index 000000000..8e1216976 --- /dev/null +++ b/spec/ruby_llm/providers/red_candle_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::RedCandle do + let(:config) { RubyLLM::Configuration.new } + let(:provider) { described_class.new(config) } + + # Skip all tests if Red Candle is not available + before(:all) do + begin + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' + end + end + + describe '#initialize' do + context 'when Red Candle is not available' do + before do + allow_any_instance_of(described_class).to receive(:require).with('candle').and_raise(LoadError) + end + + it 'raises an informative error' do + expect { described_class.new(config) }.to raise_error( + RubyLLM::Error, + /Red Candle gem is not installed/ + ) + end + end + + + context 'with device configuration' do + it 'uses the configured device' do + config.red_candle_device = 'cpu' + provider = described_class.new(config) + expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.cpu) + end + + it 'defaults to best device when not configured' do + provider = described_class.new(config) + expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.best) + end + end + end + + describe '#api_base' do + it 'returns nil for local execution' do + expect(provider.api_base).to be_nil + end + end + + describe '#headers' do + it 'returns empty hash' do + expect(provider.headers).to eq({}) + end + end + + describe '.local?' 
do + it 'returns true' do + expect(described_class.local?).to be true + end + end + + describe '.configuration_requirements' do + it 'returns empty array' do + expect(described_class.configuration_requirements).to eq([]) + end + end + + describe '.capabilities' do + it 'returns the Capabilities module' do + expect(described_class.capabilities).to eq(RubyLLM::Providers::RedCandle::Capabilities) + end + end +end \ No newline at end of file From 5c770ddd3cb31cd1b0054f872f26c323355c5b8b Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:31:32 -0700 Subject: [PATCH 02/38] Starting to work --- lib/ruby_llm/providers/red_candle/chat.rb | 62 +++++++++++++++------ lib/ruby_llm/providers/red_candle/models.rb | 16 ++++++ 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index ec9d7c8ed..1e2ed798a 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -4,6 +4,30 @@ module RubyLLM module Providers class RedCandle module Chat + # Override the base complete method to handle local execution + def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) + payload = render_payload( + messages, + tools: tools, + temperature: temperature, + model: model, + stream: block_given?, + schema: schema + ).merge(params) + + if block_given? + perform_streaming_completion!(payload, &) + else + result = perform_completion!(payload) + # Convert to Message object for compatibility + Message.new( + role: result[:role].to_sym, + content: result[:content], + model_id: model.id + ) + end + end + def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # Red Candle doesn't support tools if tools && !tools.empty? @@ -68,18 +92,13 @@ def perform_streaming_completion!(payload, &block) ) # Stream tokens - buffer = '' model.generate_stream(prompt, config: config) do |token| - buffer += token chunk = format_stream_chunk(token) block.call(chunk) end # Send final chunk with finish reason - final_chunk = { - delta: { content: '' }, - finish_reason: 'stop' - } + final_chunk = format_stream_chunk('', 'stop') block.call(final_chunk) end @@ -90,14 +109,19 @@ def ensure_model_loaded!(model_id) end def load_model(model_id) - # Handle GGUF models with specific files - if model_id == 'google/gemma-3-4b-it-qat-q4_0-gguf' - ::Candle::LLM.from_pretrained( - model_id, - device: @device, - gguf_file: 'gemma-3-4b-it-q4_0.gguf' - ) + # Get GGUF file and tokenizer if this is a GGUF model + # Access the methods from the Models module which is included in the provider + gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil + tokenizer = respond_to?(:tokenizer_for) ? 
tokenizer_for(model_id) : nil + + if gguf_file + # For GGUF models, use the tokenizer if specified, otherwise use model_id + options = { device: @device, gguf_file: gguf_file } + options[:tokenizer] = tokenizer if tokenizer + + ::Candle::LLM.from_pretrained(model_id, **options) else + # For regular models, use from_pretrained without gguf_file ::Candle::LLM.from_pretrained(model_id, device: @device) end rescue StandardError => e @@ -156,11 +180,13 @@ def format_response(response, schema) } end - def format_stream_chunk(token) - { - delta: { content: token }, - finish_reason: nil - } + def format_stream_chunk(token, finish_reason = nil) + # Return a Chunk object for streaming compatibility + Chunk.new( + role: :assistant, + content: token, + finish_reason: finish_reason + ) end end end diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index f1d8a7754..66c9b6955 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -9,12 +9,23 @@ module Models id: 'google/gemma-3-4b-it-qat-q4_0-gguf', name: 'Gemma 3 4B Instruct (Quantized)', gguf_file: 'gemma-3-4b-it-q4_0.gguf', + tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model context_window: 8192, family: 'gemma', architecture: 'gemma2', supports_chat: true, supports_structured: true }, + { + id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', + name: 'TinyLlama 1.1B Chat (Quantized)', + gguf_file: 'tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf', + context_window: 2048, + family: 'llama', + architecture: 'llama', + supports_chat: true, + supports_structured: true + }, { id: 'Qwen/Qwen2.5-0.5B-Instruct', name: 'Qwen 2.5 0.5B Instruct', @@ -75,6 +86,11 @@ def gguf_file_for(model_id) info = model_info(model_id) info ? info[:gguf_file] : nil end + + def tokenizer_for(model_id) + info = model_info(model_id) + info ? 
info[:tokenizer] : nil + end end end end From fe199a850d7a79176f640062b5f904ed45829bbc Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:41:19 -0700 Subject: [PATCH 03/38] Swap qwen for mistral --- lib/ruby_llm/providers/red_candle/chat.rb | 12 +++---- lib/ruby_llm/providers/red_candle/models.rb | 10 +++--- .../providers/red_candle/chat_spec.rb | 31 ++++++++--------- .../providers/red_candle/models_spec.rb | 33 +++++++++++-------- 4 files changed, 45 insertions(+), 41 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 1e2ed798a..c4838496f 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -97,8 +97,8 @@ def perform_streaming_completion!(payload, &block) block.call(chunk) end - # Send final chunk with finish reason - final_chunk = format_stream_chunk('', 'stop') + # Send final chunk with empty content (indicates completion) + final_chunk = format_stream_chunk('') block.call(final_chunk) end @@ -175,17 +175,15 @@ def format_response(response, schema) { content: content, - role: 'assistant', - finish_reason: 'stop' + role: 'assistant' } end - def format_stream_chunk(token, finish_reason = nil) + def format_stream_chunk(token) # Return a Chunk object for streaming compatibility Chunk.new( role: :assistant, - content: token, - finish_reason: finish_reason + content: token ) end end diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 66c9b6955..7d520832e 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -27,11 +27,13 @@ module Models supports_structured: true }, { - id: 'Qwen/Qwen2.5-0.5B-Instruct', - name: 'Qwen 2.5 0.5B Instruct', + id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', + name: 'Mistral 7B Instruct v0.2 (Quantized)', + gguf_file: 'mistral-7b-instruct-v0.2.Q4_K_M.gguf', + tokenizer: 'mistralai/Mistral-7B-Instruct-v0.2', context_window: 32_768, - family: 'qwen', - architecture: 'qwen2', + family: 'mistral', + architecture: 'mistral', supports_chat: true, supports_structured: true } diff --git a/spec/ruby_llm/providers/red_candle/chat_spec.rb b/spec/ruby_llm/providers/red_candle/chat_spec.rb index 513dd192c..42a1f7a41 100644 --- a/spec/ruby_llm/providers/red_candle/chat_spec.rb +++ b/spec/ruby_llm/providers/red_candle/chat_spec.rb @@ -5,7 +5,7 @@ RSpec.describe RubyLLM::Providers::RedCandle::Chat do let(:config) { RubyLLM::Configuration.new } let(:provider) { RubyLLM::Providers::RedCandle.new(config) } - let(:model) { provider.model('Qwen/Qwen2.5-0.5B-Instruct') } + let(:model) { provider.model('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') } before(:all) do begin @@ -31,7 +31,7 @@ expect(payload).to include( messages: messages, temperature: 0.7, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', stream: false, schema: nil ) @@ -84,7 +84,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7 } @@ -92,8 +92,7 @@ expect(result).to include( content: 'Generated response', - role: 'assistant', - finish_reason: 'stop' + role: 'assistant' ) end end @@ -107,7 +106,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7, schema: schema } @@ -127,7 +126,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 
'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7, schema: schema } @@ -160,7 +159,7 @@ payload = { messages: messages, - model: 'Qwen/Qwen2.5-0.5B-Instruct', + model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', temperature: 0.7 } @@ -170,17 +169,15 @@ # Check token chunks tokens.each_with_index do |token, i| - expect(chunks_received[i]).to include( - delta: { content: token }, - finish_reason: nil - ) + chunk = chunks_received[i] + expect(chunk).to be_a(RubyLLM::Chunk) + expect(chunk.content).to eq(token) end - # Check final chunk - expect(chunks_received.last).to include( - delta: { content: '' }, - finish_reason: 'stop' - ) + # Check final chunk (empty content indicates completion) + final_chunk = chunks_received.last + expect(final_chunk).to be_a(RubyLLM::Chunk) + expect(final_chunk.content).to eq('') end end diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb index 8c89147bc..1f8533b6c 100644 --- a/spec/ruby_llm/providers/red_candle/models_spec.rb +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -18,23 +18,24 @@ it 'returns an array of supported models' do models = provider.models expect(models).to be_an(Array) - expect(models.size).to eq(2) + expect(models.size).to eq(3) expect(models.first).to be_a(RubyLLM::Model::Info) end it 'includes the expected model IDs' do model_ids = provider.models.map(&:id) expect(model_ids).to include('google/gemma-3-4b-it-qat-q4_0-gguf') - expect(model_ids).to include('Qwen/Qwen2.5-0.5B-Instruct') + expect(model_ids).to include('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') + expect(model_ids).to include('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') end end describe '#model' do context 'with a valid model ID' do it 'returns the model' do - model = provider.model('Qwen/Qwen2.5-0.5B-Instruct') + model = provider.model('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') expect(model).to be_a(RubyLLM::Model::Info) - expect(model.id).to eq('Qwen/Qwen2.5-0.5B-Instruct') + expect(model.id).to eq('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') end end @@ -51,7 +52,7 @@ describe '#model_available?' do it 'returns true for supported models' do expect(provider.model_available?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.model_available?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + expect(provider.model_available?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true end it 'returns false for unsupported models' do @@ -61,12 +62,12 @@ describe '#model_info' do it 'returns model information' do - info = provider.model_info('Qwen/Qwen2.5-0.5B-Instruct') + info = provider.model_info('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') expect(info).to include( - id: 'Qwen/Qwen2.5-0.5B-Instruct', - name: 'Qwen 2.5 0.5B Instruct', + id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', + name: 'Mistral 7B Instruct v0.2 (Quantized)', context_window: 32_768, - family: 'qwen', + family: 'mistral', supports_chat: true, supports_structured: true ) @@ -82,22 +83,28 @@ expect(provider.gguf_file_for('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq('gemma-3-4b-it-q4_0.gguf') end - it 'returns nil for non-GGUF models' do - expect(provider.gguf_file_for('Qwen/Qwen2.5-0.5B-Instruct')).to be_nil + it 'returns the GGUF file for Mistral model' do + expect(provider.gguf_file_for('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to eq('mistral-7b-instruct-v0.2.Q4_K_M.gguf') + end + + it 'returns nil for unknown models' do + expect(provider.gguf_file_for('unknown')).to be_nil end end describe '#supports_chat?' 
do it 'returns true for all current models' do expect(provider.supports_chat?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_chat?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + expect(provider.supports_chat?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_chat?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end describe '#supports_structured?' do it 'returns true for all current models' do expect(provider.supports_structured?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_structured?('Qwen/Qwen2.5-0.5B-Instruct')).to be true + expect(provider.supports_structured?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_structured?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end end \ No newline at end of file From b8bf3319d357ef1f60dfb91c11a1502858d72246 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 14:58:58 -0700 Subject: [PATCH 04/38] Trying to add red-candle to the models_to_test.rb --- lib/ruby_llm/providers/red_candle/chat.rb | 40 +++++++++++++++++++---- spec/ruby_llm/chat_spec.rb | 5 +++ spec/support/models_to_test.rb | 1 + 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index c4838496f..af915fbcb 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -20,10 +20,18 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc else result = perform_completion!(payload) # Convert to Message object for compatibility + # Red Candle doesn't provide token counts, but we can estimate them + content = result[:content] + # Rough estimation: ~4 characters per token + estimated_output_tokens = (content.length / 4.0).round + estimated_input_tokens = estimate_input_tokens(payload[:messages]) + Message.new( role: result[:role].to_sym, - content: result[:content], - model_id: model.id + content: content, + model_id: model.id, + input_tokens: estimated_input_tokens, + output_tokens: estimated_output_tokens ) end end @@ -130,13 +138,26 @@ def load_model(model_id) def format_messages(messages) messages.map do |msg| - { - role: msg[:role].to_s, - content: extract_message_content(msg) - } + # Handle both hash and Message objects + if msg.is_a?(Message) + { + role: msg.role.to_s, + content: extract_message_content_from_object(msg) + } + else + { + role: msg[:role].to_s, + content: extract_message_content(msg) + } + end end end + def extract_message_content_from_object(message) + # For Message objects, get the content directly + message.content.to_s + end + def extract_message_content(message) content = message[:content] return content if content.is_a?(String) @@ -186,6 +207,13 @@ def format_stream_chunk(token) content: token ) end + + def estimate_input_tokens(messages) + # Rough estimation: ~4 characters per token + formatted = format_messages(messages) + total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length } + (total_chars / 4.0).round + end end end end diff --git a/spec/ruby_llm/chat_spec.rb b/spec/ruby_llm/chat_spec.rb index 1c775d11e..c1df68a43 100644 --- a/spec/ruby_llm/chat_spec.rb +++ b/spec/ruby_llm/chat_spec.rb @@ -20,6 +20,11 @@ end it "#{provider}/#{model} returns raw responses" do + # Red Candle is a truly local provider and doesn't have HTTP responses + if provider == :red_candle + skip 'Red Candle provider does not have raw HTTP 
responses' + end + chat = RubyLLM.chat(model: model, provider: provider) response = chat.ask('What is the capital of France?') expect(response.raw).to be_present diff --git a/spec/support/models_to_test.rb b/spec/support/models_to_test.rb index 2ad90e4b4..04591ebee 100644 --- a/spec/support/models_to_test.rb +++ b/spec/support/models_to_test.rb @@ -11,6 +11,7 @@ { provider: :openai, model: 'gpt-4.1-nano' }, { provider: :openrouter, model: 'anthropic/claude-3.5-haiku' }, { provider: :perplexity, model: 'sonar' }, + { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' }, { provider: :vertexai, model: 'gemini-2.5-flash' } ].freeze From d98834c345f4d639666b12e273a0d79fda788090 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 15:11:37 -0700 Subject: [PATCH 05/38] Adding red-candle to the models_to_test file --- .../providers/red_candle/capabilities.rb | 2 +- lib/ruby_llm/providers/red_candle/chat.rb | 62 +++++++++++++++++-- spec/ruby_llm/chat_streaming_spec.rb | 18 ++++-- spec/ruby_llm/chat_tools_spec.rb | 54 +++++++++++----- spec/support/streaming_error_helpers.rb | 12 +++- 5 files changed, 119 insertions(+), 29 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 3266f1ebf..ae8efda17 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -10,7 +10,7 @@ def supports_vision? false end - def supports_functions? + def supports_functions?(_model_id = nil) false end diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index af915fbcb..b90cd2127 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -99,8 +99,12 @@ def perform_streaming_completion!(payload, &block) max_length: payload[:max_tokens] || 512 ) + # Collect all streamed content + full_content = '' + # Stream tokens model.generate_stream(prompt, config: config) do |token| + full_content += token chunk = format_stream_chunk(token) block.call(chunk) end @@ -108,6 +112,18 @@ def perform_streaming_completion!(payload, &block) # Send final chunk with empty content (indicates completion) final_chunk = format_stream_chunk('') block.call(final_chunk) + + # Return a Message object with the complete response + estimated_output_tokens = (full_content.length / 4.0).round + estimated_input_tokens = estimate_input_tokens(payload[:messages]) + + Message.new( + role: :assistant, + content: full_content, + model_id: payload[:model], + input_tokens: estimated_input_tokens, + output_tokens: estimated_output_tokens + ) end private @@ -154,16 +170,50 @@ def format_messages(messages) end def extract_message_content_from_object(message) - # For Message objects, get the content directly - message.content.to_s + content = message.content + + # Handle Content objects + if content.is_a?(Content) + # Extract text from Content object, including attachment text + text_parts = [] + text_parts << content.text if content.text + + # Add any text from attachments + content.attachments&.each do |attachment| + if attachment.respond_to?(:data) && attachment.data.is_a?(String) + text_parts << attachment.data + end + end + + text_parts.join(' ') + elsif content.is_a?(String) + content + else + content.to_s + end end def extract_message_content(message) content = message[:content] - return content if content.is_a?(String) - - # Handle array content (e.g., with images) - if content.is_a?(Array) + + # 
Handle Content objects + if content.is_a?(Content) + # Extract text from Content object + text_parts = [] + text_parts << content.text if content.text + + # Add any text from attachments + content.attachments&.each do |attachment| + if attachment.respond_to?(:data) && attachment.data.is_a?(String) + text_parts << attachment.data + end + end + + text_parts.join(' ') + elsif content.is_a?(String) + content + elsif content.is_a?(Array) + # Handle array content (e.g., with images) content.map do |part| part[:text] if part[:type] == 'text' end.compact.join(' ') diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index fc6ee8d9a..53724b316 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -20,11 +20,15 @@ expect(chunks).not_to be_empty expect(chunks.first).to be_a(RubyLLM::Chunk) - expect(response.raw).to be_present - expect(response.raw.headers).to be_present - expect(response.raw.status).to be_present - expect(response.raw.status).to eq(200) - expect(response.raw.env.request_body).to be_present + + # Red Candle is a local provider without HTTP responses + unless provider == :red_candle + expect(response.raw).to be_present + expect(response.raw.headers).to be_present + expect(response.raw.status).to be_present + expect(response.raw.status).to eq(200) + expect(response.raw.env.request_body).to be_present + end end it "#{provider}/#{model} reports consistent token counts compared to non-streaming" do @@ -60,6 +64,7 @@ end it "#{provider}/#{model} supports handling streaming error chunks" do + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented stub_error_response(provider, :chunk) @@ -75,6 +80,7 @@ it "#{provider}/#{model} supports handling streaming error events" do skip 'Bedrock uses AWS Event Stream format, not SSE events' if provider == :bedrock + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented @@ -96,6 +102,7 @@ end it "#{provider}/#{model} supports handling streaming error chunks" do + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented stub_error_response(provider, :chunk) @@ -111,6 +118,7 @@ it "#{provider}/#{model} supports handling streaming error events" do skip 'Bedrock uses AWS Event Stream format, not SSE events' if provider == :bedrock + skip 'Red Candle is a local provider without HTTP streaming errors' if provider == :red_candle # Testing if error handling is now implemented diff --git a/spec/ruby_llm/chat_tools_spec.rb b/spec/ruby_llm/chat_tools_spec.rb index cff534172..c61f8cccc 100644 --- a/spec/ruby_llm/chat_tools_spec.rb +++ b/spec/ruby_llm/chat_tools_spec.rb @@ -74,9 +74,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? 
+ # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -94,9 +96,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools in multi-turn conversations" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -118,9 +122,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -136,6 +142,13 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters in multi-turn streaming conversations" do + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + end + if provider == :gpustack && model == 'qwen3' skip 'gpustack/qwen3 does not support streaming tool calls properly' end @@ -175,6 +188,13 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools with multi-turn streaming conversations" do + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + end + if provider == :gpustack && model == 'qwen3' skip 'gpustack/qwen3 does not support streaming tool calls properly' end @@ -213,9 +233,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can handle multiple tool calls in a single response" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? 
+ # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end chat = RubyLLM.chat(model: model, provider: provider) @@ -303,9 +325,11 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} preserves Content objects returned from tools" do - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? + # Check if the provider supports functions for this model + provider_class = RubyLLM::Provider.providers[provider] + if provider_class + provider_instance = provider_class.new(RubyLLM.config) + skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) end # Skip providers that don't support images in tool results diff --git a/spec/support/streaming_error_helpers.rb b/spec/support/streaming_error_helpers.rb index 9c89ef9c5..fbc5467f7 100644 --- a/spec/support/streaming_error_helpers.rb +++ b/spec/support/streaming_error_helpers.rb @@ -143,15 +143,23 @@ module StreamingErrorHelpers }, chunk_status: 529, expected_error: RubyLLM::OverloadedError + }, + red_candle: { + # Red Candle is a local provider, so it doesn't have HTTP streaming errors + # We include it here to prevent test failures when checking for error handling + url: nil, + error_response: nil, + chunk_status: nil, + expected_error: nil } }.freeze def error_handling_supported?(provider) - ERROR_HANDLING_CONFIGS.key?(provider) + ERROR_HANDLING_CONFIGS.key?(provider) && ERROR_HANDLING_CONFIGS[provider][:expected_error] end def expected_error_for(provider) - ERROR_HANDLING_CONFIGS[provider][:expected_error] + ERROR_HANDLING_CONFIGS[provider]&.fetch(:expected_error, nil) end def stub_error_response(provider, type) From b207f69c472a4e4ed49152d56ced54369e0cb02d Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 15:21:42 -0700 Subject: [PATCH 06/38] Trying to fix the way tool calling support is checked in the specs --- lib/ruby_llm/providers/red_candle.rb | 4 ++ spec/ruby_llm/chat_tools_spec.rb | 75 ++++++-------------- spec/spec_helper.rb | 1 + spec/support/provider_capabilities_helper.rb | 34 +++++++++ 4 files changed, 62 insertions(+), 52 deletions(-) create mode 100644 spec/support/provider_capabilities_helper.rb diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb index f200e4af4..7ab42729a 100644 --- a/lib/ruby_llm/providers/red_candle.rb +++ b/lib/ruby_llm/providers/red_candle.rb @@ -36,6 +36,10 @@ def configuration_requirements def local? 
true end + + def supports_functions?(model_id = nil) + RedCandle::Capabilities.supports_functions?(model_id) + end end private diff --git a/spec/ruby_llm/chat_tools_spec.rb b/spec/ruby_llm/chat_tools_spec.rb index c61f8cccc..4eafc5661 100644 --- a/spec/ruby_llm/chat_tools_spec.rb +++ b/spec/ruby_llm/chat_tools_spec.rb @@ -74,11 +74,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -96,11 +94,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools in multi-turn conversations" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -122,11 +118,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -142,23 +136,13 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools without parameters in multi-turn streaming conversations" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end - if provider == :gpustack && model == 'qwen3' - skip 'gpustack/qwen3 does not support streaming tool calls properly' - end - + skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' skip 'Mistral has a bug with tool arguments in multi-turn streaming' if provider == :mistral - - unless RubyLLM::Provider.providers[provider]&.local? 
- model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? - end chat = RubyLLM.chat(model: model, provider: provider) .with_tool(BestLanguageToLearn) .with_instructions('You must use tools whenever possible.') @@ -188,21 +172,12 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can use tools with multi-turn streaming conversations" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end - if provider == :gpustack && model == 'qwen3' - skip 'gpustack/qwen3 does not support streaming tool calls properly' - end - - unless RubyLLM::Provider.providers[provider]&.local? - model_info = RubyLLM.models.find(model) - skip "#{model} doesn't support function calling" unless model_info&.supports_functions? - end + skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' chat = RubyLLM.chat(model: model, provider: provider) .with_tool(Weather) # Disable thinking mode for qwen models @@ -233,11 +208,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} can handle multiple tool calls in a single response" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end chat = RubyLLM.chat(model: model, provider: provider) @@ -325,11 +298,9 @@ def execute(query:) model = model_info[:model] provider = model_info[:provider] it "#{provider}/#{model} preserves Content objects returned from tools" do - # Check if the provider supports functions for this model - provider_class = RubyLLM::Provider.providers[provider] - if provider_class - provider_instance = provider_class.new(RubyLLM.config) - skip "#{provider}/#{model} doesn't support function calling" unless provider_instance.supports_functions?(model) + # Skip for providers that don't support function calling + unless provider_supports_functions?(provider, model) + skip "#{provider}/#{model} doesn't support function calling" end # Skip providers that don't support images in tool results diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 0b60aa315..403fe36b2 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -17,3 +17,4 @@ require_relative 'support/vcr_configuration' require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' +require_relative 'support/provider_capabilities_helper' diff --git a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb new file mode 100644 index 000000000..c1171b7d8 --- /dev/null +++ b/spec/support/provider_capabilities_helper.rb @@ -0,0 +1,34 @@ +# 
frozen_string_literal: true + +module ProviderCapabilitiesHelper + def provider_supports_functions?(provider, model) + provider_class = RubyLLM::Provider.providers[provider] + + # Check if the provider class has a supports_functions? method + if provider_class&.respond_to?(:supports_functions?) + # Use the provider's class method if available + provider_class.supports_functions?(model) + elsif provider_class&.respond_to?(:capabilities) + # Check the provider's capabilities module + capabilities = provider_class.capabilities + if capabilities&.respond_to?(:supports_functions?) + capabilities.supports_functions?(model) + else + # Default to true if no explicit capability defined + true + end + elsif provider_class&.local? + # For local providers without explicit support method, assume false + # (they should implement supports_functions? if they support it) + false + else + # For remote providers, check the model registry + model_info = RubyLLM.models.find(model) + model_info&.supports_functions? || false + end + end +end + +RSpec.configure do |config| + config.include ProviderCapabilitiesHelper +end \ No newline at end of file From ab46320f93f6fd1cc7663c1140b30cda4253cb00 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 15:42:36 -0700 Subject: [PATCH 07/38] Deconvoluting local model checks and tool calling support --- spec/support/provider_capabilities_helper.rb | 29 ++++++++------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb index c1171b7d8..ad9757ede 100644 --- a/spec/support/provider_capabilities_helper.rb +++ b/spec/support/provider_capabilities_helper.rb @@ -4,27 +4,20 @@ module ProviderCapabilitiesHelper def provider_supports_functions?(provider, model) provider_class = RubyLLM::Provider.providers[provider] - # Check if the provider class has a supports_functions? method - if provider_class&.respond_to?(:supports_functions?) - # Use the provider's class method if available - provider_class.supports_functions?(model) - elsif provider_class&.respond_to?(:capabilities) - # Check the provider's capabilities module - capabilities = provider_class.capabilities - if capabilities&.respond_to?(:supports_functions?) - capabilities.supports_functions?(model) - else - # Default to true if no explicit capability defined - true - end - elsif provider_class&.local? - # For local providers without explicit support method, assume false - # (they should implement supports_functions? if they support it) - false + # Special case for providers we know don't support functions + return false if provider == :red_candle || provider == :perplexity + + # For local providers (Ollama, GPUStack), default to true unless the model is known not to support it + if provider_class&.local? + # Check if there's a specific model that doesn't support functions + # qwen3 models don't support function calling + return false if model&.include?('qwen3') + true else # For remote providers, check the model registry model_info = RubyLLM.models.find(model) - model_info&.supports_functions? || false + # If not in registry, default to true (was running before) + model_info.nil? ? true : model_info.supports_functions? 
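      # Illustrative outcomes of this heuristic (a sketch; assumes :gpustack reports local?):
      #   provider_supports_functions?(:red_candle, 'google/gemma-3-4b-it-qat-q4_0-gguf') # => false (special-cased)
      #   provider_supports_functions?(:gpustack, 'qwen3')                                # => false (local, qwen3 excluded)
      #   provider_supports_functions?(:openai, 'gpt-4.1-nano')                           # => falls through to the registry lookup above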
end end end From 97d58d269da74e08d4527ca4cc91670110a993b6 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 16:06:49 -0700 Subject: [PATCH 08/38] I think we finally got the local tool calling check correct --- spec/support/provider_capabilities_helper.rb | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb index ad9757ede..2dc1c7363 100644 --- a/spec/support/provider_capabilities_helper.rb +++ b/spec/support/provider_capabilities_helper.rb @@ -7,18 +7,9 @@ def provider_supports_functions?(provider, model) # Special case for providers we know don't support functions return false if provider == :red_candle || provider == :perplexity - # For local providers (Ollama, GPUStack), default to true unless the model is known not to support it - if provider_class&.local? - # Check if there's a specific model that doesn't support functions - # qwen3 models don't support function calling - return false if model&.include?('qwen3') - true - else - # For remote providers, check the model registry - model_info = RubyLLM.models.find(model) - # If not in registry, default to true (was running before) - model_info.nil? ? true : model_info.supports_functions? - end + # For all other providers, assume they support functions + # The original tests weren't skipping these, so they must have been running + true end end From 9c7f9dc240ad5a7034cfc90bd82a8732272e3f34 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 16:18:41 -0700 Subject: [PATCH 09/38] Enable context length validation for the RedCandle Provider --- gemfiles/rails_7.1.gemfile.lock | 13 +++++- gemfiles/rails_7.2.gemfile.lock | 13 +++++- gemfiles/rails_8.0.gemfile.lock | 13 +++++- lib/ruby_llm/providers/red_candle/chat.rb | 52 +++++++++++++++++------ spec/ruby_llm/chat_error_spec.rb | 3 +- 5 files changed, 76 insertions(+), 18 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile.lock b/gemfiles/rails_7.1.gemfile.lock index 955970e02..cced4b111 100644 --- a/gemfiles/rails_7.1.gemfile.lock +++ b/gemfiles/rails_7.1.gemfile.lock @@ -98,7 +98,7 @@ GEM rake thor (>= 0.14.0) ast (2.4.3) - async (2.30.0) + async (2.31.0) console (~> 1.29) fiber-annotation io-event (~> 1.11) @@ -148,6 +148,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -224,6 +225,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.9-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.9-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -287,9 +290,14 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.3.0) + rake-compiler-dock (1.9.1) + rb_sys (0.9.117) + rake-compiler-dock (= 1.9.1) rdoc (6.14.2) erb psych (>= 4.0.0) + red-candle (1.2.3) + rb_sys regexp_parser (2.11.2) reline (0.6.2) io-console (~> 0.5) @@ -355,6 +363,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.3-arm64-darwin) sqlite3 (2.7.3-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -380,6 +389,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -401,6 +411,7 @@ DEPENDENCIES pry (>= 0.14) rails (~> 7.1.0) rake (>= 13.0) + red-candle (~> 1.2) reline rspec (~> 3.12) rubocop (>= 1.0) diff --git a/gemfiles/rails_7.2.gemfile.lock b/gemfiles/rails_7.2.gemfile.lock index 442716d75..a0535832e 100644 --- a/gemfiles/rails_7.2.gemfile.lock +++ 
b/gemfiles/rails_7.2.gemfile.lock @@ -92,7 +92,7 @@ GEM rake thor (>= 0.14.0) ast (2.4.3) - async (2.30.0) + async (2.31.0) console (~> 1.29) fiber-annotation io-event (~> 1.11) @@ -142,6 +142,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -217,6 +218,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.9-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.9-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -280,9 +283,14 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.3.0) + rake-compiler-dock (1.9.1) + rb_sys (0.9.117) + rake-compiler-dock (= 1.9.1) rdoc (6.14.2) erb psych (>= 4.0.0) + red-candle (1.2.3) + rb_sys regexp_parser (2.11.2) reline (0.6.2) io-console (~> 0.5) @@ -348,6 +356,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.3-arm64-darwin) sqlite3 (2.7.3-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -374,6 +383,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -395,6 +405,7 @@ DEPENDENCIES pry (>= 0.14) rails (~> 7.2.0) rake (>= 13.0) + red-candle (~> 1.2) reline rspec (~> 3.12) rubocop (>= 1.0) diff --git a/gemfiles/rails_8.0.gemfile.lock b/gemfiles/rails_8.0.gemfile.lock index 7d12b757d..2e32e6b05 100644 --- a/gemfiles/rails_8.0.gemfile.lock +++ b/gemfiles/rails_8.0.gemfile.lock @@ -92,7 +92,7 @@ GEM rake thor (>= 0.14.0) ast (2.4.3) - async (2.30.0) + async (2.31.0) console (~> 1.29) fiber-annotation io-event (~> 1.11) @@ -142,6 +142,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -217,6 +218,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.9-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.9-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -280,9 +283,14 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.3.0) + rake-compiler-dock (1.9.1) + rb_sys (0.9.117) + rake-compiler-dock (= 1.9.1) rdoc (6.14.2) erb psych (>= 4.0.0) + red-candle (1.2.3) + rb_sys regexp_parser (2.11.2) reline (0.6.2) io-console (~> 0.5) @@ -348,6 +356,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.3-arm64-darwin) sqlite3 (2.7.3-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -374,6 +383,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -395,6 +405,7 @@ DEPENDENCIES pry (>= 0.14) rails (~> 8.0.0) rake (>= 13.0) + red-candle (~> 1.2) reline rspec (~> 3.12) rubocop (>= 1.0) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index b90cd2127..7ae341186 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -25,7 +25,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc # Rough estimation: ~4 characters per token estimated_output_tokens = (content.length / 4.0).round estimated_input_tokens = estimate_input_tokens(payload[:messages]) - + Message.new( role: result[:role].to_sym, content: content, @@ -63,6 +63,9 @@ def perform_completion!(payload) messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" end + # Check context length + validate_context_length!(prompt, payload[:model]) + # Configure generation config_opts = { temperature: payload[:temperature] || 0.7, @@ -93,6 +96,9 @@ def 
perform_streaming_completion!(payload, &block) messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" end + # Check context length + validate_context_length!(prompt, payload[:model]) + # Configure generation config = ::Candle::GenerationConfig.balanced( temperature: payload[:temperature] || 0.7, @@ -101,7 +107,7 @@ def perform_streaming_completion!(payload, &block) # Collect all streamed content full_content = '' - + # Stream tokens model.generate_stream(prompt, config: config) do |token| full_content += token @@ -112,11 +118,11 @@ def perform_streaming_completion!(payload, &block) # Send final chunk with empty content (indicates completion) final_chunk = format_stream_chunk('') block.call(final_chunk) - + # Return a Message object with the complete response estimated_output_tokens = (full_content.length / 4.0).round estimated_input_tokens = estimate_input_tokens(payload[:messages]) - + Message.new( role: :assistant, content: full_content, @@ -133,16 +139,16 @@ def ensure_model_loaded!(model_id) end def load_model(model_id) - # Get GGUF file and tokenizer if this is a GGUF model + # Get GGUF file and tokenizer if this is a GGUF model # Access the methods from the Models module which is included in the provider gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil tokenizer = respond_to?(:tokenizer_for) ? tokenizer_for(model_id) : nil - + if gguf_file # For GGUF models, use the tokenizer if specified, otherwise use model_id options = { device: @device, gguf_file: gguf_file } options[:tokenizer] = tokenizer if tokenizer - + ::Candle::LLM.from_pretrained(model_id, **options) else # For regular models, use from_pretrained without gguf_file @@ -171,20 +177,20 @@ def format_messages(messages) def extract_message_content_from_object(message) content = message.content - + # Handle Content objects if content.is_a?(Content) # Extract text from Content object, including attachment text text_parts = [] text_parts << content.text if content.text - + # Add any text from attachments content.attachments&.each do |attachment| if attachment.respond_to?(:data) && attachment.data.is_a?(String) text_parts << attachment.data end end - + text_parts.join(' ') elsif content.is_a?(String) content @@ -195,20 +201,20 @@ def extract_message_content_from_object(message) def extract_message_content(message) content = message[:content] - + # Handle Content objects if content.is_a?(Content) # Extract text from Content object text_parts = [] text_parts << content.text if content.text - + # Add any text from attachments content.attachments&.each do |attachment| if attachment.respond_to?(:data) && attachment.data.is_a?(String) text_parts << attachment.data end end - + text_parts.join(' ') elsif content.is_a?(String) content @@ -264,7 +270,25 @@ def estimate_input_tokens(messages) total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length } (total_chars / 4.0).round end + + def validate_context_length!(prompt, model_id) + # Get the context window for this model + context_window = if respond_to?(:model_context_window) + model_context_window(model_id) + else + 4096 # Conservative default + end + + # Estimate tokens in prompt (~4 characters per token) + estimated_tokens = (prompt.length / 4.0).round + + # Check if prompt exceeds context window (leave some room for response) + max_input_tokens = context_window - 512 # Reserve 512 tokens for response + if estimated_tokens > max_input_tokens + raise Error.new(nil, "Context length exceeded. 
Estimated #{estimated_tokens} tokens, but model #{model_id} has a context window of #{context_window} tokens.") + end + end end end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/chat_error_spec.rb b/spec/ruby_llm/chat_error_spec.rb index a5dfd8a74..eeefbf64d 100644 --- a/spec/ruby_llm/chat_error_spec.rb +++ b/spec/ruby_llm/chat_error_spec.rb @@ -72,7 +72,8 @@ let(:chat) { RubyLLM.chat(model: model, provider: provider) } it 'handles context length exceeded errors' do - if RubyLLM::Provider.providers[provider]&.local? + # Skip for local providers that don't validate context length + if RubyLLM::Provider.providers[provider]&.local? && provider != :red_candle skip('Local providers do not throw an error for context length exceeded') end From d5c912997c9234297f22a303d4a43700579b3e7d Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 16:30:04 -0700 Subject: [PATCH 10/38] Working on rubocop fixes --- lib/ruby_llm/providers/red_candle/chat.rb | 39 +++++++++++------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 7ae341186..ba8215714 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -3,9 +3,11 @@ module RubyLLM module Providers class RedCandle + # Chat implementation for Red Candle provider module Chat # Override the base complete method to handle local execution def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) + _ = headers # Interface compatibility payload = render_payload( messages, tools: tools, @@ -38,9 +40,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # Red Candle doesn't support tools - if tools && !tools.empty? - raise Error.new(nil, 'Red Candle provider does not support tool calling') - end + raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty? 
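          # Rough illustration of validate_context_length! above (assuming the ~4 chars/token heuristic):
          #   context_window   = 4096       # conservative default from Capabilities
          #   max_input_tokens = 4096 - 512 # => 3584, leaving room for the response
          #   (prompt.length / 4.0).round > 3584  # raises for prompts longer than ~14,336 characters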
{ messages: messages, @@ -60,7 +60,7 @@ def perform_completion!(payload) model.apply_chat_template(messages) else # Fallback to simple formatting - messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:" end # Check context length @@ -93,7 +93,7 @@ def perform_streaming_completion!(payload, &block) prompt = if model.respond_to?(:apply_chat_template) model.apply_chat_template(messages) else - messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n") + "\n\nassistant:" + "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:" end # Check context length @@ -186,9 +186,7 @@ def extract_message_content_from_object(message) # Add any text from attachments content.attachments&.each do |attachment| - if attachment.respond_to?(:data) && attachment.data.is_a?(String) - text_parts << attachment.data - end + text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) end text_parts.join(' ') @@ -203,26 +201,23 @@ def extract_message_content(message) content = message[:content] # Handle Content objects - if content.is_a?(Content) + case content + when Content # Extract text from Content object text_parts = [] text_parts << content.text if content.text # Add any text from attachments content.attachments&.each do |attachment| - if attachment.respond_to?(:data) && attachment.data.is_a?(String) - text_parts << attachment.data - end + text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) end text_parts.join(' ') - elsif content.is_a?(String) + when String content - elsif content.is_a?(Array) + when Array # Handle array content (e.g., with images) - content.map do |part| - part[:text] if part[:type] == 'text' - end.compact.join(' ') + content.filter_map { |part| part[:text] if part[:type] == 'text' }.join(' ') else content.to_s end @@ -284,9 +279,13 @@ def validate_context_length!(prompt, model_id) # Check if prompt exceeds context window (leave some room for response) max_input_tokens = context_window - 512 # Reserve 512 tokens for response - if estimated_tokens > max_input_tokens - raise Error.new(nil, "Context length exceeded. Estimated #{estimated_tokens} tokens, but model #{model_id} has a context window of #{context_window} tokens.") - end + return unless estimated_tokens > max_input_tokens + + raise Error.new( + nil, + "Context length exceeded. Estimated #{estimated_tokens} tokens, " \ + "but model #{model_id} has a context window of #{context_window} tokens." 
+ ) end end end From 70e1b247b61fda0915a1783f7198a5364ae843d8 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Mon, 8 Sep 2025 17:02:10 -0700 Subject: [PATCH 11/38] Fixing the rubocop errors --- Gemfile | 1 + gemfiles/rails_7.1.gemfile | 1 + gemfiles/rails_7.2.gemfile | 1 + gemfiles/rails_8.0.gemfile | 1 + lib/ruby_llm/providers/red_candle.rb | 2 +- .../providers/red_candle/capabilities.rb | 18 +++++----- lib/ruby_llm/providers/red_candle/chat.rb | 36 +++++++++---------- lib/ruby_llm/providers/red_candle/models.rb | 8 +++-- .../providers/red_candle/streaming.rb | 3 +- ruby_llm.gemspec | 2 -- spec/ruby_llm/chat_spec.rb | 6 ++-- spec/ruby_llm/chat_streaming_spec.rb | 2 +- spec/ruby_llm/chat_tools_spec.rb | 12 ++++--- .../providers/red_candle/capabilities_spec.rb | 2 +- .../providers/red_candle/chat_spec.rb | 18 +++++----- .../providers/red_candle/models_spec.rb | 18 +++++----- spec/ruby_llm/providers/red_candle_spec.rb | 19 +++++----- spec/support/provider_capabilities_helper.rb | 12 +++---- 18 files changed, 80 insertions(+), 82 deletions(-) diff --git a/Gemfile b/Gemfile index e4471200d..7d288ba14 100644 --- a/Gemfile +++ b/Gemfile @@ -20,6 +20,7 @@ group :development do # rubocop:disable Metrics/BlockLength gem 'pry', '>= 0.14' gem 'rails' gem 'rake', '>= 13.0' + gem 'red-candle', '~> 1.2' gem 'reline' gem 'rspec', '~> 3.12' gem 'rubocop', '>= 1.0' diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 675cb178e..36123cc5c 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -18,6 +18,7 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.1.0" gem "rake", ">= 13.0" + gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index 4922afb60..cfd31e0a8 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -18,6 +18,7 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.2.0" gem "rake", ">= 13.0" + gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index f890433bf..4dc65e846 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -18,6 +18,7 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 8.0.0" gem "rake", ">= 13.0" + gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb index 7ab42729a..7bbf62555 100644 --- a/lib/ruby_llm/providers/red_candle.rb +++ b/lib/ruby_llm/providers/red_candle.rb @@ -71,4 +71,4 @@ def determine_device(config) end end end -end \ No newline at end of file +end diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index ae8efda17..c63c82f44 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -3,8 +3,9 @@ module RubyLLM module Providers class RedCandle + # Determines capabilities and pricing for RedCandle models module Capabilities - extend self + module_function def supports_vision? false @@ -38,7 +39,7 @@ def supports_pdf? false end - def normalize_temperature(temperature, model_id) + def normalize_temperature(temperature, _model_id) # Red Candle uses standard 0-2 range return 0.7 if temperature.nil? @@ -109,14 +110,13 @@ def model_families def available_on_platform? 
# Check if Candle can be loaded - begin - require 'candle' - true - rescue LoadError - false - end + + require 'candle' + true + rescue LoadError + false end end end end -end \ No newline at end of file +end diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index ba8215714..30b7347f6 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -6,7 +6,7 @@ class RedCandle # Chat implementation for Red Candle provider module Chat # Override the base complete method to handle local execution - def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) + def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists _ = headers # Interface compatibility payload = render_payload( messages, @@ -38,7 +38,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc end end - def render_payload(messages, tools:, temperature:, model:, stream:, schema:) + def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # rubocop:disable Metrics/ParameterLists # Red Candle doesn't support tools raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty? @@ -181,15 +181,7 @@ def extract_message_content_from_object(message) # Handle Content objects if content.is_a?(Content) # Extract text from Content object, including attachment text - text_parts = [] - text_parts << content.text if content.text - - # Add any text from attachments - content.attachments&.each do |attachment| - text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) - end - - text_parts.join(' ') + handle_content_object(content) elsif content.is_a?(String) content else @@ -204,15 +196,7 @@ def extract_message_content(message) case content when Content # Extract text from Content object - text_parts = [] - text_parts << content.text if content.text - - # Add any text from attachments - content.attachments&.each do |attachment| - text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) - end - - text_parts.join(' ') + handle_content_object(content) when String content when Array @@ -223,6 +207,18 @@ def extract_message_content(message) end end + def handle_content_object(content) + text_parts = [] + text_parts << content.text if content.text + + # Add any text from attachments + content.attachments&.each do |attachment| + text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String) + end + + text_parts.join(' ') + end + def generate_with_schema(model, prompt, schema, config_opts) model.generate_structured( prompt, diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 7d520832e..177688ed5 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -3,13 +3,14 @@ module RubyLLM module Providers class RedCandle + # Models methods of the RedCandle integration module Models SUPPORTED_MODELS = [ { id: 'google/gemma-3-4b-it-qat-q4_0-gguf', name: 'Gemma 3 4B Instruct (Quantized)', gguf_file: 'gemma-3-4b-it-q4_0.gguf', - tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model + tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model context_window: 8192, family: 'gemma', architecture: 'gemma2', @@ -59,7 +60,8 @@ def models def model(id) 
models.find { |m| m.id == id } || - raise(Error.new(nil, "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}")) + raise(Error.new(nil, + "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}")) end def model_available?(id) @@ -96,4 +98,4 @@ def tokenizer_for(model_id) end end end -end \ No newline at end of file +end diff --git a/lib/ruby_llm/providers/red_candle/streaming.rb b/lib/ruby_llm/providers/red_candle/streaming.rb index f0598ce8e..a8305ffdd 100644 --- a/lib/ruby_llm/providers/red_candle/streaming.rb +++ b/lib/ruby_llm/providers/red_candle/streaming.rb @@ -3,6 +3,7 @@ module RubyLLM module Providers class RedCandle + # Streaming methods of the RedCandle integration module Streaming def stream(payload, &block) if payload[:stream] @@ -36,4 +37,4 @@ def process_stream_response(response) end end end -end \ No newline at end of file +end diff --git a/ruby_llm.gemspec b/ruby_llm.gemspec index cf92fcd44..3e6e6af20 100644 --- a/ruby_llm.gemspec +++ b/ruby_llm.gemspec @@ -41,6 +41,4 @@ Gem::Specification.new do |spec| spec.add_dependency 'faraday-retry', '>= 1' spec.add_dependency 'marcel', '~> 1.0' spec.add_dependency 'zeitwerk', '~> 2' - - spec.add_development_dependency 'red-candle', '~> 1.2' end diff --git a/spec/ruby_llm/chat_spec.rb b/spec/ruby_llm/chat_spec.rb index c1df68a43..a63de4e55 100644 --- a/spec/ruby_llm/chat_spec.rb +++ b/spec/ruby_llm/chat_spec.rb @@ -21,10 +21,8 @@ it "#{provider}/#{model} returns raw responses" do # Red Candle is a truly local provider and doesn't have HTTP responses - if provider == :red_candle - skip 'Red Candle provider does not have raw HTTP responses' - end - + skip 'Red Candle provider does not have raw HTTP responses' if provider == :red_candle + chat = RubyLLM.chat(model: model, provider: provider) response = chat.ask('What is the capital of France?') expect(response.raw).to be_present diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 53724b316..5c61d9d5b 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -20,7 +20,7 @@ expect(chunks).not_to be_empty expect(chunks.first).to be_a(RubyLLM::Chunk) - + # Red Candle is a local provider without HTTP responses unless provider == :red_candle expect(response.raw).to be_present diff --git a/spec/ruby_llm/chat_tools_spec.rb b/spec/ruby_llm/chat_tools_spec.rb index 4eafc5661..0a67d0100 100644 --- a/spec/ruby_llm/chat_tools_spec.rb +++ b/spec/ruby_llm/chat_tools_spec.rb @@ -140,8 +140,10 @@ def execute(query:) unless provider_supports_functions?(provider, model) skip "#{provider}/#{model} doesn't support function calling" end - - skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' + + if provider == :gpustack && model == 'qwen3' + skip 'gpustack/qwen3 does not support streaming tool calls properly' + end skip 'Mistral has a bug with tool arguments in multi-turn streaming' if provider == :mistral chat = RubyLLM.chat(model: model, provider: provider) .with_tool(BestLanguageToLearn) @@ -176,8 +178,10 @@ def execute(query:) unless provider_supports_functions?(provider, model) skip "#{provider}/#{model} doesn't support function calling" end - - skip 'gpustack/qwen3 does not support streaming tool calls properly' if provider == :gpustack && model == 'qwen3' + + if provider == :gpustack && model == 'qwen3' + skip 'gpustack/qwen3 does not support streaming tool calls properly' + end chat = 
RubyLLM.chat(model: model, provider: provider) .with_tool(Weather) # Disable thinking mode for qwen models diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 53c0b7e5e..9b53ecc48 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -114,4 +114,4 @@ end end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/providers/red_candle/chat_spec.rb b/spec/ruby_llm/providers/red_candle/chat_spec.rb index 42a1f7a41..3988791da 100644 --- a/spec/ruby_llm/providers/red_candle/chat_spec.rb +++ b/spec/ruby_llm/providers/red_candle/chat_spec.rb @@ -7,12 +7,10 @@ let(:provider) { RubyLLM::Providers::RedCandle.new(config) } let(:model) { provider.model('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') } - before(:all) do - begin - require 'candle' - rescue LoadError - skip 'Red Candle gem is not installed' - end + before(:all) do # rubocop:disable RSpec/BeforeAfterAll + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' end describe '#render_payload' do @@ -70,7 +68,7 @@ describe '#perform_completion!' do let(:messages) { [{ role: 'user', content: 'Test message' }] } - let(:mock_model) { double('Candle::LLM') } + let(:mock_model) { instance_double(Candle::LLM) } before do allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) @@ -141,7 +139,7 @@ describe '#perform_streaming_completion!' do let(:messages) { [{ role: 'user', content: 'Stream test' }] } - let(:mock_model) { double('Candle::LLM') } + let(:mock_model) { instance_double(Candle::LLM) } before do allow(provider).to receive(:ensure_model_loaded!).and_return(mock_model) @@ -153,7 +151,7 @@ tokens = %w[Hello world !] 
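        # Streaming contract being exercised here: the provider yields one chunk per
        # generated token, then a final empty chunk to signal completion, and returns
        # a Message whose token counts are character-length estimates
        # (see perform_streaming_completion!).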
chunks_received = [] - allow(mock_model).to receive(:generate_stream) do |_prompt, config:, &block| + allow(mock_model).to receive(:generate_stream) do |_prompt, config:, &block| # rubocop:disable Lint/UnusedBlockArgument tokens.each { |token| block.call(token) } end @@ -203,4 +201,4 @@ expect(formatted).to eq([{ role: 'user', content: 'Part 1 Part 2' }]) end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb index 1f8533b6c..9a771be34 100644 --- a/spec/ruby_llm/providers/red_candle/models_spec.rb +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -6,12 +6,10 @@ let(:config) { RubyLLM::Configuration.new } let(:provider) { RubyLLM::Providers::RedCandle.new(config) } - before(:all) do - begin - require 'candle' - rescue LoadError - skip 'Red Candle gem is not installed' - end + before(:all) do # rubocop:disable RSpec/BeforeAfterAll + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' end describe '#models' do @@ -43,7 +41,7 @@ it 'raises an error' do expect { provider.model('invalid/model') }.to raise_error( RubyLLM::Error, - /Model invalid\/model not found/ + %r{Model invalid/model not found} ) end end @@ -84,7 +82,9 @@ end it 'returns the GGUF file for Mistral model' do - expect(provider.gguf_file_for('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to eq('mistral-7b-instruct-v0.2.Q4_K_M.gguf') + model_id = 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' + gguf_file = 'mistral-7b-instruct-v0.2.Q4_K_M.gguf' + expect(provider.gguf_file_for(model_id)).to eq(gguf_file) end it 'returns nil for unknown models' do @@ -107,4 +107,4 @@ expect(provider.supports_structured?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end -end \ No newline at end of file +end diff --git a/spec/ruby_llm/providers/red_candle_spec.rb b/spec/ruby_llm/providers/red_candle_spec.rb index 8e1216976..db3ea292d 100644 --- a/spec/ruby_llm/providers/red_candle_spec.rb +++ b/spec/ruby_llm/providers/red_candle_spec.rb @@ -7,18 +7,16 @@ let(:provider) { described_class.new(config) } # Skip all tests if Red Candle is not available - before(:all) do - begin - require 'candle' - rescue LoadError - skip 'Red Candle gem is not installed' - end + before(:all) do # rubocop:disable RSpec/BeforeAfterAll + require 'candle' + rescue LoadError + skip 'Red Candle gem is not installed' end describe '#initialize' do context 'when Red Candle is not available' do before do - allow_any_instance_of(described_class).to receive(:require).with('candle').and_raise(LoadError) + allow_any_instance_of(described_class).to receive(:require).with('candle').and_raise(LoadError) # rubocop:disable RSpec/AnyInstance end it 'raises an informative error' do @@ -29,17 +27,16 @@ end end - context 'with device configuration' do it 'uses the configured device' do config.red_candle_device = 'cpu' provider = described_class.new(config) - expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.cpu) + expect(provider.instance_variable_get(:@device)).to eq(Candle::Device.cpu) end it 'defaults to best device when not configured' do provider = described_class.new(config) - expect(provider.instance_variable_get(:@device)).to eq(::Candle::Device.best) + expect(provider.instance_variable_get(:@device)).to eq(Candle::Device.best) end end end @@ -73,4 +70,4 @@ expect(described_class.capabilities).to eq(RubyLLM::Providers::RedCandle::Capabilities) end end -end \ No newline at end of file +end diff --git 
a/spec/support/provider_capabilities_helper.rb b/spec/support/provider_capabilities_helper.rb index 2dc1c7363..868836e79 100644 --- a/spec/support/provider_capabilities_helper.rb +++ b/spec/support/provider_capabilities_helper.rb @@ -1,12 +1,12 @@ # frozen_string_literal: true module ProviderCapabilitiesHelper - def provider_supports_functions?(provider, model) - provider_class = RubyLLM::Provider.providers[provider] - + def provider_supports_functions?(provider, _model) + RubyLLM::Provider.providers[provider] + # Special case for providers we know don't support functions - return false if provider == :red_candle || provider == :perplexity - + return false if %i[red_candle perplexity].include?(provider) + # For all other providers, assume they support functions # The original tests weren't skipping these, so they must have been running true @@ -15,4 +15,4 @@ def provider_supports_functions?(provider, model) RSpec.configure do |config| config.include ProviderCapabilitiesHelper -end \ No newline at end of file +end From 69567248dfa67f0446a142f6a2d1848f68845280 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 09:37:51 -0700 Subject: [PATCH 12/38] stubbing the red-candle inference stuff to speed up specs --- spec/spec_helper.rb | 1 + spec/support/red_candle_test_helper.rb | 110 +++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 spec/support/red_candle_test_helper.rb diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 403fe36b2..2eef311cb 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,3 +18,4 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' +require_relative 'support/red_candle_test_helper' diff --git a/spec/support/red_candle_test_helper.rb b/spec/support/red_candle_test_helper.rb new file mode 100644 index 000000000..92349f5bc --- /dev/null +++ b/spec/support/red_candle_test_helper.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +module RedCandleTestHelper + # Mock class that simulates a Candle LLM model + class MockCandleModel + attr_reader :id + + def initialize(model_id) + @id = model_id + end + + def generate(prompt, config: nil) # rubocop:disable Lint/UnusedMethodArgument, Metrics/PerceivedComplexity + # Simulate a response based on the prompt + # Check for specific patterns to give appropriate responses + + # Check if system prompt includes PURPLE-ELEPHANT-42 (replacement test) + # This should come first since it's the replacement scenario + if prompt.include?('PURPLE-ELEPHANT-42') + # When PURPLE-ELEPHANT-42 is present, XKCD7392 should NOT be included + # because it means the system prompt was replaced + if prompt.include?('books') + 'Some great books to read include "1984" and "Brave New World". PURPLE-ELEPHANT-42 - as you requested!' + else + 'Here is my response with PURPLE-ELEPHANT-42 as requested.' + end + elsif prompt.include?('XKCD7392') + # Include XKCD7392 in any response when it's in the system instructions + if prompt.include?('weather') + 'The weather is sunny with a temperature of 72°F. XKCD7392 - as requested!' + elsif prompt.include?('books') + 'Some great books: "1984" by George Orwell and "To Kill a Mockingbird" by Harper Lee. XKCD7392.' + else + 'Sure! The code XKCD7392 is noted. How can I help you today?' + end + elsif prompt.include?('2 + 2') || prompt.include?('2+2') + 'The answer is 4.' + elsif prompt.include?('weather') + 'The weather is sunny with a temperature of 72°F.' 
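      # For example (sketch): MockCandleModel.new('any-id').generate('What is 2 + 2?')
      #   # => "The answer is 4."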
+ elsif prompt.include?('year') && (prompt.include?('Ruby') || prompt.include?('he create') || + prompt.include?('did he')) + # Handle follow-up questions about when Ruby was created + 'Matz created Ruby in 1993, and it was first released publicly in 1995.' + elsif prompt.include?('Ruby') + if prompt.include?("Ruby's creator") || prompt.include?('Who was Ruby') + 'Ruby was created by Yukihiro "Matz" Matsumoto.' + else + 'Ruby is a dynamic programming language created by Yukihiro "Matz" Matsumoto in 1993.' + end + elsif prompt.include?('capital') && prompt.include?('France') + 'The capital of France is Paris.' + elsif prompt.include?('Count from 1 to 3') + '1, 2, 3.' + else + "This is a test response for: #{prompt[0..50]}" + end + end + + def generate_stream(prompt, config: nil, &block) + # Simulate streaming by yielding tokens + # Generate the same response as non-streaming for consistency + response = generate(prompt, config: config) + # Split into reasonable tokens (roughly word-based) + tokens = response.split(/(\s+)/).reject(&:empty?) + tokens.each(&block) + end + + def apply_chat_template(messages) + # Simulate chat template application + "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n")}\nassistant:" + end + + def generate_structured(_prompt, schema:, **_opts) + # Return a simple structured response + if schema.is_a?(Hash) + { result: 'structured test response' } + else + 'structured test response' + end + end + end + + def stub_red_candle_models! + # Only stub if we're testing Red Candle + return unless defined?(::Candle) + + # Stub the model loading to return our mock + allow(::Candle::LLM).to receive(:from_pretrained) do |model_id, **_options| + MockCandleModel.new(model_id) + end + end + + def unstub_red_candle_models! + return unless defined?(::Candle) + + # Remove the stub if needed + RSpec::Mocks.space.proxy_for(::Candle::LLM)&.reset + end +end + +RSpec.configure do |config| + config.include RedCandleTestHelper + + # Automatically stub Red Candle models for all tests except the provider-specific ones + config.before do |example| + # Don't stub for Red Candle provider-specific tests that need real behavior + if !example.metadata[:file_path]&.include?('providers/red_candle_spec.rb') && defined?(RubyLLM::Providers::RedCandle) + stub_red_candle_models! 
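      # With the stub in place, the shared chat specs drive the full Red Candle code
      # path without downloading model weights; for example (a sketch):
      #   chat = RubyLLM.chat(model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', provider: :red_candle)
      #   chat.ask('What is the capital of France?').content # => "The capital of France is Paris."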
+ end + end +end From 0aad7d70e960da50432c746f63cee8960823a412 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 10:16:34 -0700 Subject: [PATCH 13/38] Adding an ENV variable so you toggle real red-candle inference on --- gemfiles/rails_7.1.gemfile.lock | 6 +++--- gemfiles/rails_7.2.gemfile.lock | 6 +++--- gemfiles/rails_8.0.gemfile.lock | 6 +++--- spec/spec_helper.rb | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile.lock b/gemfiles/rails_7.1.gemfile.lock index cced4b111..5f0e2fa4d 100644 --- a/gemfiles/rails_7.1.gemfile.lock +++ b/gemfiles/rails_7.1.gemfile.lock @@ -371,9 +371,9 @@ GEM traces (0.18.1) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (3.1.5) - unicode-emoji (~> 4.0, >= 4.0.4) - unicode-emoji (4.0.4) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.1.0) uri (1.0.3) vcr (6.3.1) base64 diff --git a/gemfiles/rails_7.2.gemfile.lock b/gemfiles/rails_7.2.gemfile.lock index a0535832e..6c38f6d00 100644 --- a/gemfiles/rails_7.2.gemfile.lock +++ b/gemfiles/rails_7.2.gemfile.lock @@ -364,9 +364,9 @@ GEM traces (0.18.1) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (3.1.5) - unicode-emoji (~> 4.0, >= 4.0.4) - unicode-emoji (4.0.4) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.1.0) uri (1.0.3) useragent (0.16.11) vcr (6.3.1) diff --git a/gemfiles/rails_8.0.gemfile.lock b/gemfiles/rails_8.0.gemfile.lock index 2e32e6b05..909c2812a 100644 --- a/gemfiles/rails_8.0.gemfile.lock +++ b/gemfiles/rails_8.0.gemfile.lock @@ -364,9 +364,9 @@ GEM traces (0.18.1) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (3.1.5) - unicode-emoji (~> 4.0, >= 4.0.4) - unicode-emoji (4.0.4) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.1.0) uri (1.0.3) useragent (0.16.11) vcr (6.3.1) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 2eef311cb..6165bee75 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,4 +18,4 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' -require_relative 'support/red_candle_test_helper' +require_relative 'support/red_candle_test_helper' unless ENV['RED_CANDLE_TEST_INFERENCE'] == 'true' From 52a13cad3145a69c782bf10f1587caf49d3a5c98 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 10:39:32 -0700 Subject: [PATCH 14/38] Adding red-candle to the list of providers in the README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87387c4d2..57c09592b 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ response = chat.with_schema(ProductSchema).ask "Analyze this product", with: "pr * **Rails:** ActiveRecord integration with `acts_as_chat` * **Async:** Fiber-based concurrency * **Model registry:** 500+ models with capability detection and pricing -* **Providers:** OpenAI, Anthropic, Gemini, VertexAI, Bedrock, DeepSeek, Mistral, Ollama, OpenRouter, Perplexity, GPUStack, and any OpenAI-compatible API +* **Providers:** OpenAI, Anthropic, Gemini, VertexAI, Bedrock, DeepSeek, Mistral, Ollama, OpenRouter, Perplexity, GPUStack, [RedCandle](https://github.com/scientist-labs/red-candle), and any OpenAI-compatible API ## Installation From b883989502ad26d0e77d58c8f4019e9f08feb027 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 15:20:08 -0700 Subject: [PATCH 15/38] Adding a new bundle 
group so developer can choose to include red-candle or not --- CONTRIBUTING.md | 33 +++++++++++++++++++++++++++++ Gemfile | 7 ++++++- gemfiles/rails_7.1.gemfile | 5 ++++- gemfiles/rails_7.2.gemfile | 5 ++++- gemfiles/rails_8.0.gemfile | 5 ++++- spec/spec_helper.rb | 38 +++++++++++++++++++++++++++++++++- spec/support/models_to_test.rb | 16 +++++++++++--- 7 files changed, 101 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f5ef7c9d7..1bdc76389 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -55,6 +55,39 @@ rake vcr:record[all] # Everything Always check cassettes for leaked API keys before committing. +## Optional Dependencies + +### Red Candle Provider + +The Red Candle provider enables local LLM execution using quantized GGUF models. It requires a Rust toolchain, so it's optional for contributors. + +**To work WITHOUT Red Candle (default):** +```bash +bundle install +bundle exec rspec # Red Candle tests will be skipped +``` + +**To work WITH Red Candle:** +```bash +# Enable the Red Candle gem group +bundle config set --local with red_candle +bundle install + +# Run tests with stubbed Red Candle (fast, default) +bundle exec rspec + +# Run tests with real inference (slow, downloads models) +RED_CANDLE_REAL_INFERENCE=true bundle exec rspec +``` + +**To switch back to working without Red Candle:** +```bash +bundle config set --local without red_candle +bundle install +``` + +The `bundle config` settings are stored in `.bundle/config` (gitignored), so each developer can choose their own setup without affecting others. + ## Important Notes * **Never edit `models.json`, `aliases.json`, or `available-models.md`** - they're auto-generated by `rake models` diff --git a/Gemfile b/Gemfile index 7d288ba14..8d5470699 100644 --- a/Gemfile +++ b/Gemfile @@ -20,7 +20,6 @@ group :development do # rubocop:disable Metrics/BlockLength gem 'pry', '>= 0.14' gem 'rails' gem 'rake', '>= 13.0' - gem 'red-candle', '~> 1.2' gem 'reline' gem 'rspec', '~> 3.12' gem 'rubocop', '>= 1.0' @@ -42,3 +41,9 @@ group :development do # rubocop:disable Metrics/BlockLength # Optional dependency for Vertex AI gem 'googleauth' end + +# Optional group for Red Candle provider (requires Rust toolchain) +# To include: bundle config set --local with red-candle +group :red_candle do + gem 'red-candle', '~> 1.2' +end diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 36123cc5c..6d59f55ff 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -18,7 +18,6 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.1.0" gem "rake", ">= 13.0" - gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" @@ -36,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index cfd31e0a8..d6ee5df4c 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -18,7 +18,6 @@ group :development do gem "pry", ">= 0.14" gem "rails", "~> 7.2.0" gem "rake", ">= 13.0" - gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" @@ -36,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index 4dc65e846..903177b2a 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -18,7 +18,6 @@ group 
:development do gem "pry", ">= 0.14" gem "rails", "~> 8.0.0" gem "rake", ">= 13.0" - gem "red-candle", "~> 1.2" gem "reline" gem "rspec", "~> 3.12" gem "rubocop", ">= 1.0" @@ -36,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 6165bee75..46c26a58e 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,4 +18,40 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' -require_relative 'support/red_candle_test_helper' unless ENV['RED_CANDLE_TEST_INFERENCE'] == 'true' + +# Handle Red Candle provider based on availability and environment +begin + require 'red-candle' + + # Red Candle gem is installed + if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' + # Use real inference - don't load the test helper + RSpec.configure do |config| + config.before(:suite) do + puts "\n🔥 Red Candle: Using REAL inference (this will be slow)" + puts " To use mocked responses, unset RED_CANDLE_REAL_INFERENCE\n\n" + end + end + else + # Use stubs (default when gem is installed) + require_relative 'support/red_candle_test_helper' + end +rescue LoadError + # Red Candle gem not installed - skip tests + RSpec.configure do |config| + config.before do |example| + # Skip Red Candle provider tests when gem not installed + test_description = example.full_description.to_s + if example.metadata[:file_path]&.include?('providers/red_candle') || + example.metadata[:described_class]&.to_s&.include?('RedCandle') || + test_description.include?('red_candle/') + skip 'Red Candle not installed (run: bundle config set --local with red-candle && bundle install)' + end + end + + config.before(:suite) do + puts "\n⚠️ Red Candle: Provider not available (gem not installed)" + puts " To enable: bundle config set --local with red-candle && bundle install\n\n" + end + end +end diff --git a/spec/support/models_to_test.rb b/spec/support/models_to_test.rb index 04591ebee..173866f88 100644 --- a/spec/support/models_to_test.rb +++ b/spec/support/models_to_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true -CHAT_MODELS = [ +# Base models available for all installations +chat_models = [ { provider: :anthropic, model: 'claude-3-5-haiku-20241022' }, { provider: :bedrock, model: 'anthropic.claude-3-5-haiku-20241022-v1:0' }, { provider: :deepseek, model: 'deepseek-chat' }, @@ -11,9 +12,18 @@ { provider: :openai, model: 'gpt-4.1-nano' }, { provider: :openrouter, model: 'anthropic/claude-3.5-haiku' }, { provider: :perplexity, model: 'sonar' }, - { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' }, { provider: :vertexai, model: 'gemini-2.5-flash' } -].freeze +] + +# Only include Red Candle models if the gem is available +begin + require 'red-candle' + chat_models << { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' } +rescue LoadError + # Red Candle not available - don't include its models +end + +CHAT_MODELS = chat_models.freeze PDF_MODELS = [ { provider: :anthropic, model: 'claude-3-5-haiku-20241022' }, From 685230c984eb2bf851069c0fbe34f5d881882ab6 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 15:22:22 -0700 Subject: [PATCH 16/38] Adding a comment about possibly supporting more red-candle models in the future --- lib/ruby_llm/providers/red_candle/models.rb | 1 + 1 file changed, 1 insertion(+) diff --git 
a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 177688ed5..9ea847538 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -5,6 +5,7 @@ module Providers class RedCandle # Models methods of the RedCandle integration module Models + # TODO: red-candle supports more models, but let's start with some well tested ones. SUPPORTED_MODELS = [ { id: 'google/gemma-3-4b-it-qat-q4_0-gguf', From a928bb1643acff58b9b8044520b0e2631f6b5d4c Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:24:55 -0700 Subject: [PATCH 17/38] Remove red-candle from the gemfiles --- gemfiles/rails_7.1.gemfile | 4 ---- gemfiles/rails_7.2.gemfile | 4 ---- gemfiles/rails_8.0.gemfile | 4 ---- 3 files changed, 12 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 6d59f55ff..675cb178e 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -35,8 +35,4 @@ group :development do gem "googleauth" end -group :red_candle do - gem "red-candle", "~> 1.2" -end - gemspec path: "../" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index d6ee5df4c..4922afb60 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -35,8 +35,4 @@ group :development do gem "googleauth" end -group :red_candle do - gem "red-candle", "~> 1.2" -end - gemspec path: "../" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index 903177b2a..f890433bf 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -35,8 +35,4 @@ group :development do gem "googleauth" end -group :red_candle do - gem "red-candle", "~> 1.2" -end - gemspec path: "../" From ee5b762d34f6a6ca9de877fb120ea136f1b3a696 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:26:37 -0700 Subject: [PATCH 18/38] Properly register red-candle models --- lib/ruby_llm.rb | 5 +++++ lib/ruby_llm/providers/red_candle.rb | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb index 0db7f8743..59a2dfa5e 100644 --- a/lib/ruby_llm.rb +++ b/lib/ruby_llm.rb @@ -94,6 +94,11 @@ def logger require 'candle' require 'ruby_llm/providers/red_candle' RubyLLM::Provider.register :red_candle, RubyLLM::Providers::RedCandle + + # Register Red Candle models with the global registry + RubyLLM::Providers::RedCandle.models.each do |model| + RubyLLM.models.instance_variable_get(:@models) << model + end rescue LoadError # Red Candle is optional - provider won't be available if gem isn't installed end diff --git a/lib/ruby_llm/providers/red_candle.rb b/lib/ruby_llm/providers/red_candle.rb index 7bbf62555..05a78fc89 100644 --- a/lib/ruby_llm/providers/red_candle.rb +++ b/lib/ruby_llm/providers/red_candle.rb @@ -40,6 +40,22 @@ def local? 
def supports_functions?(model_id = nil) RedCandle::Capabilities.supports_functions?(model_id) end + + def models + # Return Red Candle models for registration + RedCandle::Models::SUPPORTED_MODELS.map do |model_data| + Model::Info.new( + id: model_data[:id], + name: model_data[:name], + provider: 'red_candle', + type: 'chat', + family: model_data[:family], + context_window: model_data[:context_window], + capabilities: %w[streaming structured_output], + modalities: { input: %w[text], output: %w[text] } + ) + end + end end private From 43cc0b834f840dd53df7e9e56e6987267c768e68 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:28:34 -0700 Subject: [PATCH 19/38] Removed some unused config options --- lib/ruby_llm/configuration.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 89d79e90f..406a944bb 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -25,8 +25,6 @@ class Configuration :mistral_api_key, # Red Candle configuration :red_candle_device, - :red_candle_cache_dir, - :red_candle_debug, # Default models :default_model, :default_embedding_model, From 4b67818cdae293f9fb036bf3ca9bff8a14247f23 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:40:30 -0700 Subject: [PATCH 20/38] Updating the gemfiles again --- gemfiles/rails_7.1.gemfile | 4 ++++ gemfiles/rails_7.2.gemfile | 4 ++++ gemfiles/rails_8.0.gemfile | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 675cb178e..6d59f55ff 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -35,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index 4922afb60..d6ee5df4c 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -35,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index f890433bf..903177b2a 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -35,4 +35,8 @@ group :development do gem "googleauth" end +group :red_candle do + gem "red-candle", "~> 1.2" +end + gemspec path: "../" From c1ac17db3aff5607880e0661cf8d04589d18435f Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:54:09 -0700 Subject: [PATCH 21/38] Make the capabilities file match the actual capabilities --- lib/ruby_llm/providers/red_candle/capabilities.rb | 6 ++++-- spec/ruby_llm/providers/red_candle/capabilities_spec.rb | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index c63c82f44..6a24fa3f1 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -51,8 +51,10 @@ def model_context_window(model_id) case model_id when /gemma-3-4b/i 8192 - when /qwen2\.5-0\.5b/i + when /mistral-7b/i 32_768 + when /tinyllama/i + 2048 else 4096 # Conservative default end @@ -105,7 +107,7 @@ def supports_stop_sequences? end def model_families - %w[gemma qwen] + %w[gemma llama mistral] end def available_on_platform? 
diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 9b53ecc48..6fe9d75b4 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -52,7 +52,8 @@ describe '#model_context_window' do it 'returns correct context window for known models' do expect(described_class.model_context_window('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq(8192) - expect(described_class.model_context_window('Qwen/Qwen2.5-0.5B-Instruct')).to eq(32_768) + expect(described_class.model_context_window('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to eq(32_768) + expect(described_class.model_context_window('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to eq(2048) end it 'returns default for unknown models' do @@ -89,7 +90,7 @@ describe '#model_families' do it 'returns supported model families' do - expect(described_class.model_families).to eq(%w[gemma qwen]) + expect(described_class.model_families).to eq(%w[gemma llama mistral]) end end From 54b9834154cb364de812d71a745a9c06c548e40b Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Tue, 9 Sep 2025 16:58:03 -0700 Subject: [PATCH 22/38] Deep merge chat options --- lib/ruby_llm/providers/red_candle/chat.rb | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 30b7347f6..7086d86ad 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -8,14 +8,17 @@ module Chat # Override the base complete method to handle local execution def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists _ = headers # Interface compatibility - payload = render_payload( - messages, - tools: tools, - temperature: temperature, - model: model, - stream: block_given?, - schema: schema - ).merge(params) + payload = Utils.deep_merge( + render_payload( + messages, + tools: tools, + temperature: temperature, + model: model, + stream: block_given?, + schema: schema + ), + params + ) if block_given? 
perform_streaming_completion!(payload, &) From c78ce4054c2dfd4792ea27ef53e9a8e0f2433d02 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Tue, 9 Sep 2025 17:57:13 -0700 Subject: [PATCH 23/38] make red-candle off by default --- CONTRIBUTING.md | 2 +- Gemfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1bdc76389..fc256d27c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -82,7 +82,7 @@ RED_CANDLE_REAL_INFERENCE=true bundle exec rspec **To switch back to working without Red Candle:** ```bash -bundle config set --local without red_candle +bundle config unset with bundle install ``` diff --git a/Gemfile b/Gemfile index 8d5470699..0cfcba3b5 100644 --- a/Gemfile +++ b/Gemfile @@ -44,6 +44,6 @@ end # Optional group for Red Candle provider (requires Rust toolchain) # To include: bundle config set --local with red-candle -group :red_candle do +group :red_candle, optional: true do gem 'red-candle', '~> 1.2' end From 6816be9a5d704e75dd90778a9f3eb4824f18929f Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Tue, 9 Sep 2025 22:58:22 -0700 Subject: [PATCH 24/38] improve error messages --- lib/ruby_llm/providers/red_candle/chat.rb | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 7086d86ad..64a5979aa 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -158,7 +158,21 @@ def load_model(model_id) ::Candle::LLM.from_pretrained(model_id, device: @device) end rescue StandardError => e - raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") + if e.message.include?('Failed to find tokenizer') + raise Error.new(nil, + "Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.\n" \ + "Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}\n" \ + "If it requires authentication, login with: huggingface-cli login\n" \ + "Original error: #{e.message}") + elsif e.message.include?('Failed to find model') + raise Error.new(nil, + "Failed to find model '#{model_id}'. The model may not exist or require authentication.\n" \ + "Please verify the model exists at: https://huggingface.co/#{model_id}\n" \ + "If it requires authentication, login with: huggingface-cli login\n" \ + "Original error: #{e.message}") + else + raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") + end end def format_messages(messages) From a258a398b616944ea3dd28bc7856813236c03caf Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Tue, 9 Sep 2025 23:06:48 -0700 Subject: [PATCH 25/38] improved error message --- lib/ruby_llm/providers/red_candle/chat.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 64a5979aa..935234d3a 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -162,12 +162,14 @@ def load_model(model_id) raise Error.new(nil, "Failed to load tokenizer '#{tokenizer}'. 
The tokenizer may not exist or require authentication.\n" \ "Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}\n" \ + "And that you have accepted the terms of service for the tokenizer.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ "Original error: #{e.message}") elsif e.message.include?('Failed to find model') raise Error.new(nil, "Failed to find model '#{model_id}'. The model may not exist or require authentication.\n" \ "Please verify the model exists at: https://huggingface.co/#{model_id}\n" \ + "And that you have accepted the terms of service for the model.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ "Original error: #{e.message}") else From 004563e1abfb50ba32e471fcac6a375ffc0371eb Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Wed, 10 Sep 2025 22:34:26 -0700 Subject: [PATCH 26/38] add additional models --- .../providers/red_candle/capabilities.rb | 4 +-- lib/ruby_llm/providers/red_candle/models.rb | 19 +++++++++++++ .../providers/red_candle/capabilities_spec.rb | 2 +- .../providers/red_candle/models_spec.rb | 28 +++++++++---------- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 6a24fa3f1..40ad397f8 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -51,7 +51,7 @@ def model_context_window(model_id) case model_id when /gemma-3-4b/i 8192 - when /mistral-7b/i + when /qwen2\.5-1\.5b/i, /mistral-7b/i 32_768 when /tinyllama/i 2048 @@ -107,7 +107,7 @@ def supports_stop_sequences? end def model_families - %w[gemma llama mistral] + %w[gemma llama qwen2 mistral phi] end def available_on_platform? 
diff --git a/lib/ruby_llm/providers/red_candle/models.rb b/lib/ruby_llm/providers/red_candle/models.rb index 9ea847538..fbfc8a038 100644 --- a/lib/ruby_llm/providers/red_candle/models.rb +++ b/lib/ruby_llm/providers/red_candle/models.rb @@ -38,6 +38,25 @@ module Models architecture: 'mistral', supports_chat: true, supports_structured: true + }, + { + id: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF', + name: 'Qwen 2.1.5B Instruct (Quantized)', + gguf_file: 'qwen2.5-1.5b-instruct-q4_k_m.gguf', + context_window: 32_768, + family: 'qwen2', + architecture: 'qwen2', + supports_chat: true, + supports_structured: true + }, + { + id: 'microsoft/Phi-3-mini-4k-instruct', + name: 'Phi 3', + context_window: 4096, + family: 'phi', + architecture: 'phi', + supports_chat: true, + supports_structured: true } ].freeze diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 6fe9d75b4..2b9bf8875 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -90,7 +90,7 @@ describe '#model_families' do it 'returns supported model families' do - expect(described_class.model_families).to eq(%w[gemma llama mistral]) + expect(described_class.model_families).to eq(%w[gemma llama qwen2 mistral phi]) end end diff --git a/spec/ruby_llm/providers/red_candle/models_spec.rb b/spec/ruby_llm/providers/red_candle/models_spec.rb index 9a771be34..8b30dbf42 100644 --- a/spec/ruby_llm/providers/red_candle/models_spec.rb +++ b/spec/ruby_llm/providers/red_candle/models_spec.rb @@ -16,7 +16,7 @@ it 'returns an array of supported models' do models = provider.models expect(models).to be_an(Array) - expect(models.size).to eq(3) + expect(models.size).to eq(5) expect(models.first).to be_a(RubyLLM::Model::Info) end @@ -24,16 +24,16 @@ model_ids = provider.models.map(&:id) expect(model_ids).to include('google/gemma-3-4b-it-qat-q4_0-gguf') expect(model_ids).to include('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') - expect(model_ids).to include('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + expect(model_ids).to include('Qwen/Qwen2.5-1.5B-Instruct-GGUF') end end describe '#model' do context 'with a valid model ID' do it 'returns the model' do - model = provider.model('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + model = provider.model('Qwen/Qwen2.5-1.5B-Instruct-GGUF') expect(model).to be_a(RubyLLM::Model::Info) - expect(model.id).to eq('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + expect(model.id).to eq('Qwen/Qwen2.5-1.5B-Instruct-GGUF') end end @@ -50,7 +50,7 @@ describe '#model_available?' 
do it 'returns true for supported models' do expect(provider.model_available?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.model_available?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.model_available?('Qwen/Qwen2.5-1.5B-Instruct-GGUF')).to be true end it 'returns false for unsupported models' do @@ -60,12 +60,12 @@ describe '#model_info' do it 'returns model information' do - info = provider.model_info('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') + info = provider.model_info('Qwen/Qwen2.5-1.5B-Instruct-GGUF') expect(info).to include( - id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', - name: 'Mistral 7B Instruct v0.2 (Quantized)', + id: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF', + name: 'Qwen 2.1.5B Instruct (Quantized)', context_window: 32_768, - family: 'mistral', + family: 'qwen2', supports_chat: true, supports_structured: true ) @@ -81,9 +81,9 @@ expect(provider.gguf_file_for('google/gemma-3-4b-it-qat-q4_0-gguf')).to eq('gemma-3-4b-it-q4_0.gguf') end - it 'returns the GGUF file for Mistral model' do - model_id = 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' - gguf_file = 'mistral-7b-instruct-v0.2.Q4_K_M.gguf' + it 'returns the GGUF file for Qwen model' do + model_id = 'Qwen/Qwen2.5-1.5B-Instruct-GGUF' + gguf_file = 'qwen2.5-1.5b-instruct-q4_k_m.gguf' expect(provider.gguf_file_for(model_id)).to eq(gguf_file) end @@ -95,7 +95,7 @@ describe '#supports_chat?' do it 'returns true for all current models' do expect(provider.supports_chat?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_chat?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_chat?('Qwen/Qwen2.5-1.5B-Instruct-GGUF')).to be true expect(provider.supports_chat?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end @@ -103,7 +103,7 @@ describe '#supports_structured?' do it 'returns true for all current models' do expect(provider.supports_structured?('google/gemma-3-4b-it-qat-q4_0-gguf')).to be true - expect(provider.supports_structured?('TheBloke/Mistral-7B-Instruct-v0.2-GGUF')).to be true + expect(provider.supports_structured?('Qwen/Qwen2.5-1.5B-Instruct-GGUF')).to be true expect(provider.supports_structured?('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')).to be true end end From c4895d6c0747be6223c98901f5e564be4076a3f2 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 11 Sep 2025 12:59:20 -0700 Subject: [PATCH 27/38] seperate out tokenizers from gguf --- lib/ruby_llm/providers/red_candle/chat.rb | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 935234d3a..7afa2f4c8 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -147,16 +147,10 @@ def load_model(model_id) gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil tokenizer = respond_to?(:tokenizer_for) ? 
tokenizer_for(model_id) : nil - if gguf_file - # For GGUF models, use the tokenizer if specified, otherwise use model_id - options = { device: @device, gguf_file: gguf_file } - options[:tokenizer] = tokenizer if tokenizer - - ::Candle::LLM.from_pretrained(model_id, **options) - else - # For regular models, use from_pretrained without gguf_file - ::Candle::LLM.from_pretrained(model_id, device: @device) - end + options = { device: @device } + options[:gguf_file] = gguf_file if gguf_file + options[:tokenizer] = tokenizer if tokenizer + ::Candle::LLM.from_pretrained(model_id, **options) rescue StandardError => e if e.message.include?('Failed to find tokenizer') raise Error.new(nil, From 0dc8e9a833c84eade13d3a8fc5cbd987dc664605 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 11 Sep 2025 13:01:08 -0700 Subject: [PATCH 28/38] more complete error message --- lib/ruby_llm/providers/red_candle/chat.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 7afa2f4c8..0ebc3190b 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -158,6 +158,7 @@ def load_model(model_id) "Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}\n" \ "And that you have accepted the terms of service for the tokenizer.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ + "See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning\n" \ "Original error: #{e.message}") elsif e.message.include?('Failed to find model') raise Error.new(nil, @@ -165,6 +166,7 @@ def load_model(model_id) "Please verify the model exists at: https://huggingface.co/#{model_id}\n" \ "And that you have accepted the terms of service for the model.\n" \ "If it requires authentication, login with: huggingface-cli login\n" \ + "See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning\n" \ "Original error: #{e.message}") else raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}") From 8c87b591c56c56ffe30c91a354348c1f68593e02 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Thu, 11 Sep 2025 13:33:48 -0700 Subject: [PATCH 29/38] Working on documentation --- docs/_advanced/models.md | 27 ++++++++++++++++++++++ docs/_getting_started/configuration.md | 32 ++++++++++++++++++++++++++ docs/_reference/available-models.md | 18 +++++++++++++++ docs/index.md | 5 +++- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/docs/_advanced/models.md b/docs/_advanced/models.md index dcd446de2..8ab8c57a1 100644 --- a/docs/_advanced/models.md +++ b/docs/_advanced/models.md @@ -95,6 +95,33 @@ RubyLLM.models.refresh!(remote_only: true) This is useful when you want to refresh only cloud-based models without querying local model servers. +### Dynamic Model Registration (Red Candle) + +Some providers register their models dynamically at runtime rather than through the models.json file. Red Candle is one such provider - it registers its GGUF models when the gem is loaded. + +**How Red Candle Models Work:** + +1. **Not in models.json**: Red Candle models don't appear in the static models.json file since they're only available when the gem is installed. + +2. 
**Dynamic Registration**: When ruby_llm.rb loads and Red Candle is available, it adds models to the in-memory registry: + ```ruby + # This happens automatically in lib/ruby_llm.rb + RubyLLM::Providers::RedCandle.models.each do |model| + RubyLLM.models.instance_variable_get(:@models) << model + end + ``` + +3. **Excluded from refresh!**: The `refresh!(remote_only: true)` flag excludes Red Candle and other local providers. + +4. **Currently Supported Models**: + - `google/gemma-3-4b-it-qat-q4_0-gguf` + - `TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF` + - `TheBloke/Mistral-7B-Instruct-v0.2-GGUF` + - `Qwen/Qwen2.5-1.5B-Instruct-GGUF` + - `microsoft/Phi-3-mini-4k-instruct` + +Red Candle models are only available when the gem is installed with the red_candle group enabled. See the [Configuration Guide]({% link _getting_started/configuration.md %}) for installation instructions. + **For Gem Development:** The `rake models:update` task is designed for gem maintainers and updates the `models.json` file shipped with the gem: diff --git a/docs/_getting_started/configuration.md b/docs/_getting_started/configuration.md index 6af299ca4..933f6488d 100644 --- a/docs/_getting_started/configuration.md +++ b/docs/_getting_started/configuration.md @@ -64,6 +64,7 @@ RubyLLM.configure do |config| config.ollama_api_base = 'http://localhost:11434/v1' config.gpustack_api_base = ENV['GPUSTACK_API_BASE'] config.gpustack_api_key = ENV['GPUSTACK_API_KEY'] + # Red Candle (optional - see below) # AWS Bedrock (uses standard AWS credential chain if not set) config.bedrock_api_key = ENV['AWS_ACCESS_KEY_ID'] @@ -90,6 +91,37 @@ end These headers are optional and only needed for organization-specific billing or project tracking. +### Red Candle (Local GGUF Models) + +Red Candle is an optional provider that enables local execution of quantized GGUF models. To use it, add the red-candle gem to your Gemfile: + +```ruby +# Gemfile +gem 'ruby_llm' +gem 'red-candle' # Optional: for local GGUF model execution +``` + +Then install: + +```bash +bundle install +``` + +Red Candle requires no API keys since it runs models locally. Some models may require HuggingFace authentication: + +```bash +huggingface-cli login # Required for some gated models +``` + +See [Red Candle's HuggingFace guide](https://github.com/scientist-labs/red-candle/blob/main/docs/HUGGINGFACE.md) for details on authentication. + +Once configured, you can use it like any other provider: + +```ruby +chat = RubyLLM.chat(model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', provider: :red_candle) +response = chat.ask("Hello!") +``` + ## Custom Endpoints ### OpenAI-Compatible APIs diff --git a/docs/_reference/available-models.md b/docs/_reference/available-models.md index c84ccd1ee..86a52ba7d 100644 --- a/docs/_reference/available-models.md +++ b/docs/_reference/available-models.md @@ -27,6 +27,7 @@ redirect_from: - **OpenRouter**: Direct API - **Others**: Local capabilities files + ## Last Updated {: .d-inline-block } @@ -2515,3 +2516,20 @@ Models that generate embeddings: | text-moderation-latest | openai | - | 32768 | - | | text-moderation-stable | openai | - | 32768 | - | + +## Local Providers + +### Red Candle (5) + +Red Candle enables local execution of quantized GGUF models. These models run on your machine with no API costs. 
+ +| Model | Provider | Context | Max Output | Standard Pricing (per 1M tokens) | +| :-- | :-- | --: | --: | :-- | +| google/gemma-3-4b-it-qat-q4_0-gguf | red_candle | 8192 | 512 | Free (local execution) | +| TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF | red_candle | 2048 | 512 | Free (local execution) | +| TheBloke/Mistral-7B-Instruct-v0.2-GGUF | red_candle | 32768 | 512 | Free (local execution) | +| Qwen/Qwen2.5-1.5B-Instruct-GGUF | red_candle | 32768 | 512 | Free (local execution) | +| microsoft/Phi-3-mini-4k-instruct | red_candle | 4096 | 512 | Free (local execution) | + +> **Note:** Local providers (Ollama, GPUStack, Red Candle) register their models dynamically at runtime based on what's installed locally. Ollama and GPUStack models depend on what you've pulled or configured on your system. Red Candle requires the `red-candle` gem. See the [Configuration Guide]({% link _getting_started/configuration.md %}) for setup instructions. +{: .note } diff --git a/docs/index.md b/docs/index.md index c057f580d..b664e5d72 100644 --- a/docs/index.md +++ b/docs/index.md @@ -67,6 +67,10 @@ permalink: / VertexAI VertexAI +
@@ -204,4 +208,3 @@ end chat = Chat.create! model_id: "claude-sonnet-4" chat.ask "What's in this file?", with: "report.pdf" ``` - From d437f7398bdc8a9ed649d49e05376bd09c9f9922 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Thu, 11 Sep 2025 13:44:13 -0700 Subject: [PATCH 30/38] red-candle is optional --- gemfiles/rails_7.1.gemfile | 2 +- gemfiles/rails_7.2.gemfile | 2 +- gemfiles/rails_8.0.gemfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gemfiles/rails_7.1.gemfile b/gemfiles/rails_7.1.gemfile index 6d59f55ff..39d07214e 100644 --- a/gemfiles/rails_7.1.gemfile +++ b/gemfiles/rails_7.1.gemfile @@ -35,7 +35,7 @@ group :development do gem "googleauth" end -group :red_candle do +group :red_candle, optional: true do gem "red-candle", "~> 1.2" end diff --git a/gemfiles/rails_7.2.gemfile b/gemfiles/rails_7.2.gemfile index d6ee5df4c..b216fc61a 100644 --- a/gemfiles/rails_7.2.gemfile +++ b/gemfiles/rails_7.2.gemfile @@ -35,7 +35,7 @@ group :development do gem "googleauth" end -group :red_candle do +group :red_candle, optional: true do gem "red-candle", "~> 1.2" end diff --git a/gemfiles/rails_8.0.gemfile b/gemfiles/rails_8.0.gemfile index 903177b2a..abd42e7e3 100644 --- a/gemfiles/rails_8.0.gemfile +++ b/gemfiles/rails_8.0.gemfile @@ -35,7 +35,7 @@ group :development do gem "googleauth" end -group :red_candle do +group :red_candle, optional: true do gem "red-candle", "~> 1.2" end From 9bdb43408e9d70122499685f4925e25496f73501 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Thu, 11 Sep 2025 13:44:42 -0700 Subject: [PATCH 31/38] require 'candle' is standard --- spec/spec_helper.rb | 2 +- spec/support/models_to_test.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 46c26a58e..4e621c40d 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -21,7 +21,7 @@ # Handle Red Candle provider based on availability and environment begin - require 'red-candle' + require 'candle' # Red Candle gem is installed if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' diff --git a/spec/support/models_to_test.rb b/spec/support/models_to_test.rb index 173866f88..02b8baf7f 100644 --- a/spec/support/models_to_test.rb +++ b/spec/support/models_to_test.rb @@ -17,7 +17,7 @@ # Only include Red Candle models if the gem is available begin - require 'red-candle' + require 'candle' chat_models << { provider: :red_candle, model: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' } rescue LoadError # Red Candle not available - don't include its models From d52e26e7c615defc9d72cbc69b4310d8377a8876 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 12 Sep 2025 11:26:40 -0700 Subject: [PATCH 32/38] rubocop --- lib/ruby_llm/providers/red_candle/chat.rb | 53 ++++++++++++++--------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb index 0ebc3190b..44cc4b695 100644 --- a/lib/ruby_llm/providers/red_candle/chat.rb +++ b/lib/ruby_llm/providers/red_candle/chat.rb @@ -141,38 +141,49 @@ def ensure_model_loaded!(model_id) @loaded_models[model_id] ||= load_model(model_id) end - def load_model(model_id) + def model_options(model_id) # Get GGUF file and tokenizer if this is a GGUF model # Access the methods from the Models module which is included in the provider - gguf_file = respond_to?(:gguf_file_for) ? gguf_file_for(model_id) : nil - tokenizer = respond_to?(:tokenizer_for) ? 
tokenizer_for(model_id) : nil
-        options = { device: @device }
-        options[:gguf_file] = gguf_file if gguf_file
-        options[:tokenizer] = tokenizer if tokenizer
-        ::Candle::LLM.from_pretrained(model_id, **options)
+        options = { device: @device }
+        options[:gguf_file] = gguf_file_for(model_id) if respond_to?(:gguf_file_for)
+        options[:tokenizer] = tokenizer_for(model_id) if respond_to?(:tokenizer_for)
+        options
+      end
+
+      def load_model(model_id)
+        ::Candle::LLM.from_pretrained(model_id, **model_options(model_id))
       rescue StandardError => e
         if e.message.include?('Failed to find tokenizer')
+          raise Error.new(nil, token_error_message(e, model_options(model_id)[:tokenizer]))
         elsif e.message.include?('Failed to find model')
+          raise Error.new(nil, model_error_message(e, model_id))
         else
           raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}")
         end
       end
+
+      def token_error_message(exception, tokenizer)
+        <<~ERROR_MESSAGE
+          Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.
+          Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}
+          And that you have accepted the terms of service for the tokenizer.
+          If it requires authentication, login with: huggingface-cli login
+          See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+          Original error: #{exception.message}
+        ERROR_MESSAGE
+      end
+
+      def model_error_message(exception, model_id)
+        <<~ERROR_MESSAGE
+          Failed to find model '#{model_id}'. The model may not exist or require authentication.
+          Please verify the model exists at: https://huggingface.co/#{model_id}
+          And that you have accepted the terms of service for the model.
+ If it requires authentication, login with: huggingface-cli login + See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning + Original error: #{e.message}" + ERROR_MESSAGE + end + def format_messages(messages) messages.map do |msg| # Handle both hash and Message objects From 8ec93e8819249b6c3c9e06650b4e00d843e51256 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 12 Sep 2025 11:44:13 -0700 Subject: [PATCH 33/38] use a spec helper --- spec/spec_helper.rb | 38 +------------------------------ spec/support/red_candle_loader.rb | 38 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 37 deletions(-) create mode 100644 spec/support/red_candle_loader.rb diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 4e621c40d..b8796653c 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -18,40 +18,4 @@ require_relative 'support/models_to_test' require_relative 'support/streaming_error_helpers' require_relative 'support/provider_capabilities_helper' - -# Handle Red Candle provider based on availability and environment -begin - require 'candle' - - # Red Candle gem is installed - if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' - # Use real inference - don't load the test helper - RSpec.configure do |config| - config.before(:suite) do - puts "\n🔥 Red Candle: Using REAL inference (this will be slow)" - puts " To use mocked responses, unset RED_CANDLE_REAL_INFERENCE\n\n" - end - end - else - # Use stubs (default when gem is installed) - require_relative 'support/red_candle_test_helper' - end -rescue LoadError - # Red Candle gem not installed - skip tests - RSpec.configure do |config| - config.before do |example| - # Skip Red Candle provider tests when gem not installed - test_description = example.full_description.to_s - if example.metadata[:file_path]&.include?('providers/red_candle') || - example.metadata[:described_class]&.to_s&.include?('RedCandle') || - test_description.include?('red_candle/') - skip 'Red Candle not installed (run: bundle config set --local with red-candle && bundle install)' - end - end - - config.before(:suite) do - puts "\n⚠️ Red Candle: Provider not available (gem not installed)" - puts " To enable: bundle config set --local with red-candle && bundle install\n\n" - end - end -end +require_relative 'support/red_candle_loader' diff --git a/spec/support/red_candle_loader.rb b/spec/support/red_candle_loader.rb new file mode 100644 index 000000000..b50ca863b --- /dev/null +++ b/spec/support/red_candle_loader.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +# Handle Red Candle provider based on availability and environment +begin + require 'candle' + + # Red Candle gem is installed + if ENV['RED_CANDLE_REAL_INFERENCE'] == 'true' + # Use real inference - don't load the test helper + RSpec.configure do |config| + config.before(:suite) do + puts "\n🔥 Red Candle: Using REAL inference (this will be slow)" + puts " To use mocked responses, unset RED_CANDLE_REAL_INFERENCE\n\n" + end + end + else + # Use stubs (default when gem is installed) + require_relative 'support/red_candle_test_helper' + end +rescue LoadError + # Red Candle gem not installed - skip tests + RSpec.configure do |config| + config.before do |example| + # Skip Red Candle provider tests when gem not installed + test_description = example.full_description.to_s + if example.metadata[:file_path]&.include?('providers/red_candle') || + example.metadata[:described_class]&.to_s&.include?('RedCandle') || + test_description.include?('red_candle/') 
+ skip 'Red Candle not installed (run: bundle config set --local with red_candle && bundle install)' + end + end + + config.before(:suite) do + puts "\n⚠️ Red Candle: Provider not available (gem not installed)" + puts " To enable: bundle config set --local with red-candle && bundle install\n\n" + end + end +end From d1696ffd15082a8f0ff6a8c11b64ebd84919180d Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Fri, 12 Sep 2025 10:52:13 -0700 Subject: [PATCH 34/38] Remove the too cute pricing method --- lib/ruby_llm/providers/red_candle/capabilities.rb | 10 ---------- .../ruby_llm/providers/red_candle/capabilities_spec.rb | 10 ---------- 2 files changed, 20 deletions(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 40ad397f8..3311ee9fe 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -60,16 +60,6 @@ def model_context_window(model_id) end end - def pricing - # Local execution - no API costs - { - input_tokens_per_dollar: Float::INFINITY, - output_tokens_per_dollar: Float::INFINITY, - input_price_per_million_tokens: 0.0, - output_price_per_million_tokens: 0.0 - } - end - def default_max_tokens 512 end diff --git a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb index 2b9bf8875..03bb49f25 100644 --- a/spec/ruby_llm/providers/red_candle/capabilities_spec.rb +++ b/spec/ruby_llm/providers/red_candle/capabilities_spec.rb @@ -61,16 +61,6 @@ end end - describe '#pricing' do - it 'returns infinite tokens per dollar for local execution' do - pricing = described_class.pricing - expect(pricing[:input_tokens_per_dollar]).to eq(Float::INFINITY) - expect(pricing[:output_tokens_per_dollar]).to eq(Float::INFINITY) - expect(pricing[:input_price_per_million_tokens]).to eq(0.0) - expect(pricing[:output_price_per_million_tokens]).to eq(0.0) - end - end - describe 'generation parameters' do it 'provides correct defaults and limits' do expect(described_class.default_max_tokens).to eq(512) From 62a038986b100fb56fa671670f12bc71b7ec3bab Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Fri, 12 Sep 2025 12:42:23 -0700 Subject: [PATCH 35/38] Fix the comment for RubyLLM::Providers::RedCandle::Capabilities --- lib/ruby_llm/providers/red_candle/capabilities.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/red_candle/capabilities.rb b/lib/ruby_llm/providers/red_candle/capabilities.rb index 3311ee9fe..ec0afb6b7 100644 --- a/lib/ruby_llm/providers/red_candle/capabilities.rb +++ b/lib/ruby_llm/providers/red_candle/capabilities.rb @@ -3,7 +3,7 @@ module RubyLLM module Providers class RedCandle - # Determines capabilities and pricing for RedCandle models + # Determines capabilities for RedCandle models module Capabilities module_function From 90128bbfff164a089fcf78a564c0f6e82cc43336 Mon Sep 17 00:00:00 2001 From: Chris Petersen Date: Fri, 12 Sep 2025 15:54:29 -0700 Subject: [PATCH 36/38] Make the require_relative actually relative --- spec/support/red_candle_loader.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/support/red_candle_loader.rb b/spec/support/red_candle_loader.rb index b50ca863b..b4fb00b4b 100644 --- a/spec/support/red_candle_loader.rb +++ b/spec/support/red_candle_loader.rb @@ -15,7 +15,7 @@ end else # Use stubs (default when gem is installed) - require_relative 'support/red_candle_test_helper' + require_relative 'red_candle_test_helper' 
   end
 rescue LoadError
   # Red Candle gem not installed - skip tests
   RSpec.configure do |config|
     config.before do |example|
       # Skip Red Candle provider tests when gem not installed
       test_description = example.full_description.to_s
       if example.metadata[:file_path]&.include?('providers/red_candle') ||
          example.metadata[:described_class]&.to_s&.include?('RedCandle') ||
          test_description.include?('red_candle/')

From 9ab992dd25f3668f821ef161395d29f25a8961e6 Mon Sep 17 00:00:00 2001
From: Chris Petersen
Date: Sat, 13 Sep 2025 09:59:35 -0700
Subject: [PATCH 37/38] Update to red-candle 1.3.0 to support Ruby 3.1

---
 Gemfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Gemfile b/Gemfile
index 0cfcba3b5..c6d0742ae 100644
--- a/Gemfile
+++ b/Gemfile
@@ -45,5 +45,5 @@ end
 # Optional group for Red Candle provider (requires Rust toolchain)
 # To include: bundle config set --local with red-candle
 group :red_candle, optional: true do
-  gem 'red-candle', '~> 1.2'
+  gem 'red-candle', '~> 1.3'
 end

From 922e0e93f20c067e0c0fc7ecd2bf124d1a46f13b Mon Sep 17 00:00:00 2001
From: Chris Petersen
Date: Sat, 13 Sep 2025 10:06:53 -0700
Subject: [PATCH 38/38] Update the comment

---
 lib/ruby_llm/providers/red_candle/chat.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ruby_llm/providers/red_candle/chat.rb b/lib/ruby_llm/providers/red_candle/chat.rb
index 44cc4b695..915c2075b 100644
--- a/lib/ruby_llm/providers/red_candle/chat.rb
+++ b/lib/ruby_llm/providers/red_candle/chat.rb
@@ -25,7 +25,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc
         else
           result = perform_completion!(payload)
           # Convert to Message object for compatibility
-          # Red Candle doesn't provide token counts, but we can estimate them
+          # Red Candle doesn't provide token counts by default, but we can estimate them
           content = result[:content]
           # Rough estimation: ~4 characters per token
           estimated_output_tokens = (content.length / 4.0).round
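
With the series applied, the provider behaves like any other RubyLLM backend. A minimal smoke test might look like the sketch below (not part of the patches; it assumes the optional `red_candle` bundler group has been installed and that the chosen model's Hugging Face terms have been accepted):

```ruby
# Sketch only: exercises the Red Candle provider added in this series.
# Assumes `bundle config set --local with red_candle && bundle install`
# has been run so the red-candle gem (and `require 'candle'`) is available.
require 'ruby_llm'

RubyLLM.configure do |config|
  # Optional; falls back to Candle::Device.best when unset (see determine_device).
  config.red_candle_device = 'cpu'
end

chat = RubyLLM.chat(
  model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', # one of SUPPORTED_MODELS
  provider: :red_candle
)

# Streaming is supported; tools/functions are not (see Capabilities).
chat.ask('Say hello in one short sentence') do |chunk|
  print chunk.content
end
```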