Skip to content

Commit ae7a8b8

Browse files
feat(api): further updates for evals API
1 parent 690b6a7 commit ae7a8b8

38 files changed

+307
-265
lines changed

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 99
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5fa16b9a02985ae06e41be14946a9c325dc672fb014b3c19abca65880c6990e6.yml
3-
openapi_spec_hash: da3e669f65130043b1170048c0727890
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-262e171d0a8150ea1192474d16ba3afdf9a054b399f1a49a9c9b697a3073c136.yml
3+
openapi_spec_hash: 33e00a48df8f94c94f46290c489f132b
44
config_hash: d8d5fda350f6db77c784f35429741a2e

lib/openai/models/eval_create_params.rb

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,17 @@ class EvalCreateParams < OpenAI::Internal::Type::BaseModel
88
include OpenAI::Internal::Type::RequestParameters
99

1010
# @!attribute data_source_config
11-
# The configuration for the data source used for the evaluation runs.
11+
# The configuration for the data source used for the evaluation runs. Dictates the
12+
# schema of the data used in the evaluation.
1213
#
1314
# @return [OpenAI::EvalCreateParams::DataSourceConfig::Custom, OpenAI::EvalCreateParams::DataSourceConfig::Logs, OpenAI::EvalCreateParams::DataSourceConfig::StoredCompletions]
1415
required :data_source_config, union: -> { OpenAI::EvalCreateParams::DataSourceConfig }
1516

1617
# @!attribute testing_criteria
17-
# A list of graders for all eval runs in this group.
18+
# A list of graders for all eval runs in this group. Graders can reference
19+
# variables in the data source using double curly braces notation, like
20+
# `{{item.variable_name}}`. To reference the model's output, use the `sample`
21+
# namespace (ie, `{{sample.output_text}}`).
1822
#
1923
# @return [Array<OpenAI::EvalCreateParams::TestingCriterion::LabelModel, OpenAI::Graders::StringCheckGrader, OpenAI::EvalCreateParams::TestingCriterion::TextSimilarity, OpenAI::EvalCreateParams::TestingCriterion::Python, OpenAI::EvalCreateParams::TestingCriterion::ScoreModel>]
2024
required :testing_criteria,
@@ -41,17 +45,18 @@ class EvalCreateParams < OpenAI::Internal::Type::BaseModel
4145
# Some parameter documentation has been truncated, see
4246
# {OpenAI::Models::EvalCreateParams} for more details.
4347
#
44-
# @param data_source_config [OpenAI::EvalCreateParams::DataSourceConfig::Custom, OpenAI::EvalCreateParams::DataSourceConfig::Logs, OpenAI::EvalCreateParams::DataSourceConfig::StoredCompletions] The configuration for the data source used for the evaluation runs.
48+
# @param data_source_config [OpenAI::EvalCreateParams::DataSourceConfig::Custom, OpenAI::EvalCreateParams::DataSourceConfig::Logs, OpenAI::EvalCreateParams::DataSourceConfig::StoredCompletions] The configuration for the data source used for the evaluation runs. Dictates the
4549
#
46-
# @param testing_criteria [Array<OpenAI::EvalCreateParams::TestingCriterion::LabelModel, OpenAI::Graders::StringCheckGrader, OpenAI::EvalCreateParams::TestingCriterion::TextSimilarity, OpenAI::EvalCreateParams::TestingCriterion::Python, OpenAI::EvalCreateParams::TestingCriterion::ScoreModel>] A list of graders for all eval runs in this group.
50+
# @param testing_criteria [Array<OpenAI::EvalCreateParams::TestingCriterion::LabelModel, OpenAI::Graders::StringCheckGrader, OpenAI::EvalCreateParams::TestingCriterion::TextSimilarity, OpenAI::EvalCreateParams::TestingCriterion::Python, OpenAI::EvalCreateParams::TestingCriterion::ScoreModel>] A list of graders for all eval runs in this group. Graders can reference variabl
4751
#
4852
# @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. This can be
4953
#
5054
# @param name [String] The name of the evaluation.
5155
#
5256
# @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}]
5357

54-
# The configuration for the data source used for the evaluation runs.
58+
# The configuration for the data source used for the evaluation runs. Dictates the
59+
# schema of the data used in the evaluation.
5560
module DataSourceConfig
5661
extend OpenAI::Internal::Type::Union
5762

@@ -68,7 +73,7 @@ module DataSourceConfig
6873
variant :logs, -> { OpenAI::EvalCreateParams::DataSourceConfig::Logs }
6974

7075
# Deprecated in favor of LogsDataSourceConfig.
71-
variant :"stored-completions", -> { OpenAI::EvalCreateParams::DataSourceConfig::StoredCompletions }
76+
variant :stored_completions, -> { OpenAI::EvalCreateParams::DataSourceConfig::StoredCompletions }
7277

7378
class Custom < OpenAI::Internal::Type::BaseModel
7479
# @!attribute item_schema
@@ -130,25 +135,26 @@ class Logs < OpenAI::Internal::Type::BaseModel
130135
# @param type [Symbol, :logs] The type of data source. Always `logs`.
131136
end
132137

138+
# @deprecated
133139
class StoredCompletions < OpenAI::Internal::Type::BaseModel
134140
# @!attribute type
135-
# The type of data source. Always `stored-completions`.
141+
# The type of data source. Always `stored_completions`.
136142
#
137-
# @return [Symbol, :"stored-completions"]
138-
required :type, const: :"stored-completions"
143+
# @return [Symbol, :stored_completions]
144+
required :type, const: :stored_completions
139145

140146
# @!attribute metadata
141147
# Metadata filters for the stored completions data source.
142148
#
143149
# @return [Hash{Symbol=>Object}, nil]
144150
optional :metadata, OpenAI::Internal::Type::HashOf[OpenAI::Internal::Type::Unknown]
145151

146-
# @!method initialize(metadata: nil, type: :"stored-completions")
152+
# @!method initialize(metadata: nil, type: :stored_completions)
147153
# Deprecated in favor of LogsDataSourceConfig.
148154
#
149155
# @param metadata [Hash{Symbol=>Object}] Metadata filters for the stored completions data source.
150156
#
151-
# @param type [Symbol, :"stored-completions"] The type of data source. Always `stored-completions`.
157+
# @param type [Symbol, :stored_completions] The type of data source. Always `stored_completions`.
152158
end
153159

154160
# @!method self.variants
@@ -191,7 +197,7 @@ module TestingCriterion
191197
class LabelModel < OpenAI::Internal::Type::BaseModel
192198
# @!attribute input
193199
# A list of chat messages forming the prompt or context. May include variable
194-
# references to the "item" namespace, ie {{item.name}}.
200+
# references to the `item` namespace, ie {{item.name}}.
195201
#
196202
# @return [Array<OpenAI::EvalCreateParams::TestingCriterion::LabelModel::Input::SimpleInputMessage, OpenAI::EvalCreateParams::TestingCriterion::LabelModel::Input::EvalItem>]
197203
required :input,
@@ -249,7 +255,7 @@ class LabelModel < OpenAI::Internal::Type::BaseModel
249255
# @param type [Symbol, :label_model] The object type, which is always `label_model`.
250256

251257
# A chat message that makes up the prompt or context. May include variable
252-
# references to the "item" namespace, ie {{item.name}}.
258+
# references to the `item` namespace, ie {{item.name}}.
253259
module Input
254260
extend OpenAI::Internal::Type::Union
255261

lib/openai/models/eval_create_response.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class EvalCreateResponse < OpenAI::Internal::Type::BaseModel
6161
#
6262
# - Improve the quality of my chatbot
6363
# - See how well my chatbot handles customer support
64-
# - Check if o3-mini is better at my usecase than gpt-4o
64+
# - Check if o4-mini is better at my use case than gpt-4o
6565
#
6666
# @param id [String] Unique identifier for the evaluation.
6767
#
@@ -98,7 +98,7 @@ module DataSourceConfig
9898
variant :logs, -> { OpenAI::Models::EvalCreateResponse::DataSourceConfig::Logs }
9999

100100
# Deprecated in favor of LogsDataSourceConfig.
101-
variant :"stored-completions", -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
101+
variant :stored_completions, -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
102102

103103
class Logs < OpenAI::Internal::Type::BaseModel
104104
# @!attribute schema

lib/openai/models/eval_list_response.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class EvalListResponse < OpenAI::Internal::Type::BaseModel
6161
#
6262
# - Improve the quality of my chatbot
6363
# - See how well my chatbot handles customer support
64-
# - Check if o3-mini is better at my usecase than gpt-4o
64+
# - Check if o4-mini is better at my use case than gpt-4o
6565
#
6666
# @param id [String] Unique identifier for the evaluation.
6767
#
@@ -98,7 +98,7 @@ module DataSourceConfig
9898
variant :logs, -> { OpenAI::Models::EvalListResponse::DataSourceConfig::Logs }
9999

100100
# Deprecated in favor of LogsDataSourceConfig.
101-
variant :"stored-completions", -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
101+
variant :stored_completions, -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
102102

103103
class Logs < OpenAI::Internal::Type::BaseModel
104104
# @!attribute schema

lib/openai/models/eval_retrieve_response.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class EvalRetrieveResponse < OpenAI::Internal::Type::BaseModel
6161
#
6262
# - Improve the quality of my chatbot
6363
# - See how well my chatbot handles customer support
64-
# - Check if o3-mini is better at my usecase than gpt-4o
64+
# - Check if o4-mini is better at my use case than gpt-4o
6565
#
6666
# @param id [String] Unique identifier for the evaluation.
6767
#
@@ -98,7 +98,7 @@ module DataSourceConfig
9898
variant :logs, -> { OpenAI::Models::EvalRetrieveResponse::DataSourceConfig::Logs }
9999

100100
# Deprecated in favor of LogsDataSourceConfig.
101-
variant :"stored-completions", -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
101+
variant :stored_completions, -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
102102

103103
class Logs < OpenAI::Internal::Type::BaseModel
104104
# @!attribute schema

lib/openai/models/eval_stored_completions_data_source_config.rb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ class EvalStoredCompletionsDataSourceConfig < OpenAI::Internal::Type::BaseModel
1212
required :schema, OpenAI::Internal::Type::HashOf[OpenAI::Internal::Type::Unknown]
1313

1414
# @!attribute type
15-
# The type of data source. Always `stored-completions`.
15+
# The type of data source. Always `stored_completions`.
1616
#
17-
# @return [Symbol, :"stored-completions"]
18-
required :type, const: :"stored-completions"
17+
# @return [Symbol, :stored_completions]
18+
required :type, const: :stored_completions
1919

2020
# @!attribute metadata
2121
# Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -28,7 +28,7 @@ class EvalStoredCompletionsDataSourceConfig < OpenAI::Internal::Type::BaseModel
2828
# @return [Hash{Symbol=>String}, nil]
2929
optional :metadata, OpenAI::Internal::Type::HashOf[String], nil?: true
3030

31-
# @!method initialize(schema:, metadata: nil, type: :"stored-completions")
31+
# @!method initialize(schema:, metadata: nil, type: :stored_completions)
3232
# Some parameter documentation has been truncated, see
3333
# {OpenAI::EvalStoredCompletionsDataSourceConfig} for more details.
3434
#
@@ -38,7 +38,7 @@ class EvalStoredCompletionsDataSourceConfig < OpenAI::Internal::Type::BaseModel
3838
#
3939
# @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. This can be
4040
#
41-
# @param type [Symbol, :"stored-completions"] The type of data source. Always `stored-completions`.
41+
# @param type [Symbol, :stored_completions] The type of data source. Always `stored_completions`.
4242
end
4343
end
4444
end

lib/openai/models/eval_update_response.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class EvalUpdateResponse < OpenAI::Internal::Type::BaseModel
6161
#
6262
# - Improve the quality of my chatbot
6363
# - See how well my chatbot handles customer support
64-
# - Check if o3-mini is better at my usecase than gpt-4o
64+
# - Check if o4-mini is better at my use case than gpt-4o
6565
#
6666
# @param id [String] Unique identifier for the evaluation.
6767
#
@@ -98,7 +98,7 @@ module DataSourceConfig
9898
variant :logs, -> { OpenAI::Models::EvalUpdateResponse::DataSourceConfig::Logs }
9999

100100
# Deprecated in favor of LogsDataSourceConfig.
101-
variant :"stored-completions", -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
101+
variant :stored_completions, -> { OpenAI::EvalStoredCompletionsDataSourceConfig }
102102

103103
class Logs < OpenAI::Internal::Type::BaseModel
104104
# @!attribute schema

lib/openai/models/evals/create_eval_completions_run_data_source.rb

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ module Models
55
module Evals
66
class CreateEvalCompletionsRunDataSource < OpenAI::Internal::Type::BaseModel
77
# @!attribute source
8-
# A StoredCompletionsRunDataSource configuration describing a set of filters
8+
# Determines what populates the `item` namespace in this run's data source.
99
#
1010
# @return [OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::FileContent, OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::FileID, OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::StoredCompletions]
1111
required :source, union: -> { OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source }
@@ -17,6 +17,10 @@ class CreateEvalCompletionsRunDataSource < OpenAI::Internal::Type::BaseModel
1717
required :type, enum: -> { OpenAI::Evals::CreateEvalCompletionsRunDataSource::Type }
1818

1919
# @!attribute input_messages
20+
# Used when sampling from a model. Dictates the structure of the messages passed
21+
# into the model. Can either be a reference to a prebuilt trajectory (ie,
22+
# `item.input_trajectory`), or a template with variable references to the `item`
23+
# namespace.
2024
#
2125
# @return [OpenAI::Evals::CreateEvalCompletionsRunDataSource::InputMessages::Template, OpenAI::Evals::CreateEvalCompletionsRunDataSource::InputMessages::ItemReference, nil]
2226
optional :input_messages,
@@ -41,17 +45,17 @@ class CreateEvalCompletionsRunDataSource < OpenAI::Internal::Type::BaseModel
4145
#
4246
# A CompletionsRunDataSource object describing a model sampling configuration.
4347
#
44-
# @param source [OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::FileContent, OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::FileID, OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::StoredCompletions] A StoredCompletionsRunDataSource configuration describing a set of filters
48+
# @param source [OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::FileContent, OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::FileID, OpenAI::Evals::CreateEvalCompletionsRunDataSource::Source::StoredCompletions] Determines what populates the `item` namespace in this run's data source.
4549
#
4650
# @param type [Symbol, OpenAI::Evals::CreateEvalCompletionsRunDataSource::Type] The type of run data source. Always `completions`.
4751
#
48-
# @param input_messages [OpenAI::Evals::CreateEvalCompletionsRunDataSource::InputMessages::Template, OpenAI::Evals::CreateEvalCompletionsRunDataSource::InputMessages::ItemReference]
52+
# @param input_messages [OpenAI::Evals::CreateEvalCompletionsRunDataSource::InputMessages::Template, OpenAI::Evals::CreateEvalCompletionsRunDataSource::InputMessages::ItemReference] Used when sampling from a model. Dictates the structure of the messages passed i
4953
#
5054
# @param model [String] The name of the model to use for generating completions (e.g. "o3-mini").
5155
#
5256
# @param sampling_params [OpenAI::Evals::CreateEvalCompletionsRunDataSource::SamplingParams]
5357

54-
# A StoredCompletionsRunDataSource configuration describing a set of filters
58+
# Determines what populates the `item` namespace in this run's data source.
5559
#
5660
# @see OpenAI::Evals::CreateEvalCompletionsRunDataSource#source
5761
module Source
@@ -212,6 +216,11 @@ module Type
212216
# @return [Array<Symbol>]
213217
end
214218

219+
# Used when sampling from a model. Dictates the structure of the messages passed
220+
# into the model. Can either be a reference to a prebuilt trajectory (ie,
221+
# `item.input_trajectory`), or a template with variable references to the `item`
222+
# namespace.
223+
#
215224
# @see OpenAI::Evals::CreateEvalCompletionsRunDataSource#input_messages
216225
module InputMessages
217226
extend OpenAI::Internal::Type::Union
@@ -226,7 +235,7 @@ module InputMessages
226235
class Template < OpenAI::Internal::Type::BaseModel
227236
# @!attribute template
228237
# A list of chat messages forming the prompt or context. May include variable
229-
# references to the "item" namespace, ie {{item.name}}.
238+
# references to the `item` namespace, ie {{item.name}}.
230239
#
231240
# @return [Array<OpenAI::Responses::EasyInputMessage, OpenAI::Evals::CreateEvalCompletionsRunDataSource::InputMessages::Template::Template::Message>]
232241
required :template,
@@ -423,7 +432,7 @@ module Type
423432

424433
class ItemReference < OpenAI::Internal::Type::BaseModel
425434
# @!attribute item_reference
426-
# A reference to a variable in the "item" namespace. Ie, "item.name"
435+
# A reference to a variable in the `item` namespace. Ie, "item.input_trajectory"
427436
#
428437
# @return [String]
429438
required :item_reference, String
@@ -435,7 +444,7 @@ class ItemReference < OpenAI::Internal::Type::BaseModel
435444
required :type, const: :item_reference
436445

437446
# @!method initialize(item_reference:, type: :item_reference)
438-
# @param item_reference [String] A reference to a variable in the "item" namespace. Ie, "item.name"
447+
# @param item_reference [String] A reference to a variable in the `item` namespace. Ie, "item.input_trajectory"
439448
#
440449
# @param type [Symbol, :item_reference] The type of input messages. Always `item_reference`.
441450
end

lib/openai/models/evals/create_eval_jsonl_run_data_source.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ module Models
55
module Evals
66
class CreateEvalJSONLRunDataSource < OpenAI::Internal::Type::BaseModel
77
# @!attribute source
8+
# Determines what populates the `item` namespace in the data source.
89
#
910
# @return [OpenAI::Evals::CreateEvalJSONLRunDataSource::Source::FileContent, OpenAI::Evals::CreateEvalJSONLRunDataSource::Source::FileID]
1011
required :source, union: -> { OpenAI::Evals::CreateEvalJSONLRunDataSource::Source }
@@ -19,10 +20,12 @@ class CreateEvalJSONLRunDataSource < OpenAI::Internal::Type::BaseModel
1920
# A JsonlRunDataSource object that specifies a JSONL file that matches the
2021
# eval
2122
#
22-
# @param source [OpenAI::Evals::CreateEvalJSONLRunDataSource::Source::FileContent, OpenAI::Evals::CreateEvalJSONLRunDataSource::Source::FileID]
23+
# @param source [OpenAI::Evals::CreateEvalJSONLRunDataSource::Source::FileContent, OpenAI::Evals::CreateEvalJSONLRunDataSource::Source::FileID] Determines what populates the `item` namespace in the data source.
2324
#
2425
# @param type [Symbol, :jsonl] The type of data source. Always `jsonl`.
2526

27+
# Determines what populates the `item` namespace in the data source.
28+
#
2629
# @see OpenAI::Evals::CreateEvalJSONLRunDataSource#source
2730
module Source
2831
extend OpenAI::Internal::Type::Union

0 commit comments

Comments
 (0)