class Riffer::Evals::EvaluatorRunner
Orchestrates running evaluators against an agent across multiple scenarios.
Accepts an agent class, a list of scenarios, and evaluator classes. Generates agent output for each scenario and runs all evaluators, returning a RunResult with per-scenario details and aggregate scores.
Example:

    result = Riffer::Evals::EvaluatorRunner.run(
      agent: MyAgent,
      scenarios: [
        { input: "What is Ruby?", ground_truth: "A programming language" },
        { input: "What is Python?" }
      ],
      evaluators: [AnswerRelevancyEvaluator]
    )
    result.scores # => { AnswerRelevancyEvaluator => 0.85 }
Public Class Methods
Source
# File lib/riffer/evals/evaluator_runner.rb, line 34
#
# Entry point: checks the agent and evaluator arguments, runs each scenario
# through run_scenario, and wraps the collected per-scenario results in a
# Riffer::Evals::RunResult.
#
# agent::      class checked by validate_agent!
# scenarios::  collection of scenarios; each one is forwarded to run_scenario
# evaluators:: collection checked by validate_evaluators!
# context::    optional value forwarded unchanged to run_scenario (default nil)
#
# Both validations run before any scenario is executed, so an invalid
# argument raises before the agent is asked to generate anything.
def self.run(agent:, scenarios:, evaluators:, context: nil)
  validate_agent!(agent)
  validate_evaluators!(evaluators)

  per_scenario = scenarios.map { |entry|
    run_scenario(agent: agent, scenario: entry, evaluators: evaluators, context: context)
  }

  Riffer::Evals::RunResult.new(scenario_results: per_scenario)
end
Runs evaluators against an agent for the given scenarios.
- agent: an Agent subclass (not an instance).
- scenarios: array of hashes with :input, optional :ground_truth, and optional :context.
- evaluators: array of Evaluator subclasses to run against each scenario.
- context: optional hash passed to agent.generate. A per-scenario :context takes precedence.
Raises Riffer::ArgumentError if agent is not a Riffer::Agent subclass or if any evaluator is not a Riffer::Evals::Evaluator subclass.
Source
# File lib/riffer/evals/evaluator_runner.rb, line 65
#
# Runs a single scenario: asks the agent to generate a response for the
# scenario input, feeds that response to a fresh instance of every evaluator
# class, and packages everything into a Riffer::Evals::ScenarioResult.
#
# agent::      object responding to generate(input, context:)
# scenario::   object indexed with :input, :ground_truth and :context
# evaluators:: collection of classes; each is instantiated with no arguments
# context::    fallback context, used only when scenario[:context] is nil or false
def self.run_scenario(agent:, scenario:, evaluators:, context: nil)
  prompt = scenario[:input]
  expected = scenario[:ground_truth]

  # The scenario's own context wins over the run-wide one.
  reply = agent.generate(prompt, context: scenario[:context] || context)
  answer = reply.content
  transcript = reply.messages

  verdicts = evaluators.map { |klass|
    klass.new.evaluate(input: prompt, output: answer, ground_truth: expected, messages: transcript)
  }

  Riffer::Evals::ScenarioResult.new(
    input: prompt,
    output: answer,
    ground_truth: expected,
    results: verdicts,
    messages: transcript
  )
end
Source
# File lib/riffer/evals/evaluator_runner.rb, line 47
#
# Ensures +agent+ is a Class that descends from Riffer::Agent. The check uses
# Module#<, so Riffer::Agent itself is rejected along with non-classes and
# unrelated classes.
#
# Returns nil when the agent is acceptable.
# Raises Riffer::ArgumentError otherwise, quoting agent.inspect in the message.
def self.validate_agent!(agent)
  agent_subclass = agent.is_a?(Class) && agent < Riffer::Agent
  return if agent_subclass

  raise Riffer::ArgumentError,
    "agent must be a subclass of Riffer::Agent, got #{agent.inspect}"
end
Source
# File lib/riffer/evals/evaluator_runner.rb, line 55
#
# Ensures every element of +evaluators+ is a Class that descends from
# Riffer::Evals::Evaluator. The check uses Module#<, so the base class itself
# is rejected too.
#
# Stops at the first offending element and raises Riffer::ArgumentError with
# that element's inspect output; otherwise returns whatever +each+ returns.
def self.validate_evaluators!(evaluators)
  evaluators.each do |candidate|
    acceptable = candidate.is_a?(Class) && candidate < Riffer::Evals::Evaluator

    unless acceptable
      raise Riffer::ArgumentError,
        "each evaluator must be a subclass of Riffer::Evals::Evaluator, got #{candidate.inspect}"
    end
  end
end