# ---------------------------------------------------------------
# 📚 LEARNING RESOURCES
# Quick Start: https://github.com/Kaggle/kaggle-benchmarks/blob/ci/quick_start.md
# Cookbook: https://github.com/Kaggle/kaggle-benchmarks/blob/ci/cookbook.md
# ---------------------------------------------------------------
import kaggle_benchmarks as kbench
# ---------------------------------------------------------------
# STEP 1: DEFINE YOUR TASK
# The @task decorator turns a standard Python function into a Benchmark task.
# The first parameter must always be 'llm' (the model being tested).
# ---------------------------------------------------------------
@kbench.task(name="What is Kaggle?", description="Does the LLM know what Kaggle is?")
def what_is_kaggle(llm) -> None:
    """Check whether the model can describe Kaggle.

    Sends the prompt "What is Kaggle?" to ``llm``, applies a hard keyword
    check to the reply, then asks the judge model to assess the reply
    against two criteria and asserts that each criterion passed.

    Args:
        llm: The model being tested; must expose ``prompt(str)``.
    """
    # A. Prompt the model
    response: str = llm.prompt("What is Kaggle?")

    # B. Simple Check (Hard Rule)
    # Lower-casing the reply makes the keyword match case-insensitive.
    kbench.assertions.assert_in("platform", response.lower())

    # C. Optional Advanced Check (LLM Judge)
    assessment = kbench.assertions.assess_response_with_judge(
        response_text=response,
        judge_llm=kbench.judge_llm,
        criteria=[
            "The answer must mention data science or machine learning.",
            "The answer should mention competitions.",
        ],
    )

    # Iterate through the judge's feedback and assert success.
    # NOTE(review): an editor warning captured alongside this snippet reports
    # a 35-character limit on `expectation` strings for leaderboard
    # registration -- confirm the limit and shorten this message if it applies.
    for result in assessment.results:
        kbench.assertions.assert_true(
            result.passed,
            expectation=f"Judge Criterion '{result.criterion}' should pass: {result.reason}",
        )
# ---------------------------------------------------------------
# STEP 2: RUN THE TASK
# Runs the task against kbench.llm (presumably passed through as the
# task's 'llm' argument -- confirm in the kaggle-benchmarks docs).
# ---------------------------------------------------------------
what_is_kaggle.run(kbench.llm)
# 8,240 / 15,000 characters
# NEW
# Your notebook will run fine, but it may not register as a benchmark on the leaderboard if it exceeds 15,000 characters.
# Tip: Move long prompts to an external JSON file to keep your task code compact.
# Assertion string too long — Line 34: expectation is 67 characters (max 35). Task may run successfully but won't register on the leaderboard. NEW