`IndexError: Target N is out of bounds` raised inside `trainer.train()`

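This can occur when `num_labels` is not passed while loading the model: the classification head then defaults to 2 outputs, so any label N >= 2 is out of range for the loss.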

from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import numpy as np
import pandas as pd
import torch
import math

# 0) Example dataframe (replace with your own df).
# To use real data instead, load a frame that contains a 'text' column and an
# integer 'label' column, e.g. pd.read_csv("your_data.csv").
# NOTE: each of the 252 candidate ids is repeated ceil(3000/252) = 12 times
# (3024 values) and the result is cut to 3000 rows, so only labels 0..249
# actually occur in this example.
df = pd.DataFrame({
    "text": [f"ejemplo {i}" for i in range(3000)],
    "label": np.repeat(np.arange(252), repeats=math.ceil(3000 / 252))[:3000],
})


# 1) Ensure labels are 0..C-1
def normalize_labels(labels):
    """Shift integer class labels so that the smallest label becomes 0.

    Args:
        labels: pandas Series of integer class ids.

    Returns:
        A tuple ``(shifted, num_classes)``: ``shifted`` is ``labels`` minus its
        minimum, and ``num_classes`` is ``shifted.max() + 1``.

    Raises:
        ValueError: if ``labels`` is empty, or if any shifted value falls
            outside ``[0, num_classes - 1]`` (e.g. missing or fractional
            values).
    """
    if labels.empty:
        raise ValueError("labels must not be empty")
    offset = int(labels.min())
    shifted = labels - offset
    # Count classes AFTER shifting: counting on the raw labels would
    # over-size the classifier whenever the labels do not start at 0.
    num_classes = int(shifted.max()) + 1
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if not shifted.between(0, num_classes - 1).all():
        raise ValueError("labels must be in [0, C-1]")
    return shifted, num_classes


m = int(df["label"].min())  # smallest raw label, before normalization
df["label"], C = normalize_labels(df["label"])

# 2) Wrap the two needed columns in a Dataset, then hold out 10% as "test"
full_ds = Dataset.from_pandas(df[["text", "label"]], split="train")
ds = full_ds.train_test_split(test_size=0.1, seed=42)

# 3) Tokenize
tok = AutoTokenizer.from_pretrained("roberta-base")


def preprocess(ex):
    """Tokenize the 'text' field of a batch, truncating/padding to 64 tokens."""
    return tok(
        ex["text"],
        max_length=64,
        padding="max_length",
        truncation=True,
    )


# Tokenize in batches, drop the raw text column, and return torch tensors.
ds_tok = ds.map(preprocess, batched=True)
ds_tok = ds_tok.remove_columns(["text"])
ds_tok = ds_tok.with_format("torch")

# 4) Load the model with a classification head sized for our C classes
model = AutoModelForSequenceClassification.from_pretrained(
    "roberta-base",
    # do not raise if checkpoint weights differ in size from the model's
    ignore_mismatched_sizes=True,
    # number of outputs of the classification head
    num_labels=C,
)
# Optional but recommended: explicit id <-> name maps stored on the config.
model.config.id2label = {idx: str(idx) for idx in range(C)}
model.config.label2id = {
    name: idx for idx, name in model.config.id2label.items()
}

# 5) Short training run: a single epoch, no evaluation, no external reporting
args = TrainingArguments(
    output_dir="out_fix",
    # optimisation
    num_train_epochs=1,
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # logging / evaluation
    logging_steps=10,
    eval_strategy="no",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=ds_tok["train"],
)
# If the model is loaded without num_labels and ignore_mismatched_sizes, this
# call fails with "IndexError: Target ** is out of bounds."
trainer.train()