ai-benchmark/run.sh
second_constantine f60dbf49f1 feat: Add context size support for benchmarks and update example usage
This commit adds support for specifying context size when running benchmarks, which is passed to the Ollama client as the `num_ctx` option. The changes include:

- Updated the `run` method in the base benchmark class to accept an optional `context_size` parameter
- Modified the Ollama client call to include context size in the options when provided
- Updated the `run_benchmarks` function to accept and pass through the context size
- Added example usage to the help output showing how to use the new context size parameter
- Fixed prompt formatting in the summarization benchmark to use `text` instead of `task`

The changes enable running benchmarks with custom context sizes, which is useful for testing models with different context window limitations.
2026-01-26 15:27:37 +03:00

82 lines
3.2 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Kernel name as printed by `uname -s`
# (Linux -> Linux, macOS -> Darwin, FreeBSD -> FreeBSD, ...).
OS_NAME="$(uname -s)"
#######################################
# Create the project virtual environment in ./z and install dependencies.
# Globals:
#   OS_NAME (read) - kernel name; "Darwin" selects python3.13, any other
#                    value selects python3.
# Returns:
#   The venv command's non-zero status if the environment cannot be
#   created (dependencies are then NOT installed); otherwise the status
#   of upd.
#######################################
init() {
  local interpreter=python3
  if [[ "$OS_NAME" == "Darwin" ]]; then
    interpreter=python3.13
  fi

  # Stop on failure: carrying on would make upd run pip without a venv,
  # installing packages into whatever interpreter is first on PATH.
  "$interpreter" -m venv z || return

  upd
}
#######################################
# Upgrade Python dependencies inside the virtual environment and pull the
# latest commits of the git submodules.
# Returns:
#   Non-zero if the venv cannot be activated (nothing else is run in that
#   case), or if either the pip or the git step fails; 0 otherwise.
#######################################
upd() {
  # Without an active venv, pip would modify whatever Python installation
  # happens to be first on PATH, so refuse to continue.
  activate || return

  # The two steps are independent: run both, but do not let a successful
  # submodule update hide a failed dependency install.
  local status=0
  pip install -r requirements.txt --upgrade || status=$?
  git submodule update --remote --merge || status=$?
  return "$status"
}
#######################################
# Delete everything matched by results/* and print a confirmation line
# to stdout.
#######################################
clean() {
  local -a report_paths=(results/*)
  rm -rf "${report_paths[@]}"
  printf '%s\n' "Отчеты успешно очищены"
}
#######################################
# Source z/bin/activate (path relative to the current directory) into the
# calling shell.
# Returns:
#   The status of sourcing that file.
#######################################
activate() {
  . z/bin/activate
}
#######################################
# Print the list of available sub-commands and usage examples to stdout.
#######################################
print_usage() {
  echo " Аргументом необходимо написать название скрипта (+опционально аргументы скрипта)"
  echo "Скрипты:"
  echo " * init - инициализация, устанавливает env"
  echo " * upd - обновление зависимостей"
  echo " * run - запуск бенчмарков (translation, summarization, codegen)"
  echo " * clean - очистка отчетов"
  echo " * gen - генерация тестов через Ollama (translation, summarization, codegen)"
  echo " * gen-mongo - генерация тестов пересказов из MongoDB (использование: ./run.sh gen-mongo <record-id> или ./run.sh gen-mongo --id-file <file-path>)"
  echo ""
  echo "Примеры использования:"
  echo " * ./run.sh run -m second_constantine/t-lite-it-1.0:7b -b translation summarization"
  echo " * ./run.sh run -m second_constantine/t-lite-it-1.0:7b -u http://10.0.0.4:11434 -c 2048 -b translation summarization"
  echo " * ./run.sh run -m translategemma:4b -u http://10.0.0.4:11434 -c 128000 -b summarization"
  echo " * ./run.sh gen"
  echo " * ./run.sh gen-mongo 507f1f77bcf86cd799439011"
  echo " * ./run.sh gen-mongo --id-file ids.txt"
}

#######################################
# Entry point: dispatch the first argument to the matching sub-command.
# Arguments:
#   $1  - sub-command: init | upd | run | clean | gen | gen-mongo
#   $2+ - sub-command arguments (run: forwarded verbatim to src/main.py;
#         gen-mongo: <record-id> or --id-file <file-path>)
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 when called without a sub-command (usage is printed) or when the
#   sub-command succeeds; non-zero for an unknown sub-command, invalid
#   arguments, a venv that cannot be activated, or a failing Python step.
#######################################
main() {
  echo "_= Project Scripts =_"

  if [[ -z "${1:-}" ]]; then
    print_usage
    return 0
  fi

  local cmd=$1
  shift

  case "$cmd" in
    init)
      init
      ;;
    upd)
      upd
      ;;
    run)
      # Never fall back to the system interpreter when the venv is missing.
      activate || exit 1
      python src/main.py "$@"
      ;;
    clean)
      clean
      ;;
    gen)
      activate || exit 1
      echo "🤖 Генерирую тесты через Ollama..."
      # Exit with the generator's status so the success line below is only
      # printed when generation actually worked.
      python scripts/generate_tests.py --count 1 --category all --model second_constantine/t-lite-it-1.0:7b --ollama-url http://10.0.0.4:11434 || exit
      echo "✅ Тесты успешно сгенерированы"
      ;;
    gen-mongo)
      activate || exit 1
      echo "🔍 Генерирую тесты пересказов из MongoDB... "
      local -a mongo_args
      if [[ "${1:-}" == "--id-file" && -n "${2:-}" ]]; then
        # New form: ./run.sh gen-mongo --id-file <file-path>
        mongo_args=(--id-file "$2")
      elif [[ -n "${1:-}" && "$1" != "--id-file" ]]; then
        # Legacy form: ./run.sh gen-mongo <record-id>
        mongo_args=(--record-id "$1")
      else
        # Nothing given, or --id-file without a path.
        echo "❌ Ошибка: Укажите либо --record-id, либо --id-file" >&2
        echo "Использование: ./run.sh gen-mongo <record-id> или ./run.sh gen-mongo --id-file <file-path>" >&2
        exit 1
      fi
      python scripts/generate_summarization_from_mongo.py "${mongo_args[@]}" || exit
      echo "✅ Тесты из MongoDB успешно сгенерированы"
      ;;
    *)
      # An unrecognised sub-command used to be silently ignored (exit 0).
      echo "❌ Ошибка: неизвестный скрипт '$cmd'" >&2
      print_usage >&2
      exit 1
      ;;
  esac
}

main "$@"