doc: add test generation instructions and update run.sh

Added documentation for test generation through Ollama, including new command-line arguments for `generate_tests.py` and updated `run.sh` script. Also added a new `gen` command to `run.sh` for generating tests via Ollama. This improves usability by providing clear instructions and automation for test generation.
2026-01-17 02:40:38 +03:00
parent 5c17378ce4
commit f117c7b23c
11 changed files with 393 additions and 1 deletions
--- a/scripts/generate_tests.py
+++ b/scripts/generate_tests.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python3
+"""
+Generate test data for the AI benchmark using Ollama.
+
+Tests are produced by an LLM for the following categories:
+- translation
+- summarization
+- codegen (code generation)
+
+Generated tests can also be validated.
+"""
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Dict, List, Optional
+
+# Put the repository root on sys.path so that src.models.ollama_client can be imported
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from src.models.ollama_client import OllamaClient
+
+def generate_translation_test(ollama: OllamaClient, model: str) -> Dict[str, str]:
+    """Генерирует тест для перевода через LLM."""
+    # Генерируем английский текст
+    en_prompt = 'Generate a simple English sentence for translation test. The sentence should be clear and not too long (5-10 words). Example: "Hello, how are you today?"'
+    en_text = ollama.generate(model, en_prompt).strip()
+
+    # Генерируем перевод
+    ru_prompt = f"""Translate the following English sentence to Russian:
+"{en_text}"
+Provide only the translation, no additional text."""
+    ru_text = ollama.generate(model, ru_prompt).strip()
+
+    return {
+        "prompt": f"Translate the following English text to Russian: '{en_text}'",
+        "expected": ru_text
+    }
+
+def generate_summarization_test(ollama: OllamaClient, model: str) -> Dict[str, str]:
+    """Генерирует тест для пересказа через LLM."""
+    # Генерируем текст для пересказа
+    text_prompt = 'Generate a short text (3-5 sentences) for summarization test. The text should be about technology, science, or programming. Example: "Artificial intelligence is intelligence demonstrated by machines. It involves studying intelligent agents that perceive their environment and take actions to achieve goals."'
+    text = ollama.generate(model, text_prompt).strip()
+
+    # Генерируем пересказ
+    summary_prompt = f"""Summarize the following text in 1-2 sentences:
+"{text}"
+Provide only the summary, no additional text."""
+    summary = ollama.generate(model, summary_prompt).strip()
+
+    return {
+        "prompt": f"Summarize the following text in 1-2 sentences: '{text}'",
+        "expected": summary
+    }
+
+def generate_codegen_test(ollama: OllamaClient, model: str) -> Dict[str, str]:
+    """Генерирует тест для генерации кода через LLM."""
+    # Генерируем задание для кода
+    task_prompt = 'Generate a simple Python programming task. The task should be clear and specific, asking to write a function. Example: "Write a Python function that calculates the factorial of a number using recursion."'
+    task = ollama.generate(model, task_prompt).strip()
+
+    # Генерируем код
+    code_prompt = f"""Write Python code to solve the following task:
+"{task}"
+Provide only the code, no explanations or additional text."""
+    code = ollama.generate(model, code_prompt).strip()
+
+    return {
+        "prompt": task,
+        "expected": code
+    }
+
+def generate_test(ollama: OllamaClient, model: str, category: str) -> Dict[str, str]:
+    """Генерирует тест для указанной категории через LLM."""
+    if category == "translation":
+        return generate_translation_test(ollama, model)
+    elif category == "summarization":
+        return generate_summarization_test(ollama, model)
+    elif category == "codegen":
+        return generate_codegen_test(ollama, model)
+    else:
+        raise ValueError(f"Unknown category: {category}")
+
+def validate_test(test_data: Dict[str, str]) -> bool:
+    """Валидирует тестовые данные."""
+    if not isinstance(test_data, dict):
+        print("❌ Тест должен быть словарём (JSON объект)")
+        return False
+
+    if "prompt" not in test_data:
+        print("❌ Отсутствует поле 'prompt'")
+        return False
+
+    if "expected" not in test_data:
+        print("❌ Отсутствует поле 'expected'")
+        return False
+
+    if not isinstance(test_data["prompt"], str):
+        print("❌ Поле 'prompt' должно быть строкой")
+        return False
+
+    if not isinstance(test_data["expected"], str):
+        print("❌ Поле 'expected' должно быть строкой")
+        return False
+
+    if not test_data["prompt"].strip():
+        print("❌ Поле 'prompt' не может быть пустым")
+        return False
+
+    if not test_data["expected"].strip():
+        print("❌ Поле 'expected' не может быть пустым")
+        return False
+
+    return True
+
+def validate_all_tests(test_dir: str) -> None:
+    """Валидирует все тесты в указанной директории."""
+    test_dir_path = Path(test_dir)
+    if not test_dir_path.exists():
+        print(f"❌ Директория {test_dir} не существует")
+        return
+
+    valid_count = 0
+    invalid_count = 0
+
+    for json_file in test_dir_path.glob("*.json"):
+        try:
+            with open(json_file, "r", encoding="utf-8") as f:
+                test_data = json.load(f)
+
+            if validate_test(test_data):
+                valid_count += 1
+                print(f"✅ {json_file.name} - валиден")
+            else:
+                invalid_count += 1
+                print(f"❌ {json_file.name} - не валиден")
+        except json.JSONDecodeError:
+            invalid_count += 1
+            print(f"❌ {json_file.name} - некорректный JSON")
+        except Exception as e:
+            invalid_count += 1
+            print(f"❌ {json_file.name} - ошибка: {str(e)}")
+
+    print(f"\nРезультаты валидации:")
+    print(f"Валидных тестов: {valid_count}")
+    print(f"Невалидных тестов: {invalid_count}")
+    print(f"Всего тестов: {valid_count + invalid_count}")
+
+def generate_tests(ollama: OllamaClient, model: str, count: int, category: str, output_dir: str) -> None:
+    """Генерирует указанное количество тестов через LLM."""
+    if category not in ["translation", "summarization", "codegen", "all"]:
+        print(f"❌ Неизвестная категория: {category}")
+        return
+
+    categories = [category] if category != "all" else ["translation", "summarization", "codegen"]
+
+    for cat in categories:
+        cat_dir = Path(output_dir) / cat
+        cat_dir.mkdir(parents=True, exist_ok=True)
+
+        for i in range(1, count + 1):
+            # Проверяем, существует ли уже тест с таким номером
+            test_num = 1
+            while True:
+                test_file = cat_dir / f"test{test_num}.json"
+                if not test_file.exists():
+                    break
+                test_num += 1
+
+            print(f"🤖 Генерирую тест {cat}/test{test_num}.json...")
+
+            # Генерируем тест через LLM
+            test_data = generate_test(ollama, model, cat)
+
+            # Валидируем generated тест
+            if not validate_test(test_data):
+                print(f"❌ Сгенерирован невалидный тест для {cat}, тест номер {test_num}")
+                continue
+
+            # Сохраняем тест
+            with open(test_file, "w", encoding="utf-8") as f:
+                json.dump(test_data, f, ensure_ascii=False, indent=2)
+
+            print(f"✅ Создан тест {cat}/test{test_num}.json")
+
+def main():
+    """Основная функция скрипта."""
+    parser = argparse.ArgumentParser(
+        description="Генератор тестовых данных для AI бенчмарка с использованием Ollama",
+        epilog="Примеры использования:\n"
+               "  python scripts/generate_tests.py --count 2 --category translation --model second_constantine/t-lite-it-1.0:7b --ollama-url http://10.0.0.4:11434\n"
+               "  python scripts/generate_tests.py --category all --model second_constantine/t-lite-it-1.0:7b --ollama-url http://10.0.0.4:11434\n"
+               "  python scripts/generate_tests.py --validate tests/translation"
+    )
+    parser.add_argument(
+        "--count",
+        type=int,
+        default=1,
+        help="Количество тестов для генерации (по умолчанию: 1)"
+    )
+    parser.add_argument(
+        "--category",
+        type=str,
+        default="all",
+        choices=["translation", "summarization", "codegen", "all"],
+        help="Категория тестов (translation, summarization, codegen, или all) (по умолчанию: all)"
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        required=True,
+        help="Название модели для генерации тестов (обязательный параметр)"
+    )
+    parser.add_argument(
+        "--ollama-url",
+        type=str,
+        required=True,
+        help="URL подключения к Ollama серверу (обязательный параметр)"
+    )
+    parser.add_argument(
+        "--validate",
+        type=str,
+        help="Валидировать тесты в указанной директории (например: tests/translation)"
+    )
+
+    args = parser.parse_args()
+
+    if args.validate:
+        print(f"🔍 Начинаю валидацию тестов в {args.validate}")
+        validate_all_tests(args.validate)
+    else:
+        print(f"🤖 Подключаюсь к Ollama серверу: {args.ollama_url}")
+        print(f"📝 Генерирую {args.count} тест(ов) для категории: {args.category}")
+        print(f"🎯 Используемая модель: {args.model}")
+
+        try:
+            ollama = OllamaClient(args.ollama_url)
+            generate_tests(ollama, args.model, args.count, args.category, "tests")
+        except Exception as e:
+            print(f"❌ Ошибка при генерации тестов: {e}")
+            sys.exit(1)
+
+    print("\n✨ Готово!")
+
+if __name__ == "__main__":
+    main()