docs: update test format documentation in README

Update documentation to reflect new TXT format with separator for summarization tests instead of JSON format. Clarify that expected field may be empty if summary generation fails.

feat: change test generation to TXT format with separator

Change test generation from JSON to TXT format with TEST_SEPARATOR. Add a filename sanitization function to handle MongoDB record IDs. Update the output path and file-naming logic. Add an attempt to generate the expected summary via the LLM, falling back to an empty string if generation fails.
This commit is contained in:
2026-01-22 20:40:41 +03:00
parent 2466f1253a
commit 2a04e6c089
21 changed files with 96 additions and 104 deletions

View File

@@ -9,6 +9,9 @@ class CodegenBenchmark(Benchmark):
def __init__(self):
    """Initialize the "codegen" benchmark and load its universal prompt."""
    super().__init__("codegen")
    # Load the universal prompt template from disk once at construction time.
    with open('prompts/codegen.txt', 'r', encoding='utf-8') as prompt_file:
        raw_prompt = prompt_file.read()
    self.universal_prompt = raw_prompt.strip()
def load_test_data(self) -> List[Dict[str, Any]]:
"""
@@ -29,7 +32,7 @@ class CodegenBenchmark(Benchmark):
if len(parts) == 2:
test_data.append({
'name': filename.replace('.txt', ''),
'prompt': parts[0],
'prompt': self.universal_prompt.format(task=parts[0]),
'expected': parts[1]
})

View File

@@ -9,6 +9,9 @@ class SummarizationBenchmark(Benchmark):
def __init__(self):
    """Initialize the "summarization" benchmark and load its universal prompt."""
    super().__init__("summarization")
    # Load the universal prompt template from disk once at construction time.
    with open('prompts/summarization.txt', 'r', encoding='utf-8') as prompt_file:
        raw_prompt = prompt_file.read()
    self.universal_prompt = raw_prompt.strip()
def load_test_data(self) -> List[Dict[str, Any]]:
"""
@@ -29,7 +32,7 @@ class SummarizationBenchmark(Benchmark):
if len(parts) == 2:
test_data.append({
'name': filename.replace('.txt', ''),
'prompt': parts[0],
'prompt': self.universal_prompt.format(task=parts[0]),
'expected': parts[1]
})

View File

@@ -5,14 +5,17 @@ from typing import Dict, Any, List
from benchmarks.base import Benchmark, TEST_SEPARATOR
class TranslationBenchmark(Benchmark):
"""Бенчмарк для тестирования переводов."""
"""Бенчмарк для тестирования перевода."""
def __init__(self):
    """Initialize the "translation" benchmark and load its universal prompt."""
    super().__init__("translation")
    # Load the universal prompt template from disk once at construction time.
    with open('prompts/translation.txt', 'r', encoding='utf-8') as prompt_file:
        raw_prompt = prompt_file.read()
    self.universal_prompt = raw_prompt.strip()
def load_test_data(self) -> List[Dict[str, Any]]:
"""
Загрузка тестовых данных для переводов.
Загрузка тестовых данных для перевода.
Returns:
Список тестовых случаев
@@ -29,7 +32,7 @@ class TranslationBenchmark(Benchmark):
if len(parts) == 2:
test_data.append({
'name': filename.replace('.txt', ''),
'prompt': parts[0],
'prompt': self.universal_prompt.format(text=parts[0]),
'expected': parts[1]
})