docs: update test format documentation in README
Update the documentation to reflect the new TXT format with a separator for summarization tests, replacing the previous JSON format. Clarify that the expected field may be empty if summary generation fails.

feat: change test generation to TXT format with separator
Change test generation from JSON to TXT format using TEST_SEPARATOR. Add a filename sanitization function to handle MongoDB record IDs. Update the output path and file-naming logic. Attempt to generate the expected summary through the LLM, falling back to an empty string on failure.
This commit is contained in:
@@ -9,6 +9,9 @@ class CodegenBenchmark(Benchmark):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("codegen")
|
||||
# Загружаем универсальный промпт
|
||||
with open('prompts/codegen.txt', 'r', encoding='utf-8') as f:
|
||||
self.universal_prompt = f.read().strip()
|
||||
|
||||
def load_test_data(self) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
@@ -29,7 +32,7 @@ class CodegenBenchmark(Benchmark):
|
||||
if len(parts) == 2:
|
||||
test_data.append({
|
||||
'name': filename.replace('.txt', ''),
|
||||
'prompt': parts[0],
|
||||
'prompt': self.universal_prompt.format(task=parts[0]),
|
||||
'expected': parts[1]
|
||||
})
|
||||
|
||||
|
||||
@@ -9,6 +9,9 @@ class SummarizationBenchmark(Benchmark):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("summarization")
|
||||
# Загружаем универсальный промпт
|
||||
with open('prompts/summarization.txt', 'r', encoding='utf-8') as f:
|
||||
self.universal_prompt = f.read().strip()
|
||||
|
||||
def load_test_data(self) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
@@ -29,7 +32,7 @@ class SummarizationBenchmark(Benchmark):
|
||||
if len(parts) == 2:
|
||||
test_data.append({
|
||||
'name': filename.replace('.txt', ''),
|
||||
'prompt': parts[0],
|
||||
'prompt': self.universal_prompt.format(task=parts[0]),
|
||||
'expected': parts[1]
|
||||
})
|
||||
|
||||
|
||||
@@ -5,14 +5,17 @@ from typing import Dict, Any, List
|
||||
from benchmarks.base import Benchmark, TEST_SEPARATOR
|
||||
|
||||
class TranslationBenchmark(Benchmark):
|
||||
"""Бенчмарк для тестирования переводов."""
|
||||
"""Бенчмарк для тестирования перевода."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("translation")
|
||||
# Загружаем универсальный промпт
|
||||
with open('prompts/translation.txt', 'r', encoding='utf-8') as f:
|
||||
self.universal_prompt = f.read().strip()
|
||||
|
||||
def load_test_data(self) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Загрузка тестовых данных для переводов.
|
||||
Загрузка тестовых данных для перевода.
|
||||
|
||||
Returns:
|
||||
Список тестовых случаев
|
||||
@@ -29,7 +32,7 @@ class TranslationBenchmark(Benchmark):
|
||||
if len(parts) == 2:
|
||||
test_data.append({
|
||||
'name': filename.replace('.txt', ''),
|
||||
'prompt': parts[0],
|
||||
'prompt': self.universal_prompt.format(text=parts[0]),
|
||||
'expected': parts[1]
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user