diff --git a/run.sh b/run.sh index 3b818bb..b8b424f 100755 --- a/run.sh +++ b/run.sh @@ -73,7 +73,8 @@ else echo "" echo "Примеры использования:" echo " * ./run.sh run -m second_constantine/t-lite-it-1.0:7b -b translation summarization" - echo " * ./run.sh run -m second_constantine/t-lite-it-1.0:7b --num-ctx 16000" + echo " * ./run.sh run -m second_constantine/t-lite-it-1.0:7b -u http://10.0.0.4:11434 -c 2048 -b translation summarization" + echo " * ./run.sh run -m translategemma:4b -u http://10.0.0.4:11434 -c 128000 -b summarization" echo " * ./run.sh gen" echo " * ./run.sh gen-mongo 507f1f77bcf86cd799439011" echo " * ./run.sh gen-mongo --id-file ids.txt" diff --git a/src/benchmarks/__pycache__/base.cpython-313.pyc b/src/benchmarks/__pycache__/base.cpython-313.pyc index 9a972bc..7e144bf 100644 Binary files a/src/benchmarks/__pycache__/base.cpython-313.pyc and b/src/benchmarks/__pycache__/base.cpython-313.pyc differ diff --git a/src/benchmarks/__pycache__/codegen.cpython-313.pyc b/src/benchmarks/__pycache__/codegen.cpython-313.pyc index 23cc537..e6dad8e 100644 Binary files a/src/benchmarks/__pycache__/codegen.cpython-313.pyc and b/src/benchmarks/__pycache__/codegen.cpython-313.pyc differ diff --git a/src/benchmarks/__pycache__/summarization.cpython-313.pyc b/src/benchmarks/__pycache__/summarization.cpython-313.pyc index 73365b5..5c0fc70 100644 Binary files a/src/benchmarks/__pycache__/summarization.cpython-313.pyc and b/src/benchmarks/__pycache__/summarization.cpython-313.pyc differ diff --git a/src/benchmarks/__pycache__/translation.cpython-313.pyc b/src/benchmarks/__pycache__/translation.cpython-313.pyc index 0aeaf49..f34c1bc 100644 Binary files a/src/benchmarks/__pycache__/translation.cpython-313.pyc and b/src/benchmarks/__pycache__/translation.cpython-313.pyc differ diff --git a/src/benchmarks/base.py b/src/benchmarks/base.py index 0260960..4a4e9cf 100644 --- a/src/benchmarks/base.py +++ b/src/benchmarks/base.py @@ -46,14 +46,13 @@ class Benchmark(ABC): """ 
pass - def run(self, ollama_client: OllamaClient, model_name: str, num_ctx: int = 32000) -> Dict[str, Any]: + def run(self, ollama_client: OllamaClient, model_name: str, num_ctx: int = 32000, context_size: int = None) -> Dict[str, Any]: """ Запуск бенчмарка. Args: ollama_client: Клиент для работы с Ollama model_name: Название модели - num_ctx: Размер контекста Returns: Результаты бенчмарка @@ -73,12 +72,20 @@ class Benchmark(ABC): # Получение ответа от модели prompt = test_case['prompt'] self.logger.debug(f"Prompt: {prompt[:200]}...") # Логируем начало промпта + # Подготовка опций для вызова + options = {'temperature': 0.7} + if context_size is not None: + # Для Ollama параметры контекста передаются в options + options['num_ctx'] = context_size + self.logger.debug(f"Setting context size to {context_size}") + + self.logger.debug(f"About to call generate with model={model_name}, prompt length={len(prompt)}, options={options}") model_response = ollama_client.generate( model=model_name, prompt=prompt, - num_ctx=num_ctx, - options={'temperature': 0.7} + options=options ) + self.logger.debug(f"Generate call completed, response length={len(model_response) if model_response else 0}") # Замер времени latency = time.time() - start_time diff --git a/src/main.py b/src/main.py index b477a9b..11b15bc 100644 --- a/src/main.py +++ b/src/main.py @@ -18,7 +18,7 @@ def setup_logging(verbose: bool = False): ] ) -def run_benchmarks(ollama_client: OllamaClient, model_name: str, benchmarks: List[str], num_ctx: int) -> List[dict]: +def run_benchmarks(ollama_client: OllamaClient, model_name: str, benchmarks: List[str], context_size: int = None) -> List[dict]: """ Запуск выбранных бенчмарков. 
@@ -26,7 +26,6 @@ def run_benchmarks(ollama_client: OllamaClient, model_name: str, benchmarks: Lis ollama_client: Клиент для работы с Ollama model_name: Название модели benchmarks: Список имен бенчмарков для запуска - num_ctx: Размер контекста Returns: Список результатов бенчмарков @@ -46,7 +45,7 @@ def run_benchmarks(ollama_client: OllamaClient, model_name: str, benchmarks: Lis logging.info(f"Running {benchmark_name} benchmark...") benchmark = benchmark_classes[benchmark_name]() - result = benchmark.run(ollama_client, model_name, num_ctx) + result = benchmark.run(ollama_client, model_name, context_size=context_size) results.append(result) return results @@ -56,11 +55,11 @@ def main(): parser = argparse.ArgumentParser(description='LLM Benchmarking Tool') parser.add_argument('-m', '--model', required=True, help='Название модели для тестирования') parser.add_argument('-u', '--ollama-url', default='http://localhost:11434', help='URL подключения к Ollama серверу') + parser.add_argument('-c', '--context-size', type=int, default=32000, help='Размер контекста для модели (по умолчанию 32000)') parser.add_argument('-b', '--benchmarks', nargs='+', default=['translation', 'summarization', 'codegen'], help='Список бенчмарков для выполнения (translation, summarization, codegen)') parser.add_argument('-o', '--output', default='results', help='Директория для сохранения результатов') parser.add_argument('-v', '--verbose', action='store_true', help='Подробный режим вывода') - parser.add_argument('--num-ctx', type=int, default=32000, help='Размер контекста для модели (по умолчанию 32000)') args = parser.parse_args() @@ -72,12 +71,11 @@ def main(): logging.info(f"Benchmarks to run: {', '.join(args.benchmarks)}") - logging.info(f"Context size: {args.num_ctx}") + logging.info(f"Context size: {args.context_size}") + # Инициализация клиента + ollama_client = OllamaClient(args.ollama_url) try: - # Инициализация клиента - ollama_client = OllamaClient(args.ollama_url) - # Запуск бенчмарков - results = run_benchmarks(ollama_client, args.model, args.benchmarks,
args.num_ctx) + results = run_benchmarks(ollama_client, args.model, args.benchmarks, args.context_size) # Генерация отчетов report_generator = ReportGenerator() @@ -89,9 +87,8 @@ def main(): report_generator.generate_summary_report(results, args.output, args.model, args.ollama_url) logging.info("Benchmarking completed successfully!") - except Exception as e: - logging.error(f"Error during benchmarking: {e}", exc_info=True) + logging.error(f"Error during benchmarking: {e}", exc_info=True) return 1 return 0 diff --git a/src/models/__pycache__/ollama_client.cpython-313.pyc b/src/models/__pycache__/ollama_client.cpython-313.pyc index 7d3f8ce..d3f70a1 100644 Binary files a/src/models/__pycache__/ollama_client.cpython-313.pyc and b/src/models/__pycache__/ollama_client.cpython-313.pyc differ diff --git a/src/models/ollama_client.py b/src/models/ollama_client.py index 3ac8b96..20faacd 100644 --- a/src/models/ollama_client.py +++ b/src/models/ollama_client.py @@ -45,7 +45,30 @@ class OllamaClient: options=options, **kwargs ) - return response['response'] + # Проверяем структуру ответа + self.logger.debug(f"Response structure: {response}") + self.logger.debug(f"Response type: {type(response)}") + + # Если это объект GenerateResponse (как в ollama 0.3+) + if hasattr(response, 'response'): + return response.response + elif hasattr(response, 'text'): + return response.text + elif isinstance(response, dict): + if 'response' in response: + return response['response'] + elif 'text' in response: + return response['text'] + else: + # Попробуем извлечь любое строковое значение + for key, value in response.items(): + if isinstance(value, str): + return value + raise ValueError(f"Unexpected response format - no text or response field: {response}") + elif isinstance(response, str): + return response + else: + raise ValueError(f"Unexpected response format: {response}") except Exception as e: error_msg = f"Error generating response for model {model}: {e}" self.logger.error(error_msg) diff --git 
a/src/utils/__pycache__/report.cpython-313.pyc b/src/utils/__pycache__/report.cpython-313.pyc index e1b31d9..c6a5895 100644 Binary files a/src/utils/__pycache__/report.cpython-313.pyc and b/src/utils/__pycache__/report.cpython-313.pyc differ