From 58a56ab00145966952a365296cdb8d83c19bb5ab Mon Sep 17 00:00:00 2001
From: Nat Kershaw
Date: Tue, 17 Sep 2024 11:17:02 -0700
Subject: [PATCH 1/3] Llama example

---
 examples/c/CMakeLists.txt | 19 +++++++++
 examples/c/src/llama.cpp  | 86 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 examples/c/src/llama.cpp

diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt
index 56420786e..a5e032c08 100644
--- a/examples/c/CMakeLists.txt
+++ b/examples/c/CMakeLists.txt
@@ -30,6 +30,7 @@ endif()
 
 add_executable(phi3 ${CMAKE_SOURCE_DIR}/src/main.cpp)
 add_executable(phi3v ${CMAKE_SOURCE_DIR}/src/phi3v.cpp)
+add_executable(llama ${CMAKE_SOURCE_DIR}/src/llama.cpp)
 
 
 target_link_directories(phi3 PRIVATE ${ORT_GENAI_LIB_DIR})
@@ -38,10 +39,15 @@ target_include_directories(phi3 PRIVATE ${CMAKE_SOURCE_DIR}/include)
 target_link_directories(phi3v PRIVATE ${ORT_GENAI_LIB_DIR})
 target_link_libraries(phi3v PRIVATE ${ONNXRUNTIME_GENAI_LIB})
 target_include_directories(phi3v PRIVATE ${CMAKE_SOURCE_DIR}/include)
+target_link_directories(llama PRIVATE ${ORT_GENAI_LIB_DIR})
+target_link_libraries(llama PRIVATE ${ONNXRUNTIME_GENAI_LIB})
+target_include_directories(llama PRIVATE ${CMAKE_SOURCE_DIR}/include)
+
 
 if(USE_CUDA)
   set_target_properties(phi3 PROPERTIES LINKER_LANGUAGE CUDA)
   set_target_properties(phi3v PROPERTIES LINKER_LANGUAGE CUDA)
+  set_target_properties(llama PROPERTIES LINKER_LANGUAGE CUDA)
 endif()
 
 target_link_libraries(
@@ -53,6 +59,11 @@ target_link_libraries(
   PUBLIC
   onnxruntime-genai)
 
+target_link_libraries(
+  llama
+  PUBLIC
+  onnxruntime-genai)
+
 if(USE_CUDA)
   target_link_libraries(
     phi3
@@ -62,6 +73,10 @@ if(USE_CUDA)
     phi3v
     PUBLIC
     cublas curand cudart)
+  target_link_libraries(
+    llama
+    PUBLIC
+    cublas curand cudart)
 endif()
 
 file(GLOB ort_genai_libs "${CMAKE_SOURCE_DIR}/lib/${ONNXRUNTIME_GENAI_DEPENDENCY}")
@@ -75,4 +90,8 @@ foreach(DLL_FILE ${ort_genai_libs})
     TARGET phi3v POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:phi3v>
   )
+  add_custom_command(
+    TARGET llama POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:llama>
+  )
 endforeach()
\ No newline at end of file
diff --git a/examples/c/src/llama.cpp b/examples/c/src/llama.cpp
new file mode 100644
index 000000000..d8a8bd7c7
--- /dev/null
+++ b/examples/c/src/llama.cpp
@@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <iostream>
+#include <string>
+#include "ort_genai.h"
+
+// C++ API Example
+
+void CXX_API(const char* model_path) {
+  std::cout << "Creating model..." << std::endl;
+  auto model = OgaModel::Create(model_path);
+  std::cout << "Creating tokenizer..." << std::endl;
+  auto tokenizer = OgaTokenizer::Create(*model);
+  auto tokenizer_stream = OgaTokenizerStream::Create(*tokenizer);
+
+  while (true) {
+    std::string text;
+    std::cout << "Prompt: " << std::endl;
+    std::getline(std::cin, text);
+
+    const std::string prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful AI assistant. Give a short answer to the following<|eot_id|><|start_header_id|>user<|end_header_id|>" + text + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>";
+
+    auto sequences = OgaSequences::Create();
+    tokenizer->Encode(prompt.c_str(), *sequences);
+
+    std::cout << "Generating response..." << std::endl;
+    auto params = OgaGeneratorParams::Create(*model);
+    params->SetSearchOption("max_length", 1024);
+    params->SetSearchOptionBool("do_sample", true);
+    params->SetInputSequences(*sequences);
+
+    auto generator = OgaGenerator::Create(*model, *params);
+
+    while (!generator->IsDone()) {
+      generator->ComputeLogits();
+      generator->GenerateNextToken();
+
+      // Show usage of GetOutput
+      std::unique_ptr<OgaTensor> output_logits = generator->GetOutput("logits");
+
+      // Assuming output_logits.Type() is float as it's logits
+      // Assuming shape is 1 dimensional with shape[0] being the size
+      auto logits = reinterpret_cast<const float*>(output_logits->Data());
+
+      // Print out the logits using the following snippet, if needed
+      //auto shape = output_logits->Shape();
+      //for (size_t i=0; i < shape[0]; i++)
+      //  std::cout << logits[i] << " ";
+      //std::cout << std::endl;
+
+      const auto num_tokens = generator->GetSequenceCount(0);
+      const auto new_token = generator->GetSequenceData(0)[num_tokens - 1];
+      std::cout << tokenizer_stream->Decode(new_token) << std::flush;
+    }
+
+    for (int i = 0; i < 3; ++i)
+      std::cout << std::endl;
+  }
+}
+
+
+static void print_usage(int /*argc*/, char** argv) {
+  std::cerr << "usage: " << argv[0] << " model_path" << std::endl;
+}
+
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    print_usage(argc, argv);
+    return -1;
+  }
+
+  // Responsible for cleaning up the library during shutdown
+  OgaHandle handle;
+
+  std::cout << "-------------" << std::endl;
+  std::cout << "Run Llama" << std::endl;
+  std::cout << "-------------" << std::endl;
+
+#ifdef USE_CXX
+  std::cout << "C++ API" << std::endl;
+  CXX_API(argv[1]);
+#endif
+
+  return 0;
+}
\ No newline at end of file

From 03d43f8d886d87305be8c61a42d499a78117ab45 Mon Sep 17 00:00:00 2001
From: Natalie Kershaw
Date: Tue, 24 Sep 2024 16:29:58 -0700
Subject: [PATCH 2/3] Changes to build file

---
 examples/c/CMakeLists.txt   | 99 +++++++++++++++----------------------
 examples/c/include/.gitkeep |  0
 examples/c/lib/.gitkeep     |  0
 3 files changed, 39 insertions(+), 60 deletions(-)
 delete mode 100644 examples/c/include/.gitkeep
 delete mode 100644 examples/c/lib/.gitkeep

diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt
index a5e032c08..f73d6b30f 100644
--- a/examples/c/CMakeLists.txt
+++ b/examples/c/CMakeLists.txt
@@ -5,6 +5,10 @@ set(CMAKE_CXX_STANDARD 20)
 
 option(USE_CUDA "Build with CUDA support" OFF)
 option(USE_CXX "Invoke the C++ example" ON)
+option(PHI3 "Build the Phi example" OFF)
+option(LLAMA "Build the Llama example" OFF)
+option(PHI3V "Build the Phi3v example" OFF)
+option(WHISPER "Build the Whisper example" OFF)
 
 if(USE_CXX)
   add_compile_definitions(USE_CXX)
@@ -28,70 +32,45 @@ else()
   set(ONNXRUNTIME_GENAI_DEPENDENCY "*.so")
 endif()
 
-add_executable(phi3 ${CMAKE_SOURCE_DIR}/src/main.cpp)
-add_executable(phi3v ${CMAKE_SOURCE_DIR}/src/phi3v.cpp)
-add_executable(llama ${CMAKE_SOURCE_DIR}/src/llama.cpp)
-
-
-target_link_directories(phi3 PRIVATE ${ORT_GENAI_LIB_DIR})
-target_link_libraries(phi3 PRIVATE ${ONNXRUNTIME_GENAI_LIB})
-target_include_directories(phi3 PRIVATE ${CMAKE_SOURCE_DIR}/include)
-target_link_directories(phi3v PRIVATE ${ORT_GENAI_LIB_DIR})
-target_link_libraries(phi3v PRIVATE ${ONNXRUNTIME_GENAI_LIB})
-target_include_directories(phi3v PRIVATE ${CMAKE_SOURCE_DIR}/include)
-target_link_directories(llama PRIVATE ${ORT_GENAI_LIB_DIR})
-target_link_libraries(llama PRIVATE ${ONNXRUNTIME_GENAI_LIB})
-target_include_directories(llama PRIVATE ${CMAKE_SOURCE_DIR}/include)
+file(GLOB ort_genai_libs "${CMAKE_SOURCE_DIR}/lib/${ONNXRUNTIME_GENAI_DEPENDENCY}")
 
-if(USE_CUDA)
-  set_target_properties(phi3 PROPERTIES LINKER_LANGUAGE CUDA)
-  set_target_properties(phi3v PROPERTIES LINKER_LANGUAGE CUDA)
-  set_target_properties(llama PROPERTIES LINKER_LANGUAGE CUDA)
+function(prepare_executable executable)
+  target_link_directories(${executable} PRIVATE ${ORT_GENAI_LIB_DIR})
+  target_link_libraries(${executable} PRIVATE ${ONNXRUNTIME_GENAI_LIB})
+  target_include_directories(${executable} PRIVATE ${CMAKE_SOURCE_DIR}/include)
+
+  if(USE_CUDA)
+    set_target_properties(${executable} PROPERTIES LINKER_LANGUAGE CUDA)
+    target_link_libraries(${executable} PRIVATE cublas curand cudart)
+  endif()
+
+  target_link_libraries(${executable} PUBLIC onnxruntime-genai)
+
+  foreach(DLL_FILE ${ort_genai_libs})
+    add_custom_command(
+      TARGET ${executable} POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:${executable}>
+    )
+  endforeach()
+endfunction()
+
+if(PHI3)
+  add_executable(phi3 ${CMAKE_SOURCE_DIR}/src/phi3.cpp)
+  prepare_executable(phi3)
 endif()
 
-target_link_libraries(
-  phi3
-  PUBLIC
-  onnxruntime-genai)
-target_link_libraries(
-  phi3v
-  PUBLIC
-  onnxruntime-genai)
-
-target_link_libraries(
-  llama
-  PUBLIC
-  onnxruntime-genai)
-
-if(USE_CUDA)
-  target_link_libraries(
-    phi3
-    PUBLIC
-    cublas curand cudart)
-  target_link_libraries(
-    phi3v
-    PUBLIC
-    cublas curand cudart)
-  target_link_libraries(
-    llama
-    PUBLIC
-    cublas curand cudart)
+if(LLAMA)
+  add_executable(llama ${CMAKE_SOURCE_DIR}/src/llama.cpp)
+  prepare_executable(llama)
 endif()
 
-file(GLOB ort_genai_libs "${CMAKE_SOURCE_DIR}/lib/${ONNXRUNTIME_GENAI_DEPENDENCY}")
+if(PHI3V)
+  add_executable(phi3v ${CMAKE_SOURCE_DIR}/src/phi3v.cpp)
+  prepare_executable(phi3v)
+endif()
 
-foreach(DLL_FILE ${ort_genai_libs})
-  add_custom_command(
-    TARGET phi3 POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:phi3>
-  )
-  add_custom_command(
-    TARGET phi3v POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:phi3v>
-  )
-  add_custom_command(
-    TARGET llama POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:llama>
-  )
-endforeach()
\ No newline at end of file
+if(WHISPER)
+  add_executable(whisper ${CMAKE_SOURCE_DIR}/src/whisper.cpp)
+  prepare_executable(whisper)
+endif()
diff --git a/examples/c/include/.gitkeep b/examples/c/include/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/examples/c/lib/.gitkeep b/examples/c/lib/.gitkeep
deleted file mode 100644
index e69de29bb..000000000

From e6f150020fd6211341cfb9117ee70fcf4929dacf Mon Sep 17 00:00:00 2001
From: Natalie Kershaw
Date: Wed, 25 Sep 2024 14:12:22 -0700
Subject: [PATCH 3/3] Add Llama to README

---
 examples/c/README.md | 131 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 120 insertions(+), 11 deletions(-)

diff --git a/examples/c/README.md b/examples/c/README.md
index 0fd01018d..e80b1eb46 100644
--- a/examples/c/README.md
+++ b/examples/c/README.md
@@ -221,15 +221,125 @@ Change into the onnxruntime-genai directory.
 
 2. Build onnxruntime-genai from source and install
 
-   This example requires onnxruntime-genai to be built from source.
+   ```bash
+   curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-linux-cpu-x64-capi.zip -o onnxruntime-genai-linux-cpu-x64-capi.zip
+   unzip onnxruntime-genai-linux-cpu-x64-capi.zip
+   cd onnxruntime-genai-linux-cpu-x64-capi
+   tar xvf onnxruntime-genai-0.4.0-linux-x64.tar.gz
+   cp onnxruntime-genai-0.4.0-linux-x64/include/* ../include
+   cp onnxruntime-genai-0.4.0-linux-x64/lib/* ../lib
+   cd ..
+   ```
+
+#### Build this sample
+
+Build with CUDA:
+
+```bash
+cmake . -B build -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=80 -DUSE_CUDA=ON -DPHI3=ON
+cd build
+cmake --build . --config Release
+```
+
+Build for CPU:
+
+```bash
+cmake . -B build -DPHI3=ON
+cd build
+cmake --build . --config Release
+```
+
+#### Run the sample
+
+```bash
+./phi3 path_to_model
+```
+
+## Llama
+
+### Obtain model
+
+To access Llama models, you need to sign the license agreement on HuggingFace. Navigate to the model on HuggingFace, e.g. https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct, and sign the license agreement.
+
+Once you have been granted access, run the following steps to generate the ONNX model in the precision and for the target that you want to run on. Note: this operation requires 64GB of RAM to complete.
+
+```bash
+pip install torch transformers onnx onnxruntime onnxruntime-genai huggingface-hub[cli]
+huggingface-cli login
+python -m onnxruntime_genai.models.builder -m meta-llama/Llama-3.1-8B-Instruct -e cpu -p int4 -o llama-3.1-8b-instruct-onnx
+```
+
+The model and all of the necessary metadata will be available in a folder called `llama-3.1-8b-instruct-onnx`.
+
+### Windows x64 CPU
+
+#### Install the onnxruntime and onnxruntime-genai binaries
+
+Change into the `onnxruntime-genai\examples\c` folder.
+
+1. Install onnxruntime
+
+   ```cmd
+   curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-win-x64-1.19.2.zip -o onnxruntime-win-x64-1.19.2.zip
+   tar xvf onnxruntime-win-x64-1.19.2.zip
+   copy onnxruntime-win-x64-1.19.2\include\* include
+   copy onnxruntime-win-x64-1.19.2\lib\* lib
+   ```
+
+2. Install onnxruntime-genai
+
+   ```cmd
+   curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-win-cpu-x64-capi.zip -o onnxruntime-genai-win-cpu-x64-capi.zip
+   tar xvf onnxruntime-genai-win-cpu-x64-capi.zip
+   cd onnxruntime-genai-win-cpu-x64-capi
+   tar xvf onnxruntime-genai-0.4.0-win-x64.zip
+   copy onnxruntime-genai-0.4.0-win-x64\include\* ..\include
+   copy onnxruntime-genai-0.4.0-win-x64\lib\* ..\lib
+   cd ..
+   ```
+
+#### Build this sample
+
+```bash
+cmake -A x64 -S . -B build -DLLAMA=ON
+cd build
+cmake --build . --config Release
+```
+
+#### Run the sample
+
+```bash
+cd Release
+.\llama.exe llama-3.1-8b-instruct-onnx
+```
+
+### Linux
+
+#### Install the onnxruntime and onnxruntime-genai binaries
+
+Change into the onnxruntime-genai directory.
+
+1. Install onnxruntime
 
    ```bash
-   # This should be run from the root of the onnxruntime-genai folder
-   python build.py --config Release --ort_home examples\c
-   cp src/ort_genai.h examples/c/include
-   cp src/ort_genai_c.h examples/c/include
-   cp build/Linux/release/onnxruntime-genai.so examples/c/lib
    cd examples/c
+   curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-linux-x64-1.19.2.tgz -o onnxruntime-linux-x64-1.19.2.tgz
+   tar xvzf onnxruntime-linux-x64-1.19.2.tgz
+   cp onnxruntime-linux-x64-1.19.2/include/* include
+   cp onnxruntime-linux-x64-1.19.2/lib/* lib
+   cd ../..
+   ```
+
+2. Install onnxruntime-genai
+
+   ```bash
+   curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-linux-cpu-x64-capi.zip -o onnxruntime-genai-linux-cpu-x64-capi.zip
+   unzip onnxruntime-genai-linux-cpu-x64-capi.zip
+   cd onnxruntime-genai-linux-cpu-x64-capi
+   tar xvf onnxruntime-genai-0.4.0-linux-x64.tar.gz
+   cp onnxruntime-genai-0.4.0-linux-x64/include/* ../include
+   cp onnxruntime-genai-0.4.0-linux-x64/lib/* ../lib
+   cd ..
    ```
 
 #### Build this sample
 
@@ -237,16 +347,15 @@
 Build with CUDA:
 
 ```bash
-mkdir build
+cmake . -B build -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=80 -DUSE_CUDA=ON -DLLAMA=ON
 cd build
-cmake ../ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=80 -DUSE_CUDA=ON -DPHI3=ON
 cmake --build . --config Release
 ```
 
 Build for CPU:
 
 ```bash
-cmake . -DPHI3=ON
+cmake . -B build -DLLAMA=ON
 cd build
 cmake --build . --config Release
 ```
 
 #### Run the sample
 
 ```bash
-cd Release
-./phi3 path_to_model
+./llama path_to_model
 ```
 
+
 ## Phi-3 vision
 
 ### Download model