From 58a56ab00145966952a365296cdb8d83c19bb5ab Mon Sep 17 00:00:00 2001
From: Nat Kershaw
Date: Tue, 17 Sep 2024 11:17:02 -0700
Subject: [PATCH 1/3] Llama example

---
 examples/c/CMakeLists.txt | 19 +++++++++
 examples/c/src/llama.cpp  | 86 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 examples/c/src/llama.cpp

diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt
index 56420786e..a5e032c08 100644
--- a/examples/c/CMakeLists.txt
+++ b/examples/c/CMakeLists.txt
@@ -30,6 +30,7 @@ endif()
 
 add_executable(phi3 ${CMAKE_SOURCE_DIR}/src/main.cpp)
 add_executable(phi3v ${CMAKE_SOURCE_DIR}/src/phi3v.cpp)
+add_executable(llama ${CMAKE_SOURCE_DIR}/src/llama.cpp)
 
 
 target_link_directories(phi3 PRIVATE ${ORT_GENAI_LIB_DIR})
@@ -38,10 +39,15 @@ target_include_directories(phi3 PRIVATE ${CMAKE_SOURCE_DIR}/include)
 target_link_directories(phi3v PRIVATE ${ORT_GENAI_LIB_DIR})
 target_link_libraries(phi3v PRIVATE ${ONNXRUNTIME_GENAI_LIB})
 target_include_directories(phi3v PRIVATE ${CMAKE_SOURCE_DIR}/include)
+target_link_directories(llama PRIVATE ${ORT_GENAI_LIB_DIR})
+target_link_libraries(llama PRIVATE ${ONNXRUNTIME_GENAI_LIB})
+target_include_directories(llama PRIVATE ${CMAKE_SOURCE_DIR}/include)
+
 
 if(USE_CUDA)
   set_target_properties(phi3 PROPERTIES LINKER_LANGUAGE CUDA)
   set_target_properties(phi3v PROPERTIES LINKER_LANGUAGE CUDA)
+  set_target_properties(llama PROPERTIES LINKER_LANGUAGE CUDA)
 endif()
 
 target_link_libraries(
@@ -53,6 +59,11 @@ target_link_libraries(
   PUBLIC
   onnxruntime-genai)
 
+target_link_libraries(
+  llama
+  PUBLIC
+  onnxruntime-genai)
+
 if(USE_CUDA)
   target_link_libraries(
     phi3
@@ -62,6 +73,10 @@ if(USE_CUDA)
     phi3v
     PUBLIC
     cublas curand cudart)
+  target_link_libraries(
+    llama
+    PUBLIC
+    cublas curand cudart)
 endif()
 
 file(GLOB ort_genai_libs "${CMAKE_SOURCE_DIR}/lib/${ONNXRUNTIME_GENAI_DEPENDENCY}")
@@ -75,4 +90,8 @@ foreach(DLL_FILE ${ort_genai_libs})
     TARGET phi3v POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:phi3v>
   )
+  add_custom_command(
+    TARGET llama POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:llama>
+  )
 endforeach()
\ No newline at end of file
diff --git a/examples/c/src/llama.cpp b/examples/c/src/llama.cpp
new file mode 100644
index 000000000..d8a8bd7c7
--- /dev/null
+++ b/examples/c/src/llama.cpp
@@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <iostream>
+#include <string>
+#include "ort_genai.h"
+
+// C++ API Example
+
+void CXX_API(const char* model_path) {
+  std::cout << "Creating model..." << std::endl;
+  auto model = OgaModel::Create(model_path);
+  std::cout << "Creating tokenizer..." << std::endl;
+  auto tokenizer = OgaTokenizer::Create(*model);
+  auto tokenizer_stream = OgaTokenizerStream::Create(*tokenizer);
+
+  while (true) {
+    std::string text;
+    std::cout << "Prompt: " << std::endl;
+    std::getline(std::cin, text);
+
+    const std::string prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful AI assistant. Give a short answer to the following<|eot_id|><|start_header_id|>user<|end_header_id|>" + text + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>";
+
+    auto sequences = OgaSequences::Create();
+    tokenizer->Encode(prompt.c_str(), *sequences);
+
+    std::cout << "Generating response..." << std::endl;
+    auto params = OgaGeneratorParams::Create(*model);
+    params->SetSearchOption("max_length", 1024);
+    params->SetSearchOptionBool("do_sample", true);
+    params->SetInputSequences(*sequences);
+
+    auto generator = OgaGenerator::Create(*model, *params);
+
+    while (!generator->IsDone()) {
+      generator->ComputeLogits();
+      generator->GenerateNextToken();
+
+      // Show usage of GetOutput
+      std::unique_ptr<OgaTensor> output_logits = generator->GetOutput("logits");
+
+      // Assuming output_logits.Type() is float as it's logits
+      // Assuming shape is 1 dimensional with shape[0] being the size
+      auto logits = reinterpret_cast<const float*>(output_logits->Data());
+
+      // Print out the logits using the following snippet, if needed
+      //auto shape = output_logits->Shape();
+      //for (size_t i=0; i < shape[0]; i++)
+      //  std::cout << logits[i] << " ";
+      //std::cout << std::endl;
+
+      const auto num_tokens = generator->GetSequenceCount(0);
+      const auto new_token = generator->GetSequenceData(0)[num_tokens - 1];
+      std::cout << tokenizer_stream->Decode(new_token) << std::flush;
+    }
+
+    for (int i = 0; i < 3; ++i)
+      std::cout << std::endl;
+  }
+}
+
+
+static void print_usage(int /*argc*/, char** argv) {
+  std::cerr << "usage: " << argv[0] << " model_path" << std::endl;
+}
+
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    print_usage(argc, argv);
+    return -1;
+  }
+
+  // Responsible for cleaning up the library during shutdown
+  OgaHandle handle;
+
+  std::cout << "-------------" << std::endl;
+  std::cout << "Run Llama" << std::endl;
+  std::cout << "-------------" << std::endl;
+
+#ifdef USE_CXX
+  std::cout << "C++ API" << std::endl;
+  CXX_API(argv[1]);
+#endif
+
+  return 0;
+}
\ No newline at end of file

From 03d43f8d886d87305be8c61a42d499a78117ab45 Mon Sep 17 00:00:00 2001
From: Natalie Kershaw
Date: Tue, 24 Sep 2024 16:29:58 -0700
Subject: [PATCH 2/3] Changes to build file

---
 examples/c/CMakeLists.txt   | 99 +++++++++++++++----------------------
 examples/c/include/.gitkeep |  0
 examples/c/lib/.gitkeep     |  0
 3 files changed, 39 insertions(+), 60 deletions(-)
 delete mode 100644 examples/c/include/.gitkeep
 delete mode 100644 examples/c/lib/.gitkeep

diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt
index a5e032c08..f73d6b30f 100644
--- a/examples/c/CMakeLists.txt
+++ b/examples/c/CMakeLists.txt
@@ -5,6 +5,10 @@ set(CMAKE_CXX_STANDARD 20)
 
 option(USE_CUDA "Build with CUDA support" OFF)
 option(USE_CXX "Invoke the C++ example" ON)
+option(PHI3 "Build the Phi example" OFF)
+option(LLAMA "Build the Llama example" OFF)
+option(PHI3V "Build the Phi3v example" OFF)
+option(WHISPER "Build the Whisper example" OFF)
 
 if(USE_CXX)
   add_compile_definitions(USE_CXX)
@@ -28,70 +32,45 @@ else()
   set(ONNXRUNTIME_GENAI_DEPENDENCY "*.so")
 endif()
 
-add_executable(phi3 ${CMAKE_SOURCE_DIR}/src/main.cpp)
-add_executable(phi3v ${CMAKE_SOURCE_DIR}/src/phi3v.cpp)
-add_executable(llama ${CMAKE_SOURCE_DIR}/src/llama.cpp)
-
-
-target_link_directories(phi3 PRIVATE ${ORT_GENAI_LIB_DIR})
-target_link_libraries(phi3 PRIVATE ${ONNXRUNTIME_GENAI_LIB})
-target_include_directories(phi3 PRIVATE ${CMAKE_SOURCE_DIR}/include)
-target_link_directories(phi3v PRIVATE ${ORT_GENAI_LIB_DIR})
-target_link_libraries(phi3v PRIVATE ${ONNXRUNTIME_GENAI_LIB})
-target_include_directories(phi3v PRIVATE ${CMAKE_SOURCE_DIR}/include)
-target_link_directories(llama PRIVATE ${ORT_GENAI_LIB_DIR})
-target_link_libraries(llama PRIVATE ${ONNXRUNTIME_GENAI_LIB})
-target_include_directories(llama PRIVATE ${CMAKE_SOURCE_DIR}/include)
+file(GLOB ort_genai_libs "${CMAKE_SOURCE_DIR}/lib/${ONNXRUNTIME_GENAI_DEPENDENCY}")
 
-if(USE_CUDA)
-  set_target_properties(phi3 PROPERTIES LINKER_LANGUAGE CUDA)
-  set_target_properties(phi3v PROPERTIES LINKER_LANGUAGE CUDA)
-  set_target_properties(llama PROPERTIES LINKER_LANGUAGE CUDA)
+function(prepare_executable executable)
+  target_link_directories(${executable} PRIVATE ${ORT_GENAI_LIB_DIR})
+  target_link_libraries(${executable} PRIVATE ${ONNXRUNTIME_GENAI_LIB})
+  target_include_directories(${executable} PRIVATE ${CMAKE_SOURCE_DIR}/include)
+
+  if(USE_CUDA)
+    set_target_properties(${executable} PROPERTIES LINKER_LANGUAGE CUDA)
+    target_link_libraries(${executable} PRIVATE cublas curand cudart)
+  endif()
+
+  target_link_libraries(${executable} PUBLIC onnxruntime-genai)
+
+  foreach(DLL_FILE ${ort_genai_libs})
+    add_custom_command(
+      TARGET ${executable} POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:${executable}>
+    )
+  endforeach()
+endfunction()
+
+if(PHI3)
+  add_executable(phi3 ${CMAKE_SOURCE_DIR}/src/phi3.cpp)
+  prepare_executable(phi3)
 endif()
 
-target_link_libraries(
-  phi3
-  PUBLIC
-  onnxruntime-genai)
-target_link_libraries(
-  phi3v
-  PUBLIC
-  onnxruntime-genai)
-
-target_link_libraries(
-  llama
-  PUBLIC
-  onnxruntime-genai)
-
-if(USE_CUDA)
-  target_link_libraries(
-    phi3
-    PUBLIC
-    cublas curand cudart)
-  target_link_libraries(
-    phi3v
-    PUBLIC
-    cublas curand cudart)
-  target_link_libraries(
-    llama
-    PUBLIC
-    cublas curand cudart)
+if(LLAMA)
+  add_executable(llama ${CMAKE_SOURCE_DIR}/src/llama.cpp)
+  prepare_executable(llama)
 endif()
 
-file(GLOB ort_genai_libs "${CMAKE_SOURCE_DIR}/lib/${ONNXRUNTIME_GENAI_DEPENDENCY}")
+if(PHI3V)
+  add_executable(phi3v ${CMAKE_SOURCE_DIR}/src/phi3v.cpp)
+  prepare_executable(phi3v)
+endif()
 
-foreach(DLL_FILE ${ort_genai_libs})
-  add_custom_command(
-    TARGET phi3 POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:phi3>
-  )
-  add_custom_command(
-    TARGET phi3v POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:phi3v>
-  )
-  add_custom_command(
-    TARGET llama POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $<TARGET_FILE_DIR:llama>
-  )
-endforeach()
\ No newline at end of file
+if(WHISPER)
+  add_executable(whisper ${CMAKE_SOURCE_DIR}/src/whisper.cpp)
+  prepare_executable(whisper)
+endif()
diff --git a/examples/c/include/.gitkeep b/examples/c/include/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/examples/c/lib/.gitkeep b/examples/c/lib/.gitkeep
deleted file mode 100644
index e69de29bb..000000000

From e6f150020fd6211341cfb9117ee70fcf4929dacf Mon Sep 17 00:00:00 2001
From: Natalie Kershaw
Date: Wed, 25 Sep 2024 14:12:22 -0700
Subject: [PATCH 3/3] Add Llama to README

---
 examples/c/README.md | 131 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 120 insertions(+), 11 deletions(-)

diff --git a/examples/c/README.md b/examples/c/README.md
index 0fd01018d..e80b1eb46 100644
--- a/examples/c/README.md
+++ b/examples/c/README.md
@@ -221,15 +221,125 @@ Change into the onnxruntime-genai directory.
 
 2. Build onnxruntime-genai from source and install
 
-   This example requires onnxruntime-genai to be built from source.
+   ```bash
+   curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-linux-cpu-x64-capi.zip -o onnxruntime-genai-linux-cpu-x64-capi.zip
+   unzip onnxruntime-genai-linux-cpu-x64-capi.zip
+   cd onnxruntime-genai-linux-cpu-x64-capi
+   tar xvf onnxruntime-genai-0.4.0-linux-x64.tar.gz
+   cp onnxruntime-genai-0.4.0-linux-x64/include/* ../include
+   cp onnxruntime-genai-0.4.0-linux-x64/lib/* ../lib
+   cd ..
+   ```
+
+#### Build this sample
+
+Build with CUDA:
+
+```bash
+cmake . -B build -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=80 -DUSE_CUDA=ON -DPHI3=ON
+cd build
+cmake --build . --config Release
+```
+
+Build for CPU:
+
+```bash
+cmake . -B build -DPHI3=ON
+cd build
+cmake --build . --config Release
+```
+
+#### Run the sample
+
+```bash
+./phi3 path_to_model
+```
+
+## Llama
+
+### Obtain model
+
+To access Llama models, you need to sign the license agreement on HuggingFace. Navigate to the model on HuggingFace, e.g. https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct, and sign the license agreement.
+
+Once you have been granted access, run the following steps to generate the ONNX model in the precision and for the target that you want to run on. Note: this operation requires 64GB of RAM to complete.
+
+```bash
+pip install torch transformers onnx onnxruntime onnxruntime-genai huggingface-hub[cli]
+huggingface-cli login
+python -m onnxruntime_genai.models.builder -m meta-llama/Llama-3.1-8B-Instruct -e cpu -p int4 -o llama-3.1-8b-instruct-onnx
+```
+
+The model and all of the necessary metadata will be available in a folder called `llama-3.1-8b-instruct-onnx`.
+
+### Windows x64 CPU
+
+#### Install the onnxruntime and onnxruntime-genai binaries
+
+Change into the `onnxruntime-genai\examples\c` folder.
+
+1. Install onnxruntime
+
+   ```cmd
+   curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-win-x64-1.19.2.zip -o onnxruntime-win-x64-1.19.2.zip
+   tar xvf onnxruntime-win-x64-1.19.2.zip
+   copy onnxruntime-win-x64-1.19.2\include\* include
+   copy onnxruntime-win-x64-1.19.2\lib\* lib
+   ```
+
+2. Install onnxruntime-genai
+
+   ```cmd
+   curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-win-cpu-x64-capi.zip -o onnxruntime-genai-win-cpu-x64-capi.zip
+   tar xvf onnxruntime-genai-win-cpu-x64-capi.zip
+   cd onnxruntime-genai-win-cpu-x64-capi
+   tar xvf onnxruntime-genai-0.4.0-win-x64.zip
+   copy onnxruntime-genai-0.4.0-win-x64\include\* ..\include
+   copy onnxruntime-genai-0.4.0-win-x64\lib\* ..\lib
+   cd ..
+   ```
+
+#### Build this sample
+
+```bash
+cmake -A x64 -S . -B build -DLLAMA=ON
+cd build
+cmake --build . --config Release
+```
+
+#### Run the sample
+
+```bash
+cd Release
+.\llama.exe llama-3.1-8b-instruct-onnx
+```
+
+### Linux
+
+#### Install the onnxruntime and onnxruntime-genai binaries
+
+Change into the onnxruntime-genai directory.
+
+1. Install onnxruntime
 
    ```bash
-   # This should be run from the root of the onnxruntime-genai folder
-   python build.py --config Release --ort_home examples\c
-   cp src/ort_genai.h examples/c/include
-   cp src/ort_genai_c.h examples/c/include
-   cp build/Linux/release/onnxruntime-genai.so examples/c/lib
    cd examples/c
+   curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-linux-x64-1.19.2.tgz -o onnxruntime-linux-x64-1.19.2.tgz
+   tar xvzf onnxruntime-linux-x64-1.19.2.tgz
+   cp onnxruntime-linux-x64-1.19.2/include/* include
+   cp onnxruntime-linux-x64-1.19.2/lib/* lib
+   cd ../..
+   ```
+
+2. Install onnxruntime-genai
+
+   ```bash
+   curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-linux-cpu-x64-capi.zip -o onnxruntime-genai-linux-cpu-x64-capi.zip
+   unzip onnxruntime-genai-linux-cpu-x64-capi.zip
+   cd onnxruntime-genai-linux-cpu-x64-capi
+   tar xvf onnxruntime-genai-0.4.0-linux-x64.tar.gz
+   cp onnxruntime-genai-0.4.0-linux-x64/include/* ../include
+   cp onnxruntime-genai-0.4.0-linux-x64/lib/* ../lib
+   cd ..
    ```
 
 #### Build this sample
 
@@ -237,16 +347,15 @@
 Build with CUDA:
 
 ```bash
-mkdir build
+cmake . -B build -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=80 -DUSE_CUDA=ON -DLLAMA=ON
 cd build
-cmake ../ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=80 -DUSE_CUDA=ON -DPHI3=ON
 cmake --build . --config Release
 ```
 
 Build for CPU:
 
 ```bash
-cmake . -DPHI3=ON
+cmake . -B build -DLLAMA=ON
 cd build
 cmake --build . --config Release
 ```
 
 #### Run the sample
 
 ```bash
-cd Release
-./phi3 path_to_model
+./llama path_to_model
 ```
 
+
 ## Phi-3 vision
 
 ### Download model