diff --git a/.DS_Store b/.DS_Store
index 37a822e..707a91f 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 0b76fe5..e479d63 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -4,4 +4,5 @@
     <option name="pythonIntegrationState" value="YES" />
   </component>
   <component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
+  <component name="WestSettings"><![CDATA[{}]]></component>
 </project>
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 44cfed9..d581385 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,38 +5,170 @@ set(CMAKE_CXX_STANDARD 23)
 
 include(FetchContent)
 
-find_package(Vulkan REQUIRED COMPONENTS volk)
+add_executable(v main.cpp
+        misc.cpp
+        misc.h
+        renderer/graphics.cpp
+        renderer/graphics.h
+        renderer/texture.cpp
+        renderer/texture.h
+        renderer/texture_sheet.cpp
+        renderer/texture_sheet.h
+        renderer/sprite.cpp
+        renderer/sprite.h
+        renderer/graphics_private.h
+)
 
-find_library(SLANG_LIB NAMES slang HINTS "$ENV{VULKAN_SDK}/lib")
-find_path(SLANG_INCLUDE_DIR NAMES slang/slang.h HINTS "$ENV{VULKAN_SDK}/include")
-find_file(SLANG_DLL NAMES slang.dll HINTS "$ENV{VULKAN_SDK}/bin")
+target_include_directories(v
+        PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}
+)
 
-if(SLANG_LIB AND SLANG_INCLUDE_DIR)
-    add_library(slang_sdk SHARED IMPORTED)
-    set_target_properties(slang_sdk PROPERTIES
-            IMPORTED_IMPLIB "${SLANG_LIB}"
-            INTERFACE_INCLUDE_DIRECTORIES "${SLANG_INCLUDE_DIR}"
+target_link_libraries(v PRIVATE glfw glm::glm stb)
+
+if (EMSCRIPTEN)
+    set(CMAKE_VERBOSE_MAKEFILE ON)
+	message(STATUS "Configuring for Emscripten...")
+
+    # 1. Find all .slang files in the shaders directory (re-globbed on each build)
+    file(GLOB SLANG_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/shaders/*.slang")
+
+    set(WGSL_OUTPUTS "")
+
+    # 2. Iterate through the found files
+    foreach(SOURCE_PATH IN LISTS SLANG_SOURCES)
+        # Get the filename without the path or extension (e.g., "triangle")
+        get_filename_component(FILENAME_WE "${SOURCE_PATH}" NAME_WE)
+
+        # Emit the .wgsl next to its source: the link step preloads this shaders directory
+        set(OUTPUT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/shaders/${FILENAME_WE}.wgsl")
+
+        # 3. Define the compilation command (paths quoted so spaces survive)
+        add_custom_command(
+                OUTPUT "${OUTPUT_PATH}"
+                COMMAND slangc "${SOURCE_PATH}" -target wgsl -o "${OUTPUT_PATH}"
+                DEPENDS "${SOURCE_PATH}"
+                COMMENT "Compiling Slang: ${FILENAME_WE}.slang -> ${FILENAME_WE}.wgsl"
+                VERBATIM
+        )
+
+        list(APPEND WGSL_OUTPUTS "${OUTPUT_PATH}")
+    endforeach()
+
+    # 4. Create a single target that tracks all these outputs
+    add_custom_target(CompileShaders ALL DEPENDS ${WGSL_OUTPUTS})
+
+    add_dependencies(v CompileShaders)
+
+    set_target_properties(v PROPERTIES SUFFIX ".html")
+    target_compile_options(v PRIVATE
+            "--use-port=emdawnwebgpu"
+            "--use-port=contrib.glfw3"
+    )
+    target_link_options(v PRIVATE
+            "--use-port=emdawnwebgpu"
+            "-sASYNCIFY=1"
+            "--use-port=contrib.glfw3"
+            "--preload-file" "${CMAKE_CURRENT_SOURCE_DIR}/shaders"
+    )
+    target_sources(v PRIVATE
+            renderer/webgpu/renderer.cpp
+            renderer/webgpu/renderer.h
+            renderer/webgpu/webgpu.cpp
+            renderer/webgpu/webgpu.h
+            renderer/webgpu/utils_emscripten.cpp
+    )
+elseif (WIN32)
+    message(STATUS "Configuring for Windows...")
+
+    find_package(slang REQUIRED)
+
+    find_package(Vulkan REQUIRED COMPONENTS volk)
+    find_package(glfw3 REQUIRED)
+
+    target_link_libraries(v PRIVATE Vulkan::Vulkan slang::slang)
+
+    target_sources(v PRIVATE
+        renderer/vulkan/vulkan.cpp
+        renderer/vulkan/vulkan.h
+        renderer/vulkan/renderer.cpp
+        renderer/vulkan/renderer.h
+    )
+elseif (APPLE)
+    message(STATUS "Configuring for macOS...")
+
+    find_package(slang REQUIRED)
+
+    find_package(glfw3 REQUIRED)
+
+    target_include_directories(v PRIVATE # v is an executable; matches the PRIVATE include call above
+            "~/Dev/libraries/metal-cpp" # NOTE(review): developer-specific path; consider a cache variable
+            "~/Dev/libraries/metal-cpp-extensions"
     )
 
-    # If we found the DLL, set it as the location;
-    # otherwise, on non-Windows systems, SLANG_LIB is the location.
-    if(WIN32 AND SLANG_DLL)
-        set_target_properties(slang_sdk PROPERTIES IMPORTED_LOCATION "${SLANG_DLL}")
-    else()
-        set_target_properties(slang_sdk PROPERTIES IMPORTED_LOCATION "${SLANG_LIB}")
-    endif()
+    target_link_libraries(v PRIVATE
+            "-framework Metal"
+            "-framework MetalKit"
+            "-framework AppKit"
+            "-framework Foundation"
+            "-framework QuartzCore"
+            slang::slang
+    )
 
-    message(STATUS "Slang found via VULKAN_SDK: ${SLANG_LIB}")
-else()
-    message(FATAL_ERROR "VULKAN_SDK env var is set, but Slang wasn't found inside it!")
-endif()
+    target_sources(v PRIVATE
+            renderer/metal/metal.cpp
+            renderer/metal/metal.h
+            renderer/metal/renderer.cpp
+            renderer/metal/renderer.h
+    )
 
-FetchContent_Declare(
-        glfw
-        GIT_REPOSITORY https://github.com/glfw/glfw.git
-        GIT_TAG 232164f62b0edbf667cba37c91bab92ffbb020d0
-)
-FetchContent_MakeAvailable(glfw)
+#[[    #shaders
+
+    set(SHADER_DIR ${CMAKE_SOURCE_DIR}/shaders)
+    set(SHADER_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/renderer/metal)
+
+    set(SHADER_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/shaders)
+
+    file(GLOB SHADER_SOURCES
+            ${SHADER_DIR}/*.metal)
+
+    set(AIR_FILES)
+
+    foreach(SHADER ${SHADER_SOURCES})
+        get_filename_component(NAME ${SHADER} NAME_WE)
+        set(AIR_FILE ${SHADER_OUTPUT_DIR}/${NAME}.air)
+
+        add_custom_command(
+                OUTPUT ${AIR_FILE}
+                COMMAND ${CMAKE_COMMAND} -E make_directory ${SHADER_OUTPUT_DIR}
+                COMMAND xcrun -sdk macosx metal -frecord-sources -gline-tables-only
+                -I ${SHADER_INCLUDE_DIR}
+                -c ${SHADER}
+                -o ${AIR_FILE}
+                DEPENDS ${SHADER}
+                COMMENT "Compiling ${NAME}.metal"
+        )
+
+        list(APPEND AIR_FILES ${AIR_FILE})
+    endforeach()
+
+    set(METALLIB ${SHADER_OUTPUT_DIR}/shaders.metallib)
+
+    add_custom_command(
+            OUTPUT ${METALLIB}
+            COMMAND xcrun -sdk macosx metallib
+            ${AIR_FILES}
+            -o ${METALLIB}
+            DEPENDS ${AIR_FILES}
+            COMMENT "Linking shaders.metallib"
+    )
+
+    add_custom_target(shaders ALL
+            DEPENDS ${METALLIB}
+    )
+
+    add_dependencies(v shaders)]]
+endif ()
 
 FetchContent_Declare(
         glm
@@ -56,27 +188,6 @@ FetchContent_MakeAvailable(stb)
 add_library(stb INTERFACE)
 target_include_directories(stb INTERFACE ${stb_SOURCE_DIR})
 
-add_executable(v main.cpp
-        renderer/init.cpp
-        renderer/init.h
-        renderer/renderer.cpp
-        renderer/renderer.h
-        misc.cpp
-        misc.h
-        renderer/sprite.cpp
-        renderer/sprite.h
-        renderer/texture.cpp
-        renderer/texture.h
-        renderer/texture_sheet.cpp
-        renderer/texture_sheet.h)
-
-target_include_directories(v
-        PRIVATE
-        ${CMAKE_CURRENT_SOURCE_DIR}
-)
-
-target_link_libraries(v PRIVATE glfw Vulkan::Vulkan glm::glm stb slang_sdk)
-
 set(SHADER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/shaders")
 set(SHADER_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/shaders")
 file(GLOB_RECURSE SHADER_SOURCES
@@ -85,7 +196,7 @@ file(GLOB_RECURSE SHADER_SOURCES
         "${SHADER_SOURCE_DIR}/*.comp"
 )
 
-set(SPIRV_BINARY_FILES "")
+#[[set(SPIRV_BINARY_FILES "")
 
 foreach(SHADER ${SHADER_SOURCES})
 
@@ -104,4 +215,4 @@ endforeach()
 
 add_custom_target(compile_shaders ALL DEPENDS ${SPIRV_BINARY_FILES})
 
-add_dependencies(v compile_shaders)
+add_dependencies(v compile_shaders)]]
diff --git a/main.cpp b/main.cpp
index c9f7f84..fd3d6eb 100644
--- a/main.cpp
+++ b/main.cpp
@@ -2,16 +2,17 @@
 #include <vector>
 #include <fstream>
 #include <cassert>
+#include <cstdint>
 
-#define VOLK_IMPLEMENTATION
-#include <Volk/volk.h>
-#define VMA_IMPLEMENTATION
-#include <vma/vk_mem_alloc.h>
+#ifdef __EMSCRIPTEN__
+#include <GLFW/emscripten_glfw3.h>
+#include <emscripten.h>
+#else
 #include <GLFW/glfw3.h>
+#endif
 
-#include "renderer/init.h"
-#include "renderer/renderer.h"
-#include "renderer/texture.h"
+#include "renderer/graphics.h"
+#include "renderer/texture_sheet.h"
 
 #define STB_IMAGE_IMPLEMENTATION
 #include <stb_image.h>
@@ -24,14 +25,30 @@ int32_t window_height = 480;
 uint64_t t           = 0;
 uint64_t accumulator = 0;
 
-uint64_t dt = 0;
+uint64_t dt = 0; // fixed update step in timer ticks (1/60 s); assigned in init()
 
-int main() {
+uint64_t current_time = 0; // timer value at the previous frame; assigned in init()
+
+sprite_t sprite {
+    .origin = {0.5, 0.5},
+    .scale = {512, 512},
+    .rotation = 0.0,
+    .colour = {1.0, 1.0, 1.0, 1.0},
+    .alpha = 1.0,
+    .window_space = false,
+    .maintain_ar = false,
+    .texture = "assets/boy.png"
+};
+
+int init() {
     if (!glfwInit())
         return -1;
 
-    dt = (uint64_t) (1.0 / 60.0 * glfwGetTimerFrequency());
-
+    // The GLFW timer may only be queried after glfwInit(); before that it
+    // reports 0, which would make dt == 0 and hang the fixed-step update loop.
+    dt = glfwGetTimerFrequency() / 60; // 1/60 s
+    current_time = glfwGetTimerValue();
+
     std::println("Hello, Sailor!");
 
     glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
@@ -41,64 +58,67 @@ int main() {
         return -1;
     }
 
-    createInstance(window);
-    createSurface(window);
-    createDevice();
+    graphics_init(window);
 
-    createSwapchain(window);
-
-    slang::createGlobalSession(slangGlobalSession.writeRef());
-
-    Renderer renderer(window);
-
-    texture_manager.load("assets/boy.png", renderer);
-
-    uint64_t current_time = glfwGetTimerValue();
-
-    while (!glfwWindowShouldClose(window)) {
-
-        uint64_t new_time = glfwGetTimerValue();
-        uint64_t frame_time = new_time - current_time;
-        current_time = new_time;
-
-        accumulator += frame_time;
-
-        glfwPollEvents();
-
-        uint64_t updates = 0;
-
-        while (accumulator >= dt) {
-            accumulator -= dt;
-            t += dt;
-            updates++;
-        }
-        // std::println("Updates: {}", updates);
-        // std::println("frame time: {}", ((double) (frame_time) / (double) glfwGetTimerFrequency()));
-
-        renderer.begin_frame();
-
-        double f = 15.0 * (t / (double) glfwGetTimerFrequency());
-
-        renderer.submit_quad(
-    {
-                100.0 + 10.0 * glm::sin(f),
-                100.0 - 10.0 * glm::cos(f),
-            },
-            {100.0, 100.0});
-
-        renderer.submit_quad({400.0, 400.0}, {20.0, 20.0});
-
-        // renderer.submit_sprite();
-
-        renderer.end_frame();
-
-    }
-
-    vkDeviceWaitIdle(device);
-
-
-    glfwDestroyWindow(window);
-    glfwTerminate();
+    texture_manager.load("assets/boy.png");
+
+    return 0;
+}
+
+bool is_running() {
+    return !glfwWindowShouldClose(window);
+}
+
+void main_loop(void *data) {
+    uint64_t new_time = glfwGetTimerValue();
+    uint64_t frame_time = new_time - current_time;
+    current_time = new_time;
+
+    accumulator += frame_time;
+
+    glfwPollEvents();
+
+    uint64_t updates = 0;
+
+    while (accumulator >= dt) {
+        accumulator -= dt;
+        t += dt;
+        updates++;
+    }
+    // std::println("Updates: {}", updates);
+    // std::println("frame time: {}", ((double) (frame_time) / (double) glfwGetTimerFrequency()));
+    // std::println("fps: {}", 1.0 / ((double) (frame_time) / (double) glfwGetTimerFrequency()));
+
+    begin_frame();
+
+    double f = (t / (double) glfwGetTimerFrequency());
+    std::println("{}", f);
+
+    for (int x = sprite.scale.x / 2; x < window_width; x += sprite.scale.x) {
+        submit_sprite({x + 30 * cos(f), 200 + 30 * sin(f)}, sprite);
+    }
+
+    end_frame(window);
+}
+
+void deinit() {
+    graphics_deinit();
+    glfwDestroyWindow(window);
+    glfwTerminate();
+}
+
+int main() {
+    if (init() != 0) return -1; // init() returns non-zero on failure; do not enter the loop then
+
+#ifdef __EMSCRIPTEN__
+    emscripten_set_main_loop_arg(main_loop, nullptr, 0, true); // fps 0, simulate_infinite_loop = true
+#else
+    while (is_running()) {
+        main_loop(nullptr);
+    }
+#endif
+
+    deinit();
 
     return 0;
 }
diff --git a/metal/CMakeLists.txt b/metal/CMakeLists.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/metal/definition.cpp b/metal/definition.cpp
deleted file mode 100644
index 24c2895..0000000
--- a/metal/definition.cpp
+++ /dev/null
@@ -1,3 +0,0 @@
-//
-// Created by Vicente Ferrari Smith on 26.02.26.
-//
diff --git a/misc.cpp b/misc.cpp
index b7b211f..3c3e049 100644
--- a/misc.cpp
+++ b/misc.cpp
@@ -5,15 +5,25 @@
 #include "misc.h"
 #include <fstream>
 
-std::vector<char> loadFile(const char* path) {
+// Reads the entire file at `path` in binary mode and returns its bytes.
+// Returns an empty string when the file cannot be opened, is empty, or its
+// size cannot be determined.
+std::string read_entire_file(const std::string &path) {
     std::ifstream f(path, std::ios::binary | std::ios::ate);
-    if (f.good()) {
-        const uint32_t size = f.tellg();
-        std::vector<char> data(size);
-        f.seekg(0);
-        f.read(data.data(), size);
-        return data;
-    }
 
-    return {};
+    if (!f.is_open()) return {};
+
+    // The stream was opened at the end (std::ios::ate), so tellg() is the file
+    // size; it yields -1 on failure, which must not reach the size_t cast below.
+    const std::streamsize size = f.tellg();
+    if (size <= 0) return {};
+
+    std::string buffer(static_cast<size_t>(size), '\0'); // Pre-allocate the string memory
+
+    f.seekg(0);
+    f.read(&buffer[0], size); // Read directly into the string buffer
+    // Keep only the bytes that were actually read in case of a short read.
+    buffer.resize(static_cast<size_t>(f.gcount()));
+
+    return buffer;
 }
diff --git a/misc.h b/misc.h
index e009cfc..3432639 100644
--- a/misc.h
+++ b/misc.h
@@ -7,7 +7,9 @@
 
+#include <string>
 #include <vector>
 
-std::vector<char> loadFile(const char* path);
+// Returns the full contents of the file at `path`; empty if it cannot be opened.
+std::string read_entire_file(const std::string &path);
 
 
 template<typename T, typename F>
diff --git a/renderer/graphics.cpp b/renderer/graphics.cpp
index 072d86b..e363a0c 100644
--- a/renderer/graphics.cpp
+++ b/renderer/graphics.cpp
@@ -1,3 +1,14 @@
 //
 // Created by Vicente Ferrari Smith on 02.03.26.
 //
+
+#include "graphics.h"
+#include "graphics_private.h"
+
+void graphics_init(GLFWwindow *window) {
+#ifndef __EMSCRIPTEN__
+    slang::createGlobalSession(slangGlobalSession.writeRef());
+#endif
+
+    platform_graphics_init(window);
+}
diff --git a/renderer/graphics.h b/renderer/graphics.h
index 750ab32..ce337a2 100644
--- a/renderer/graphics.h
+++ b/renderer/graphics.h
@@ -2,13 +2,47 @@
 // Created by Vicente Ferrari Smith on 26.02.26.
 //
 
-#ifndef V_RENDERER_H
-#define V_RENDERER_H
+#ifndef V_GRAPHICS_H
+#define V_GRAPHICS_H
 
 
-struct Graphics {
-    Graphics();
-};
+#ifdef __EMSCRIPTEN__
+#include "webgpu/renderer.h"
+#elifdef __APPLE__
+#include "metal/renderer.h"
+#elifdef _WIN32
+#include "vulkan/init.h"
+#include "vulkan/renderer.h"
+#endif
 
+#ifdef __EMSCRIPTEN__
+#include <GLFW/emscripten_glfw3.h>
+#else
+#include <GLFW/glfw3.h>
+#endif
+#include <glm/glm.hpp>
 
-#endif //V_RENDERER_H
\ No newline at end of file
+#ifndef __EMSCRIPTEN__
+#include <slang.h>
+#include <slang-com-ptr.h>
+inline Slang::ComPtr<slang::IGlobalSession> slangGlobalSession;
+inline Slang::ComPtr<slang::ISession> slangSession;
+#endif
+
+#include "sprite.h"
+
+void graphics_init(GLFWwindow *window);
+void graphics_deinit();
+void begin_frame();
+void end_frame(GLFWwindow *window);
+
+void submit_quad();
+void submit_sprite(glm::vec2 pos, const sprite_t &sprite);
+
+void upload_texture(
+    int w,
+    int h,
+    const void* pixels,
+    Texture *texture);
+
+#endif //V_GRAPHICS_H
\ No newline at end of file
diff --git a/renderer/graphics_private.h b/renderer/graphics_private.h
index c272ff8..2f79be6 100644
--- a/renderer/graphics_private.h
+++ b/renderer/graphics_private.h
@@ -5,4 +5,12 @@
 #ifndef V_GRAPHICS_PRIVATE_H
 #define V_GRAPHICS_PRIVATE_H
 
-#endif //V_GRAPHICS_PRIVATE_H
\ No newline at end of file
+#ifdef __EMSCRIPTEN__
+#include <GLFW/emscripten_glfw3.h>
+#else
+#include <GLFW/glfw3.h>
+#endif
+
+void platform_graphics_init(GLFWwindow *window);
+
+#endif //V_GRAPHICS_PRIVATE_H
diff --git a/renderer/metal/AAPLMathUtilities.cpp b/renderer/metal/AAPLMathUtilities.cpp
index 7e3ca1c..51a2cc3 100644
--- a/renderer/metal/AAPLMathUtilities.cpp
+++ b/renderer/metal/AAPLMathUtilities.cpp
@@ -1,3 +1,637 @@
-//
-// Created by Vicente Ferrari Smith on 27.02.26.
-//
+/*
+See LICENSE folder for this sample’s licensing information.
+
+Abstract:
+Implementation of vector, matrix, and quaternion math utility functions useful for 3D graphics
+ rendering with Metal
+
+ Metal uses column-major matrices and column-vector inputs.
+
+    linearIndex     cr              example with reference elements
+     0  4  8 12     00 10 20 30     sx  10  20   tx
+     1  5  9 13 --> 01 11 21 31 --> 01  sy  21   ty
+     2  6 10 14     02 12 22 32     02  12  sz   tz
+     3  7 11 15     03 13 23 33     03  13  1/d  33
+
+  The "cr" names are for <column><row>
+*/
+
+#include "AAPLMathUtilities.h"
+#include <assert.h>
+#include <stdlib.h>
+
+uint32_t seed_lo, seed_hi;
+
+static float inline F16ToF32(const __fp16 *address) {
+    return *address;
+}
+
+float AAPL_SIMD_OVERLOAD float32_from_float16(uint16_t i) {
+    return F16ToF32((__fp16 *)&i);
+}
+
+static inline void F32ToF16(float F32, __fp16 *F16Ptr) {
+    *F16Ptr = F32;
+}
+
+uint16_t AAPL_SIMD_OVERLOAD float16_from_float32(float f) {
+    uint16_t f16;
+    F32ToF16(f, (__fp16 *)&f16);
+    return f16;
+}
+
+vector_float3 AAPL_SIMD_OVERLOAD generate_random_vector(float min, float max)
+{
+    vector_float3 rand;
+
+    float range = max - min;
+    rand.x = ((double)random() / (double) (0x7FFFFFFF)) * range + min;
+    rand.y = ((double)random() / (double) (0x7FFFFFFF)) * range + min;
+    rand.z = ((double)random() / (double) (0x7FFFFFFF)) * range + min;
+
+    return rand;
+}
+
+void AAPL_SIMD_OVERLOAD seedRand(uint32_t seed) {
+    seed_lo = seed; seed_hi = ~seed;
+}
+
+int32_t AAPL_SIMD_OVERLOAD randi(void) {
+    seed_hi = (seed_hi<<16) + (seed_hi>>16);
+    seed_hi += seed_lo; seed_lo += seed_hi;
+    return seed_hi;
+}
+
+float AAPL_SIMD_OVERLOAD randf(float x) {
+    return (x * randi() / (float)0x7FFFFFFF);
+}
+
+float AAPL_SIMD_OVERLOAD degrees_from_radians(float radians) {
+    return (radians / M_PI) * 180;
+}
+
+float AAPL_SIMD_OVERLOAD radians_from_degrees(float degrees) {
+    return (degrees / 180) * M_PI;
+}
+
+static vector_float3 AAPL_SIMD_OVERLOAD vector_make(float x, float y, float z) {
+    return (vector_float3){ x, y, z };
+}
+
+vector_float3 AAPL_SIMD_OVERLOAD vector_lerp(vector_float3 v0, vector_float3 v1, float t) {
+    return ((1 - t) * v0) + (t * v1);
+}
+
+vector_float4 AAPL_SIMD_OVERLOAD vector_lerp(vector_float4 v0, vector_float4 v1, float t) {
+    return ((1 - t) * v0) + (t * v1);
+}
+
+//------------------------------------------------------------------------------
+// matrix_make_rows takes input data with rows of elements.
+// This way, the calling code matrix data can look like the rows
+// of a matrix made for transforming column vectors.
+
+// Indices are m<column><row>
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix_make_rows(
+                                   float m00, float m10, float m20,
+                                   float m01, float m11, float m21,
+                                   float m02, float m12, float m22) {
+    return (matrix_float3x3){ {
+            { m00, m01, m02 },      // each line here provides column data
+            { m10, m11, m12 },
+            { m20, m21, m22 } } };
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_make_rows(
+                                   float m00, float m10, float m20, float m30,
+                                   float m01, float m11, float m21, float m31,
+                                   float m02, float m12, float m22, float m32,
+                                   float m03, float m13, float m23, float m33) {
+    return (matrix_float4x4){ {
+        { m00, m01, m02, m03 },     // each line here provides column data
+        { m10, m11, m12, m13 },
+        { m20, m21, m22, m23 },
+        { m30, m31, m32, m33 } } };
+}
+
+// each arg is a column vector
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix_make_columns(
+                                   vector_float3 col0,
+                                   vector_float3 col1,
+                                   vector_float3 col2) {
+    return (matrix_float3x3){ col0, col1, col2 };
+}
+
+// each arg is a column vector
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_make_columns(
+                                   vector_float4 col0,
+                                   vector_float4 col1,
+                                   vector_float4 col2,
+                                   vector_float4 col3) {
+    return (matrix_float4x4){ col0, col1, col2, col3 };
+}
+
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_from_quaternion(quaternion_float q) {
+    float xx = q.x * q.x;
+    float xy = q.x * q.y;
+    float xz = q.x * q.z;
+    float xw = q.x * q.w;
+    float yy = q.y * q.y;
+    float yz = q.y * q.z;
+    float yw = q.y * q.w;
+    float zz = q.z * q.z;
+    float zw = q.z * q.w;
+
+    // indices are m<column><row>
+    float m00 = 1 - 2 * (yy + zz);
+    float m10 = 2 * (xy - zw);
+    float m20 = 2 * (xz + yw);
+
+    float m01 = 2 * (xy + zw);
+    float m11 = 1 - 2 * (xx + zz);
+    float m21 = 2 * (yz - xw);
+
+    float m02 = 2 * (xz - yw);
+    float m12 = 2 * (yz + xw);
+    float m22 = 1 - 2 * (xx + yy);
+
+    return matrix_make_rows(m00, m10, m20,
+                            m01, m11, m21,
+                            m02, m12, m22);
+}
+
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_rotation(float radians, vector_float3 axis) {
+    axis = vector_normalize(axis);
+    float ct = cosf(radians);
+    float st = sinf(radians);
+    float ci = 1 - ct;
+    float x = axis.x, y = axis.y, z = axis.z;
+    return matrix_make_rows(    ct + x * x * ci, x * y * ci - z * st, x * z * ci + y * st,
+                            y * x * ci + z * st,     ct + y * y * ci, y * z * ci - x * st,
+                            z * x * ci - y * st, z * y * ci + x * st,     ct + z * z * ci );
+}
+
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_rotation(float radians, float x, float y, float z) {
+    return matrix3x3_rotation(radians, vector_make(x, y, z));
+}
+
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_scale(float sx, float sy, float sz) {
+    return matrix_make_rows(sx,  0,  0,
+                             0, sy,  0,
+                             0,  0, sz);
+}
+
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_scale(vector_float3 s) {
+    return matrix_make_rows(s.x,   0,   0,
+                              0, s.y,   0,
+                              0,   0, s.z);
+}
+
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_upper_left(matrix_float4x4 m) {
+    vector_float3 x = m.columns[0].xyz;
+    vector_float3 y = m.columns[1].xyz;
+    vector_float3 z = m.columns[2].xyz;
+    return matrix_make_columns(x, y, z);
+}
+
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix_inverse_transpose(matrix_float3x3 m) {
+    return matrix_invert(matrix_transpose(m));
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_from_quaternion(quaternion_float q) {
+
+    float xx = q.x * q.x;
+    float xy = q.x * q.y;
+    float xz = q.x * q.z;
+    float xw = q.x * q.w;
+    float yy = q.y * q.y;
+    float yz = q.y * q.z;
+    float yw = q.y * q.w;
+    float zz = q.z * q.z;
+    float zw = q.z * q.w;
+
+    // indices are m<column><row>
+    float m00 = 1 - 2 * (yy + zz);
+    float m10 = 2 * (xy - zw);
+    float m20 = 2 * (xz + yw);
+
+    float m01 = 2 * (xy + zw);
+    float m11 = 1 - 2 * (xx + zz);
+    float m21 = 2 * (yz - xw);
+
+    float m02 = 2 * (xz - yw);
+    float m12 = 2 * (yz + xw);
+    float m22 = 1 - 2 * (xx + yy);
+
+    matrix_float4x4 matrix = matrix_make_rows(m00, m10, m20, 0,
+                                              m01, m11, m21, 0,
+                                              m02, m12, m22, 0,
+                                                0,   0,   0, 1);
+    return matrix;
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_rotation(float radians, vector_float3 axis) {
+    axis = vector_normalize(axis);
+    float ct = cosf(radians);
+    float st = sinf(radians);
+    float ci = 1 - ct;
+    float x = axis.x, y = axis.y, z = axis.z;
+    return matrix_make_rows(
+                        ct + x * x * ci, x * y * ci - z * st, x * z * ci + y * st, 0,
+                    y * x * ci + z * st,     ct + y * y * ci, y * z * ci - x * st, 0,
+                    z * x * ci - y * st, z * y * ci + x * st,     ct + z * z * ci, 0,
+                                      0,                   0,                   0, 1);
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_rotation(float radians, float x, float y, float z) {
+    return matrix4x4_rotation(radians, vector_make(x, y, z));
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_identity(void) {
+    return matrix_make_rows(1, 0, 0, 0,
+                            0, 1, 0, 0,
+                            0, 0, 1, 0,
+                            0, 0, 0, 1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_scale(float sx, float sy, float sz) {
+    return matrix_make_rows(sx,  0,  0, 0,
+                             0, sy,  0, 0,
+                             0,  0, sz, 0,
+                             0,  0,  0, 1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_scale(vector_float3 s) {
+    return matrix_make_rows(s.x,   0,   0, 0,
+                              0, s.y,   0, 0,
+                              0,   0, s.z, 0,
+                              0,   0,   0, 1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_translation(float tx, float ty, float tz) {
+    return matrix_make_rows(1, 0, 0, tx,
+                            0, 1, 0, ty,
+                            0, 0, 1, tz,
+                            0, 0, 0,  1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_translation(vector_float3 t) {
+    return matrix_make_rows(1, 0, 0, t.x,
+                            0, 1, 0, t.y,
+                            0, 0, 1, t.z,
+                            0, 0, 0,   1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_scale_translation(vector_float3 s, vector_float3 t) {
+    return matrix_make_rows(s.x,   0,   0, t.x,
+                              0, s.y,   0, t.y,
+                              0,   0, s.z, t.z,
+                              0,   0,   0,   1 );
+}
+
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_left_hand(vector_float3 eye,
+                                                            vector_float3 target,
+                                                            vector_float3 up) {
+    vector_float3 z = vector_normalize(target - eye);
+    vector_float3 x = vector_normalize(vector_cross(up, z));
+    vector_float3 y = vector_cross(z, x);
+    vector_float3 t = vector_make(-vector_dot(x, eye), -vector_dot(y, eye), -vector_dot(z, eye));
+
+    return matrix_make_rows(x.x, x.y, x.z, t.x,
+                            y.x, y.y, y.z, t.y,
+                            z.x, z.y, z.z, t.z,
+                              0,   0,   0,   1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_left_hand(float eyeX, float eyeY, float eyeZ,
+                                                            float centerX, float centerY, float centerZ,
+                                                            float upX, float upY, float upZ) {
+    vector_float3 eye = vector_make(eyeX, eyeY, eyeZ);
+    vector_float3 center = vector_make(centerX, centerY, centerZ);
+    vector_float3 up = vector_make(upX, upY, upZ);
+
+    return matrix_look_at_left_hand(eye, center, up);
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_right_hand(vector_float3 eye,
+                                                             vector_float3 target,
+                                                             vector_float3 up) {
+    vector_float3 z = vector_normalize(eye - target);
+    vector_float3 x = vector_normalize(vector_cross(up, z));
+    vector_float3 y = vector_cross(z, x);
+    vector_float3 t = vector_make(-vector_dot(x, eye), -vector_dot(y, eye), -vector_dot(z, eye));
+
+    return matrix_make_rows(x.x, x.y, x.z, t.x,
+                            y.x, y.y, y.z, t.y,
+                            z.x, z.y, z.z, t.z,
+                              0,   0,   0,   1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_right_hand(float eyeX, float eyeY, float eyeZ,
+                                                             float centerX, float centerY, float centerZ,
+                                                             float upX, float upY, float upZ) {
+    vector_float3 eye = vector_make(eyeX, eyeY, eyeZ);
+    vector_float3 center = vector_make(centerX, centerY, centerZ);
+    vector_float3 up = vector_make(upX, upY, upZ);
+
+    return matrix_look_at_right_hand(eye, center, up);
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_ortho_left_hand(float left, float right, float bottom, float top, float nearZ, float farZ) {
+    return matrix_make_rows(
+        2 / (right - left),                  0,                  0, (left + right) / (left - right),
+                         0, 2 / (top - bottom),                  0, (top + bottom) / (bottom - top),
+                         0,                  0, 1 / (farZ - nearZ),          nearZ / (nearZ - farZ),
+                         0,                  0,                  0,                               1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_ortho_right_hand(float left, float right, float bottom, float top, float nearZ, float farZ) {
+    return matrix_make_rows(
+        2 / (right - left),                  0,                   0, (left + right) / (left - right),
+        0,                  2 / (top - bottom),                   0, (top + bottom) / (bottom - top),
+        0,                                   0, -1 / (farZ - nearZ),          nearZ / (nearZ - farZ),
+        0,                                   0,                   0,                               1 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_perspective_left_hand(float fovyRadians, float aspect, float nearZ, float farZ) {
+    float ys = 1 / tanf(fovyRadians * 0.5);
+    float xs = ys / aspect;
+    float zs = farZ / (farZ - nearZ);
+    return matrix_make_rows(xs,  0,  0,           0,
+                             0, ys,  0,           0,
+                             0,  0, zs, -nearZ * zs,
+                             0,  0,  1,           0 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_perspective_right_hand(float fovyRadians, float aspect, float nearZ, float farZ) {
+    float ys = 1 / tanf(fovyRadians * 0.5);
+    float xs = ys / aspect;
+    float zs = farZ / (nearZ - farZ);
+    return matrix_make_rows(xs,  0,  0,          0,
+                             0, ys,  0,          0,
+                             0,  0, zs, nearZ * zs,
+                             0,  0, -1,          0 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_perspective_frustum_right_hand(float l, float r, float b, float t, float n, float f) {
+    return matrix_make_rows(
+        2 * n / (r - l),                0, (r + l) / (r - l),                 0,
+                     0,   2 * n / (t - b), (t + b) / (t - b),                 0,
+                     0,                 0,      -f / (f - n), -f * n  / (f - n),
+                     0,                 0,                -1,                 0 );
+}
+
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_inverse_transpose(matrix_float4x4 m) {
+    return matrix_invert(matrix_transpose(m));
+}
+
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(float x, float y, float z, float w) {
+    return (quaternion_float){ x, y, z, w };
+}
+
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(vector_float3 v, float w) {
+    return (quaternion_float){ v.x, v.y, v.z, w };
+}
+
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_identity() {
+    return quaternion(0, 0, 0, 1);
+}
+
+// Builds the quaternion (axis * sin(radians / 2), cos(radians / 2)).
+// The axis is used as given; it is not normalized here.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_axis_angle(vector_float3 axis, float radians) {
+    const float halfAngle = radians * 0.5;
+    const float sinHalf = sinf(halfAngle);
+    const float cosHalf = cosf(halfAngle);
+    return quaternion(axis.x * sinHalf, axis.y * sinHalf, axis.z * sinHalf, cosHalf);
+}
+
+// Converts Euler angles (radians, one per axis in euler.x/y/z) to a quaternion
+// by combining the sines and cosines of the three half angles.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_euler(vector_float3 euler) {
+    const float halfX = euler.x / 2.f;
+    const float halfY = euler.y / 2.f;
+    const float halfZ = euler.z / 2.f;
+
+    const float cx = cosf(halfX);
+    const float cy = cosf(halfY);
+    const float cz = cosf(halfZ);
+    const float sx = sinf(halfX);
+    const float sy = sinf(halfY);
+    const float sz = sinf(halfZ);
+
+    quaternion_float result;
+    result.w = cx * cy * cz + sx * sy * sz;
+    result.x = sx * cy * cz - cx * sy * sz;
+    result.y = cx * sy * cz + sx * cy * sz;
+    result.z = cx * cy * sz - sx * sy * cz;
+    return result;
+}
+
+// Extracts a quaternion from a rotation matrix.
+//
+// Component magnitudes come from the diagonal. Each radicand is clamped at
+// zero because rounding in a nearly-orthonormal matrix can push it slightly
+// negative, which would make sqrtf return NaN.
+//
+// Signs of x, y and z are recovered from the antisymmetric part of the matrix
+// (w is kept non-negative). The sign choice matches quaternion_rotate_vector,
+// i.e. it assumes column c of m is the image of basis vector c.
+// NOTE(review): confirm this agrees with matrix3x3_from_quaternion.
+// For exact 180-degree rotations the antisymmetric part is zero, so the
+// relative signs of x, y and z cannot be recovered by this method.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(matrix_float3x3 m) {
+    float m00 = m.columns[0].x;
+    float m11 = m.columns[1].y;
+    float m22 = m.columns[2].z;
+    float x = sqrtf(fmaxf(0.0f, 1 + m00 - m11 - m22)) * 0.5f;
+    float y = sqrtf(fmaxf(0.0f, 1 - m00 + m11 - m22)) * 0.5f;
+    float z = sqrtf(fmaxf(0.0f, 1 - m00 - m11 + m22)) * 0.5f;
+    float w = sqrtf(fmaxf(0.0f, 1 + m00 + m11 + m22)) * 0.5f;
+    x = copysignf(x, m.columns[1].z - m.columns[2].y);
+    y = copysignf(y, m.columns[2].x - m.columns[0].z);
+    z = copysignf(z, m.columns[0].y - m.columns[1].x);
+    return quaternion(x, y, z, w);
+}
+
+// Extracts a quaternion from the upper-left 3x3 block of a 4x4 matrix.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(matrix_float4x4 m) {
+    const matrix_float3x3 upperLeft = matrix3x3_upper_left(m);
+    return quaternion(upperLeft);
+}
+
+// Length (Euclidean norm) of the quaternion treated as a 4-component vector,
+// i.e. sqrt(x*x + y*y + z*z + w*w).
+float AAPL_SIMD_OVERLOAD quaternion_length(quaternion_float q) {
+    const float norm = vector_length(q);
+    return norm;
+}
+
+// Squared length of the quaternion treated as a 4-component vector,
+// i.e. x*x + y*y + z*z + w*w.
+float AAPL_SIMD_OVERLOAD quaternion_length_squared(quaternion_float q) {
+    const float normSquared = vector_length_squared(q);
+    return normSquared;
+}
+
+// Returns the rotation axis of q.
+//
+// The query only makes sense for a unit quaternion. If |w| > 1 the input is
+// obviously not unit, so it is normalized first (the original check missed
+// w < -1). When the axis length is negligible the direction is arbitrary and
+// +X is returned.
+vector_float3 AAPL_SIMD_OVERLOAD quaternion_axis(quaternion_float q) {
+    if (fabsf(q.w) > 1.0f)
+    {
+        q = quaternion_normalize(q);
+    }
+
+    // Rounding can still leave w*w marginally above 1; clamp so sqrtf never
+    // sees a negative value (a NaN w is left untouched and propagates).
+    float sinSquared = 1 - q.w * q.w;
+    if (sinSquared < 0.0f)
+    {
+        sinSquared = 0.0f;
+    }
+
+    float axisLen = sqrtf(sinSquared);
+
+    if (axisLen < 1e-5)
+    {
+        // At lengths this small, direction is arbitrary
+        return vector_make(1, 0, 0);
+    }
+    else
+    {
+        return vector_make(q.x / axisLen, q.y / axisLen, q.z / axisLen);
+    }
+}
+
+// Returns the rotation angle of q in radians, computed as 2 * acos(w).
+// w is clamped to [-1, 1] first: a unit quaternion can carry a w that is
+// marginally outside that range after rounding, and acosf would return NaN.
+float AAPL_SIMD_OVERLOAD quaternion_angle(quaternion_float q) {
+    float w = q.w;
+    if (w > 1.0f)
+    {
+        w = 1.0f;
+    }
+    else if (w < -1.0f)
+    {
+        w = -1.0f;
+    }
+    return 2 * acosf(w);
+}
+
+// Scales q to unit length (q divided by its own length).
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_normalize(quaternion_float q) {
+    const quaternion_float unit = vector_normalize(q);
+    return unit;
+}
+
+// Returns the inverse of q: its conjugate divided by its squared length.
+// No guard is applied for a zero-length quaternion.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_inverse(quaternion_float q) {
+    const quaternion_float conjugate = quaternion_conjugate(q);
+    const float lengthSquared = quaternion_length_squared(q);
+    return conjugate / lengthSquared;
+}
+
+// Returns the conjugate of q: vector part negated, scalar part unchanged.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_conjugate(quaternion_float q) {
+    quaternion_float conjugate = q;
+    conjugate.xyz = -conjugate.xyz;
+    return conjugate;
+}
+
+// Returns the quaternion product q0 * q1 (order matters; the product is not
+// commutative).
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_multiply(quaternion_float q0, quaternion_float q1) {
+    const float px = q0.w*q1.x + q0.x*q1.w + q0.y*q1.z - q0.z*q1.y;
+    const float py = q0.w*q1.y - q0.x*q1.z + q0.y*q1.w + q0.z*q1.x;
+    const float pz = q0.w*q1.z + q0.x*q1.y - q0.y*q1.x + q0.z*q1.w;
+    const float pw = q0.w*q1.w - q0.x*q1.x - q0.y*q1.y - q0.z*q1.z;
+
+    quaternion_float product;
+    product.x = px;
+    product.y = py;
+    product.z = pz;
+    product.w = pw;
+    return product;
+}
+
+// Spherically interpolates from q0 (t = 0) to q1 (t = 1).
+//
+// - q and -q describe the same rotation, so when the inputs point into
+//   opposite hemispheres q1 is negated and the shorter arc is followed.
+// - When the inputs are (nearly) parallel, sin(halfTheta) is too small to
+//   divide by, so a normalized linear interpolation is used instead. The old
+//   fallback returned the plain average regardless of t, which collapsed to
+//   roughly zero for q1 ~= -q0.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_slerp(quaternion_float q0, quaternion_float q1, float t) {
+    float cosHalfTheta = vector_dot(q0, q1);
+
+    if (cosHalfTheta < 0.f)
+    {
+        q1 = -q1;
+        cosHalfTheta = -cosHalfTheta;
+    }
+
+    // Clamp the radicand: cosHalfTheta can exceed 1 through rounding or
+    // non-unit inputs, and such values must land in the fallback below.
+    float sinSquared = 1.f - cosHalfTheta * cosHalfTheta;
+    if (sinSquared < 0.f)
+    {
+        sinSquared = 0.f;
+    }
+    float sinHalfTheta = sqrtf(sinSquared);
+
+    if (sinHalfTheta < 0.001f)
+    {
+        return vector_normalize(q0 * (1.f - t) + q1 * t);
+    }
+
+    // Reached only with cosHalfTheta in [0, 1), so acosf is well defined.
+    float halfTheta = acosf(cosHalfTheta);
+    float srcWeight = sinf((1 - t) * halfTheta) / sinHalfTheta;
+    float dstWeight = sinf(t * halfTheta) / sinHalfTheta;
+
+    return srcWeight*q0 + dstWeight*q1;
+}
+
+// Rotates v by q using the expanded form
+//   2 (u.v) u + (s^2 - u.u) v + 2 s (u x v)
+// where u is the vector part of q and s its scalar part.
+vector_float3 AAPL_SIMD_OVERLOAD quaternion_rotate_vector(quaternion_float q, vector_float3 v) {
+    const vector_float3 u = q.xyz;
+    const float s = q.w;
+    return 2 * vector_dot(u, v) * u +
+           ((s * s) - vector_dot(u, u)) * v +
+           2 * s * vector_cross(u, v);
+}
+
+// Converts a 3x3 rotation matrix to a normalized quaternion.
+//
+// Chooses one of four formulas depending on which of w, x, y or z is expected
+// to have the largest magnitude, so the divisor ("diagonal") stays away from
+// zero. Elements are read as m.columns[column][row].
+// NOTE(review): the off-diagonal differences are taken as
+// columns[2][1] - columns[1][2] etc.; confirm this sign convention matches
+// the other matrix/quaternion conversions in this file.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_matrix3x3(matrix_float3x3 m)
+{
+    quaternion_float q;
+
+    // Note: despite the name, this is 1 + the sum of the diagonal elements.
+    float trace = 1 + m.columns[0][0] + m.columns[1][1] + m.columns[2][2];
+
+    if(trace > 0)
+    {
+        // w-dominant case: q.w is diagonal / 4, the rest come from
+        // differences of off-diagonal pairs.
+        float diagonal = sqrt(trace) * 2.0;
+
+        q.x = (m.columns[2][1] - m.columns[1][2]) / diagonal;
+        q.y = (m.columns[0][2] - m.columns[2][0]) / diagonal;
+        q.z = (m.columns[1][0] - m.columns[0][1]) / diagonal;
+        q.w = diagonal / 4.0;
+
+    }  else if ((m.columns[0][0] > m.columns[1][1] ) &&
+                (m.columns[0][0] > m.columns[2][2])) {
+
+        // x-dominant case: columns[0][0] is the largest diagonal element.
+        float diagonal = sqrt( 1.0 + m.columns[0][0] - m.columns[1][1] - m.columns[2][2] ) * 2.0;
+
+        q.x = diagonal / 4.0;
+        q.y = (m.columns[0][1] + m.columns[1][0]) / diagonal;
+        q.z = (m.columns[0][2] + m.columns[2][0]) / diagonal;
+        q.w = (m.columns[2][1] - m.columns[1][2]) / diagonal;
+
+    } else if ( m.columns[1][1] > m.columns[2][2]) {
+
+        // y-dominant case: columns[1][1] is the largest diagonal element.
+        float diagonal = sqrt(1.0 + m.columns[1][1] - m.columns[0][0] - m.columns[2][2]) * 2.0;
+
+        q.x = (m.columns[0][1] + m.columns[1][0]) / diagonal;
+        q.y = diagonal / 4.0;
+        q.z = (m.columns[1][2] + m.columns[2][1]) / diagonal;
+        q.w = (m.columns[0][2] - m.columns[2][0]) / diagonal;
+
+    } else {
+
+        // z-dominant case: columns[2][2] is the largest diagonal element.
+        float diagonal = sqrt(1.0 + m.columns[2][2] - m.columns[0][0] - m.columns[1][1]) * 2.0;
+
+        q.x = (m.columns[0][2] + m.columns[2][0]) / diagonal;
+        q.y = (m.columns[1][2] + m.columns[2][1]) / diagonal;
+        q.z = diagonal / 4.0;
+        q.w = (m.columns[1][0] - m.columns[0][1]) / diagonal;
+    }
+
+    // Renormalize to absorb rounding error from the chosen branch.
+    q = quaternion_normalize(q);
+    return q;
+}
+
+// Builds an orientation quaternion from a forward and an up direction.
+//
+// forward is kept exactly; up only needs to be roughly upward. side is
+// derived as up x forward, and up is then recomputed as forward x side so the
+// three axes are orthonormal even when the caller's up is not perpendicular
+// to forward (for perpendicular inputs the recomputed up equals the input).
+// If forward and up are parallel the cross product is zero and the result is
+// undefined.
+//
+// right_handed != 0 remaps the left-handed result by swizzling the
+// components to (y, x, w, z) and negating the new x and w.
+static inline quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_direction_vectors(vector_float3 forward, vector_float3 up, int right_handed) {
+
+    forward = vector_normalize(forward);
+    up = vector_normalize(up);
+
+    vector_float3 side = vector_normalize(vector_cross(up, forward));
+
+    // Re-orthogonalize: forward and side are unit and perpendicular, so this
+    // cross product is already unit length.
+    up = vector_cross(forward, side);
+
+    matrix_float3x3 m = { side, up, forward };
+
+    quaternion_float q = quaternion_from_matrix3x3(m);
+
+    if(right_handed) {
+        q = q.yxwz;
+        q.xw = -q.xw;
+    }
+
+    q = vector_normalize(q);
+
+    return q;
+}
+
+// Right-handed variant: forwards to the shared helper with right_handed set.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_direction_vectors_right_hand(vector_float3 forward, vector_float3 up) {
+    const int rightHanded = 1;
+    return quaternion_from_direction_vectors(forward, up, rightHanded);
+}
+
+// Left-handed variant: forwards to the shared helper with right_handed clear.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_direction_vectors_left_hand(vector_float3 forward, vector_float3 up) {
+    const int rightHanded = 0;
+    return quaternion_from_direction_vectors(forward, up, rightHanded);
+}
+
+// Derives the forward (Z) direction encoded by q and returns it normalized.
+vector_float3 AAPL_SIMD_OVERLOAD forward_direction_vector_from_quaternion(quaternion_float q) {
+    const float fx = 2.0 * (q.x*q.z - q.w*q.y);
+    const float fy = 2.0 * (q.y*q.z + q.w*q.x);
+    const float fz = 1.0 - 2.0 * ((q.x * q.x) + (q.y * q.y));
+
+    return vector_normalize(vector_make(fx, fy, fz));
+}
+
+// Derives the up (Y) direction encoded by q and returns it normalized.
+// The result is not negated here; callers working in a right-handed
+// coordinate system are expected to negate it themselves.
+vector_float3 AAPL_SIMD_OVERLOAD up_direction_vector_from_quaternion(quaternion_float q) {
+    const float ux = 2.0 * (q.x*q.y + q.w*q.z);
+    const float uy = 1.0 - 2.0 * (q.x*q.x + q.z*q.z);
+    const float uz = 2.0 * (q.y*q.z - q.w*q.x);
+
+    return vector_normalize(vector_make(ux, uy, uz));
+}
+
+// Derives the right (X) direction encoded by q and returns it normalized.
+// The result is not negated here; callers working in a right-handed
+// coordinate system are expected to negate it themselves.
+vector_float3 AAPL_SIMD_OVERLOAD right_direction_vector_from_quaternion(quaternion_float q) {
+    const float rx = 1.0 - 2.0 * (q.y * q.y + q.z * q.z);
+    const float ry = 2.0 * (q.x * q.y - q.w * q.z);
+    const float rz = 2.0 * (q.x * q.z + q.w * q.y);
+
+    return vector_normalize(vector_make(rx, ry, rz));
+}
diff --git a/renderer/metal/AAPLMathUtilities.h b/renderer/metal/AAPLMathUtilities.h
index a098e82..a94dbef 100644
--- a/renderer/metal/AAPLMathUtilities.h
+++ b/renderer/metal/AAPLMathUtilities.h
@@ -1,8 +1,266 @@
-//
-// Created by Vicente Ferrari Smith on 27.02.26.
-//
+/*
+See LICENSE folder for this sample’s licensing information.
 
-#ifndef V_AAPLMATHUTILITIES_H
-#define V_AAPLMATHUTILITIES_H
+Abstract:
+Header for vector, matrix, and quaternion math utility functions useful for 3D graphics rendering.
+*/
 
-#endif //V_AAPLMATHUTILITIES_H
+// Guard against multiple inclusion: this header defines a static inline
+// function, which would be redefined if the header were included twice.
+#pragma once
+
+#include <stdlib.h>
+#include <simd/simd.h>
+
+// Because these are common methods, allow other libraries to overload their implementation.
+#define AAPL_SIMD_OVERLOAD __attribute__((__overloadable__))
+
+/// A single-precision quaternion type.
+typedef vector_float4 quaternion_float;
+
+/// Given a uint16_t encoded as a 16-bit float, returns a 32-bit float.
+float AAPL_SIMD_OVERLOAD float32_from_float16(uint16_t i);
+
+// Given a 32-bit float, returns a uint16_t encoded as a 16-bit float.
+uint16_t AAPL_SIMD_OVERLOAD float16_from_float32(float f);
+
+/// Returns the number of degrees in the specified number of radians.
+float AAPL_SIMD_OVERLOAD degrees_from_radians(float radians);
+
+/// Returns the number of radians in the specified number of degrees.
+float AAPL_SIMD_OVERLOAD radians_from_degrees(float degrees);
+
+/// Generates a random float value inside the given range [min, max].
+/// random() returns values in [0, 2^31 - 1], so the result is scaled by that
+/// maximum rather than by RAND_MAX, which describes rand() and is not
+/// guaranteed to match.
+inline static float AAPL_SIMD_OVERLOAD  random_float(float min, float max)
+{
+    const double unit = (double)random() / 2147483647.0;
+    return (unit * (max-min)) + min;
+}
+
+/// Generate a random three-component vector with values between min and max.
+vector_float3 AAPL_SIMD_OVERLOAD generate_random_vector(float min, float max);
+
+/// Fast random seed.
+void AAPL_SIMD_OVERLOAD seedRand(uint32_t seed);
+
+/// Fast integer random.
+int32_t AAPL_SIMD_OVERLOAD randi(void);
+
+/// Fast floating-point random.
+float AAPL_SIMD_OVERLOAD randf(float x);
+
+/// Returns a vector that is linearly interpolated between the two given vectors.
+vector_float3 AAPL_SIMD_OVERLOAD vector_lerp(vector_float3 v0, vector_float3 v1, float t);
+
+/// Returns a vector that is linearly interpolated between the two given vectors.
+vector_float4 AAPL_SIMD_OVERLOAD vector_lerp(vector_float4 v0, vector_float4 v1, float t);
+
+/// Converts a unit-norm quaternion into its corresponding rotation matrix.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_from_quaternion(quaternion_float q);
+
+/// Constructs a matrix_float3x3 from three rows of three columns with float values.
+/// Indices are m<column><row>.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix_make_rows(float m00, float m10, float m20,
+                                                    float m01, float m11, float m21,
+                                                    float m02, float m12, float m22);
+
+/// Constructs a matrix_float4x4 from four rows of four columns with float values.
+/// Indices are m<column><row>.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_make_rows(float m00, float m10, float m20, float m30,
+                                                    float m01, float m11, float m21, float m31,
+                                                    float m02, float m12, float m22, float m32,
+                                                    float m03, float m13, float m23, float m33);
+
+/// Constructs a matrix_float3x3 from 3 vector_float3 column vectors.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix_make_columns(vector_float3 col0,
+                                                       vector_float3 col1,
+                                                       vector_float3 col2);
+
+/// Constructs a matrix_float4x4 from 4 vector_float4 column vectors.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_make_columns(vector_float4 col0,
+                                                       vector_float4 col1,
+                                                       vector_float4 col2,
+                                                       vector_float4 col3);
+
+/// Constructs a rotation matrix from the given angle and axis.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_rotation(float radians, vector_float3 axis);
+
+/// Constructs a rotation matrix from the given angle and axis.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_rotation(float radians, float x, float y, float z);
+
+/// Constructs a scaling matrix with the specified scaling factors.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_scale(float x, float y, float z);
+
+/// Constructs a scaling matrix, using the given vector as an array of scaling factors.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_scale(vector_float3 s);
+
+/// Extracts the upper-left 3x3 submatrix of the given 4x4 matrix.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix3x3_upper_left(matrix_float4x4 m);
+
+/// Returns the inverse of the transpose of the given matrix.
+matrix_float3x3 AAPL_SIMD_OVERLOAD matrix_inverse_transpose(matrix_float3x3 m);
+
+/// Constructs a homogeneous (4x4) rotation matrix from the given quaternion.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_from_quaternion(quaternion_float q);
+
+/// Constructs a rotation matrix from the provided angle and axis
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_rotation(float radians, vector_float3 axis);
+
+/// Constructs a rotation matrix from the given angle and axis.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_rotation(float radians, float x, float y, float z);
+
+/// Constructs an identity matrix.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_identity(void);
+
+/// Constructs a scaling matrix with the given scaling factors.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_scale(float sx, float sy, float sz);
+
+/// Constructs a scaling matrix, using the given vector as an array of scaling factors.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_scale(vector_float3 s);
+
+/// Constructs a translation matrix that translates by the vector (tx, ty, tz).
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_translation(float tx, float ty, float tz);
+
+/// Constructs a translation matrix that translates by the vector (t.x, t.y, t.z).
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_translation(vector_float3 t);
+
+/// Constructs a transformation matrix that scales by the vector (s.x, s.y, s.z)
+/// and translates by the vector (t.x, t.y, t.z).
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix4x4_scale_translation(vector_float3 s, vector_float3 t);
+
+/// Starting with left-hand world coordinates, constructs a view matrix that is
+/// positioned at (eyeX, eyeY, eyeZ) and looks toward (centerX, centerY, centerZ),
+/// with the vector (upX, upY, upZ) pointing up for a left-hand coordinate system.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_left_hand(float eyeX, float eyeY, float eyeZ,
+                                                            float centerX, float centerY, float centerZ,
+                                                            float upX, float upY, float upZ);
+
+/// Starting with left-hand world coordinates, constructs a view matrix that is
+/// positioned at (eye) and looks toward (target), with the vector (up) pointing
+/// up for a left-hand coordinate system.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_left_hand(vector_float3 eye,
+                                                            vector_float3 target,
+                                                            vector_float3 up);
+
+/// Starting with right-hand world coordinates, constructs a view matrix that is
+/// positioned at (eyeX, eyeY, eyeZ) and looks toward (centerX, centerY, centerZ),
+/// with the vector (upX, upY, upZ) pointing up for a right-hand coordinate system.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_right_hand(float eyeX, float eyeY, float eyeZ,
+                                                             float centerX, float centerY, float centerZ,
+                                                             float upX, float upY, float upZ);
+
+/// Starting with right-hand world coordinates, constructs a view matrix that is
+/// positioned at (eye) and looks toward (target), with the vector (up) pointing
+/// up for a right-hand coordinate system.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_look_at_right_hand(vector_float3 eye,
+                                                             vector_float3 target,
+                                                             vector_float3 up);
+
+/// Constructs a symmetric orthographic projection matrix, from left-hand eye
+/// coordinates to left-hand clip coordinates.
+/// That maps (left, top) to (-1, 1), (right, bottom) to (1, -1), and (nearZ, farZ) to (0, 1).
+/// The first four arguments are signed eye coordinates.
+/// nearZ and farZ are absolute distances from the eye to the near and far clip planes.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_ortho_left_hand(float left, float right, float bottom, float top, float nearZ, float farZ);
+
+/// Constructs a symmetric orthographic projection matrix, from right-hand eye
+/// coordinates to right-hand clip coordinates.
+/// That maps (left, top) to (-1, 1), (right, bottom) to (1, -1), and (nearZ, farZ) to (0, 1).
+/// The first four arguments are signed eye coordinates.
+/// nearZ and farZ are absolute distances from the eye to the near and far clip planes.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_ortho_right_hand(float left, float right, float bottom, float top, float nearZ, float farZ);
+
+/// Constructs a symmetric perspective projection matrix, from left-hand eye
+/// coordinates to left-hand clip coordinates, with a vertical viewing angle of
+/// fovyRadians, the given aspect ratio, and the given absolute near and far
+/// z distances from the eye.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_perspective_left_hand(float fovyRadians, float aspect, float nearZ, float farZ);
+
+/// Constructs a symmetric perspective projection matrix, from right-hand eye
+/// coordinates to right-hand clip coordinates, with a vertical viewing angle of
+/// fovyRadians, the given aspect ratio, and the given absolute near and far
+/// z distances from the eye.
+matrix_float4x4  AAPL_SIMD_OVERLOAD matrix_perspective_right_hand(float fovyRadians, float aspect, float nearZ, float farZ);
+
+/// Construct a general frustum projection matrix, from right-hand eye
+/// coordinates to left-hand clip coordinates.
+/// The bounds left, right, bottom, and top, define the visible frustum at the near clip plane.
+/// The first four arguments are signed eye coordinates.
+/// nearZ and farZ are absolute distances from the eye to the near and far clip planes.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_perspective_frustum_right_hand(float left, float right, float bottom, float top, float nearZ, float farZ);
+
+/// Returns the inverse of the transpose of the given matrix.
+matrix_float4x4 AAPL_SIMD_OVERLOAD matrix_inverse_transpose(matrix_float4x4 m);
+
+/// Constructs an identity quaternion.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_identity(void);
+
+/// Constructs a quaternion of the form w + xi + yj + zk.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(float x, float y, float z, float w);
+
+/// Constructs a quaternion of the form w + v.x*i + v.y*j + v.z*k.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(vector_float3 v, float w);
+
+/// Constructs a unit-norm quaternion that represents rotation by the given angle about the axis (x, y, z).
+/// NOTE(review): this prototype has the same parameter types as
+/// quaternion(float x, float y, float z, float w), so it redeclares that
+/// function instead of adding an overload. A call is presumably served by the
+/// component-wise (x, y, z, w) constructor — confirm against the
+/// implementation, and prefer quaternion_from_axis_angle for axis-angle input.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(float radians, float x, float y, float z);
+
+/// Constructs a unit-norm quaternion that represents rotation by the given angle about the specified axis.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(float radians, vector_float3 axis);
+
+/// Constructs a unit-norm quaternion from the given matrix.
+/// The result is undefined if the matrix does not represent a pure rotation.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(matrix_float3x3 m);
+
+/// Constructs a unit-norm quaternion from the given matrix.
+/// The result is undefined if the matrix does not represent a pure rotation.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion(matrix_float4x4 m);
+
+/// Returns the length of the given quaternion.
+float AAPL_SIMD_OVERLOAD quaternion_length(quaternion_float q);
+
+/// Returns the squared length of the given quaternion.
+float AAPL_SIMD_OVERLOAD quaternion_length_squared(quaternion_float q);
+
+/// Returns the rotation axis of the given unit-norm quaternion.
+vector_float3 AAPL_SIMD_OVERLOAD quaternion_axis(quaternion_float q);
+
+/// Returns the rotation angle of the given unit-norm quaternion.
+float AAPL_SIMD_OVERLOAD quaternion_angle(quaternion_float q);
+
+/// Returns a quaternion from the given rotation axis and angle, in radians.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_axis_angle(vector_float3 axis, float radians);
+
+/// Returns a quaternion from the given 3x3 rotation matrix.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_matrix3x3(matrix_float3x3 m);
+
+/// Returns a quaternion from the given Euler angles, in radians.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_euler(vector_float3 euler);
+
+/// Returns a unit-norm quaternion.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_normalize(quaternion_float q);
+
+/// Returns the inverse quaternion of the given quaternion.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_inverse(quaternion_float q);
+
+/// Returns the conjugate quaternion of the given quaternion.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_conjugate(quaternion_float q);
+
+/// Returns the product of the two given quaternions.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_multiply(quaternion_float q0, quaternion_float q1);
+
+/// Returns the quaternion that results from spherically interpolating between the two given quaternions.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_slerp(quaternion_float q0, quaternion_float q1, float t);
+
+/// Returns the vector that results from rotating the given vector by the given unit-norm quaternion.
+vector_float3 AAPL_SIMD_OVERLOAD quaternion_rotate_vector(quaternion_float q, vector_float3 v);
+
+/// Returns the quaternion for the given forward and up vectors for right-hand coordinate systems.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_direction_vectors_right_hand(vector_float3 forward, vector_float3 up);
+
+/// Returns the quaternion for the given forward and up vectors for left-hand coordinate systems.
+quaternion_float AAPL_SIMD_OVERLOAD quaternion_from_direction_vectors_left_hand(vector_float3 forward, vector_float3 up);
+
+/// Returns a vector in the +Z direction for the given quaternion.
+vector_float3 AAPL_SIMD_OVERLOAD forward_direction_vector_from_quaternion(quaternion_float q);
+
+/// Returns a vector in the +Y direction for the given quaternion (for a left-handed coordinate system,
+///   negate for a right-hand coordinate system).
+vector_float3 AAPL_SIMD_OVERLOAD up_direction_vector_from_quaternion(quaternion_float q);
+
+/// Returns a vector in the +X direction for the given quaternion (for a left-hand coordinate system,
+///   negate for a right-hand coordinate system).
+vector_float3 AAPL_SIMD_OVERLOAD right_direction_vector_from_quaternion(quaternion_float q);
diff --git a/renderer/metal/metal.cpp b/renderer/metal/metal.cpp
index bc254c6..34decbc 100644
--- a/renderer/metal/metal.cpp
+++ b/renderer/metal/metal.cpp
@@ -2,8 +2,18 @@
 // Created by Vicente Ferrari Smith on 26.02.26.
 //
 
-#include "init.h"
+#define NS_PRIVATE_IMPLEMENTATION
+#define MTL_PRIVATE_IMPLEMENTATION
+#define MTK_PRIVATE_IMPLEMENTATION
+#define CA_PRIVATE_IMPLEMENTATION
+
+#include <Foundation/Foundation.hpp>
+#include <Metal/Metal.hpp>
+#include <QuartzCore/QuartzCore.hpp>
+
+#include "metal.h"
 #include "../graphics.h"
+#include "../texture.h"
 #include <print>
 #include <iostream>
 #include <fstream>
@@ -12,20 +22,41 @@
 #include <GLFW/glfw3native.h>
 #include <objc/message.h>
 #include <objc/objc.h>
-#include "vertex_data.h"
 
-Device metal_device{};
-MTL::Buffer* triangle_vertex_buffer{};
-MTL::CommandQueue *queue{};
-CA::MetalLayer *metal_layer{};
-MTL::RenderPipelineState *pipeline_state{};
-CA::MetalDrawable *metal_drawable{};
-MTL::CommandBuffer* metal_command_buffer{};
+extern int32_t window_width;
+extern int32_t window_height;
 
-MTL::Function *vertex_shader{};
-MTL::Function *fragment_shader{};
+// Shared Metal state for this backend; renderer.cpp refers to these via extern.
+// Device wrapper; its device pointer is filled in by create_device().
+Device                    metal_device{};
+// Command queue created by create_command_queue(), released in graphics_deinit().
+MTL::CommandQueue        *queue{};
+// Layer configured by create_metal_layer(), released in graphics_deinit().
+CA::MetalLayer           *metal_layer{};
+// Drawable for the frame in flight; assigned in end_frame() from nextDrawable().
+CA::MetalDrawable        *metal_drawable{};
 
-void create_window(GLFWwindow *window) {
+Renderer renderer;
+
+// Creates an RGBA8 shader-readable Metal texture of size w x h, fills it from
+// `pixels` (tightly packed, 4 bytes per pixel) and stores it in
+// texture->p_texture->texture.
+//
+// Throws std::runtime_error when the arguments are unusable or when Metal
+// fails to allocate the texture, instead of dereferencing a null pointer.
+// NOTE(review): a texture previously stored in p_texture->texture is
+// overwritten without being released, as before — confirm ownership.
+void upload_texture(
+    const int w,
+    const int h,
+    const void *pixels,
+    Texture *texture)
+{
+    if (w <= 0 || h <= 0 || pixels == nullptr ||
+        texture == nullptr || texture->p_texture == nullptr) {
+        throw std::runtime_error("upload_texture: invalid arguments");
+    }
+
+    MTL::TextureDescriptor *td = MTL::TextureDescriptor::alloc()->init();
+    td->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
+    td->setWidth(w);
+    td->setHeight(h);
+    td->setStorageMode(MTL::StorageModeShared);
+    td->setUsage(MTL::TextureUsageShaderRead);
+
+    MTL::Texture *gpu_texture = metal_device.device->newTexture(td);
+
+    // The descriptor is only needed for creation; release it on every path.
+    td->release();
+
+    if (gpu_texture == nullptr) {
+        throw std::runtime_error("upload_texture: failed to create Metal texture");
+    }
+
+    MTL::Region region = MTL::Region(0, 0, 0, w, h, 1);
+    // Widen before multiplying so very wide images cannot overflow int.
+    NS::UInteger bytesPerRow = 4 * static_cast<NS::UInteger>(w);
+
+    gpu_texture->replaceRegion(region, 0, pixels, bytesPerRow);
+
+    texture->p_texture->texture = gpu_texture;
+}
+
+void create_metal_layer(GLFWwindow *window) {
     void *ns_window = glfwGetCocoaWindow(window);
     if (!ns_window) {
         throw std::runtime_error("Failed to get Cocoa window from GLFWwindow");
@@ -51,44 +82,11 @@ void create_window(GLFWwindow *window) {
     metal_layer->setColorspace(p3Space);
 }
 
-void encode_render_command(MTL::RenderCommandEncoder *renderCommandEncoder) {
-    renderCommandEncoder->setRenderPipelineState(pipeline_state);
-    renderCommandEncoder->setVertexBuffer(triangle_vertex_buffer, 0, 0);
-    MTL::PrimitiveType typeTriangle = MTL::PrimitiveTypeTriangle;
-    NS::UInteger vertexStart = 0;
-    NS::UInteger vertexCount = 6;
-    renderCommandEncoder->drawPrimitives(typeTriangle, vertexStart, vertexCount);
-}
-
-void send_render_command() {
-    metal_command_buffer = queue->commandBuffer();
-
-    MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
-    MTL::RenderPassColorAttachmentDescriptor *cd = renderPassDescriptor->colorAttachments()->object(0);
-    cd->setTexture(metal_drawable->texture());
-    cd->setLoadAction(MTL::LoadActionClear);
-    cd->setClearColor(MTL::ClearColor(
-        100.0f / 255.0f,
-        149.0f / 255.0f,
-        237.0f / 255.0f,
-        1.0
-        ));
-    cd->setStoreAction(MTL::StoreActionStore);
-
-    MTL::RenderCommandEncoder* renderCommandEncoder = metal_command_buffer->renderCommandEncoder(renderPassDescriptor);
-    encode_render_command(renderCommandEncoder);
-    renderCommandEncoder->endEncoding();
-
-    metal_command_buffer->presentDrawable(metal_drawable);
-    metal_command_buffer->commit();
-    metal_command_buffer->waitUntilCompleted();
-
-    renderPassDescriptor->release();
-}
-
-void LoadMetalShader(const std::string &shader_path,
-                     const std::string &vertex_fn_name,
-                     const std::string &fragment_fn_name)
+void load_metal_shader(const std::string &shader_path,
+                       const std::string &vertex_fn_name,
+                       const std::string &fragment_fn_name,
+                       MTL::Function **vertex_shader,
+                       MTL::Function **fragment_shader)
 {
     NS::Error *error = nullptr;
     MTL::Library *library = nullptr;
@@ -137,80 +135,72 @@ void LoadMetalShader(const std::string &shader_path,
     }
     NS::String *vname = NS::String::string(vertex_fn_name.c_str(), NS::UTF8StringEncoding);
     NS::String *fname = NS::String::string(fragment_fn_name.c_str(), NS::UTF8StringEncoding);
-    vertex_shader = library->newFunction(vname);
-    fragment_shader = library->newFunction(fname);
+    *vertex_shader = library->newFunction(vname);
+    *fragment_shader = library->newFunction(fname);
 
-    if (vertex_shader == nullptr || fragment_shader == nullptr) {
+    if (*vertex_shader == nullptr || *fragment_shader == nullptr) {
         throw std::runtime_error("Failed to create Metal shader functions");
     }
 
     library->release();
 }
 
-void create_render_pipeline() {
-    LoadMetalShader("shaders/shader.metal", "vertex_main", "fragment_main");
-
-    MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
-    renderPipelineDescriptor->setLabel(NS::String::string("Triangle Rendering Pipeline", NS::ASCIIStringEncoding));
-    renderPipelineDescriptor->setVertexFunction(vertex_shader);
-    renderPipelineDescriptor->setFragmentFunction(fragment_shader);
-    assert(renderPipelineDescriptor);
-    const MTL::PixelFormat pixel_format = metal_layer->pixelFormat();
-    renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(pixel_format);
-
-    NS::Error* error;
-    pipeline_state = metal_device.device->newRenderPipelineState(renderPipelineDescriptor, &error);
-    renderPipelineDescriptor->release();
-}
-
 void create_command_queue() {
     queue = metal_device.device->newCommandQueue();
 }
 
-void create_triangle() {
-    VertexData square_vertices[] = {
-        {{-0.5, -0.5}, {1.0, 0.0, 0.0, 1.0}},
-        {{0.5, -0.5}, {0.0, 1.0, 0.0, 1.0}},
-        {{0.5, 0.5}, {0.0, 0.0, 1.0, 1.0}},
-
-        {{0.5, 0.5}, {0.0, 0.0, 1.0, 1.0}},
-        {{-0.5, 0.5}, {0.0, 1.0, 0.0, 1.0}},
-        {{-0.5, -0.5}, {1.0, 0.0, 0.0, 1.0}},
-    };
-
-    triangle_vertex_buffer = metal_device.device->newBuffer(&square_vertices,
-                                                  sizeof(square_vertices),
-                                                  MTL::ResourceStorageModeShared);
-
-}
-
-void graphics_init(GLFWwindow *window) {
-    std::println("wow, we are on macos!! crazy!!");
-
-    create_device();
-    create_window(window);
-    create_triangle();
-    create_command_queue();
-    create_render_pipeline();
-}
-
-void graphics_deinit() {
-
-}
-
-void begin_frame() {
-
-}
-
-void end_frame() {
-    auto pPool = NS::AutoreleasePool::alloc()->init();
-    metal_drawable = metal_layer->nextDrawable();
-
-    send_render_command();
-
-    pPool->release();
-}
-
 void create_device() {
     metal_device.device = MTL::CreateSystemDefaultDevice();
 }
+
+// Initializes the macOS/Metal backend: creates the Slang session that targets
+// Metal (searching the "shaders" directory), then the Metal device, layer and
+// command queue, and finally constructs the global renderer for `window`.
+//
+// Throws std::runtime_error if the Slang session or the Metal device cannot
+// be created, so later code never runs against a null session or device.
+void platform_graphics_init(GLFWwindow *window) {
+    std::println("wow, we are on macos!! crazy!!");
+
+    auto slangTargets{ std::to_array<slang::TargetDesc>({ {
+        .format = SLANG_METAL,
+        .profile = slangGlobalSession->findProfile("metallib_2_4"),
+    } })};
+    // auto slangOptions{ std::to_array<slang::CompilerOptionEntry>({ {
+    //     slang::CompilerOptionName::EmitSpirvDirectly,
+    //     {slang::CompilerOptionValueKind::Int, 1}
+    // } })};
+    const char *search_paths[] = {"shaders"};
+    slang::SessionDesc slangSessionDesc{
+        .targets = slangTargets.data(),
+        .targetCount = SlangInt(slangTargets.size()),
+        .searchPaths = search_paths,
+        .searchPathCount = 1,
+        // .defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_COLUMN_MAJOR,
+    };
+
+    // createSession reports failure through its SlangResult return value.
+    if (SLANG_FAILED(slangGlobalSession->createSession(slangSessionDesc, slangSession.writeRef()))) {
+        throw std::runtime_error("Failed to create Slang session");
+    }
+
+    create_device();
+    if (metal_device.device == nullptr) {
+        throw std::runtime_error("Failed to create Metal device");
+    }
+
+    create_metal_layer(window);
+    create_command_queue();
+
+    renderer = Renderer(window);
+}
+
+// Releases the queue, layer and device created during initialization.
+// Each pointer is checked and then cleared, so the function is safe to call
+// after a partial initialization or more than once.
+void graphics_deinit() {
+    // The drawable is not owned here; just drop the stale pointer.
+    metal_drawable = nullptr;
+
+    if (queue != nullptr) {
+        queue->release();
+        queue = nullptr;
+    }
+    if (metal_layer != nullptr) {
+        metal_layer->release();
+        metal_layer = nullptr;
+    }
+    if (metal_device.device != nullptr) {
+        metal_device.device->release();
+        metal_device.device = nullptr;
+    }
+}
+
+// Starts a new frame by forwarding to the global renderer.
+void begin_frame() {
+    renderer.begin_frame();
+}
+
+// Finishes the current frame: acquires the next drawable from the layer and
+// lets the renderer finish the frame for `window`, all inside a per-frame
+// autorelease pool.
+// NOTE(review): nextDrawable() can presumably return null; confirm that
+// Renderer::end_frame copes with a null metal_drawable.
+void end_frame(GLFWwindow *window) {
+    auto pPool = NS::AutoreleasePool::alloc()->init();
+    metal_drawable = metal_layer->nextDrawable();
+
+    renderer.end_frame(window);
+
+    pPool->release();
+
+    // The drawable is not retained by this code, so the pointer must not be
+    // used once the pool has been drained; clear it instead of leaving it
+    // dangling until the next frame.
+    metal_drawable = nullptr;
+}
+
+// Queues `sprite` for drawing at `pos` by forwarding to the global renderer.
+// The glm position is repacked as an {x, y} pair for Renderer::submit_sprite.
+void submit_sprite(glm::vec2 pos, const sprite_t &sprite) {
+    renderer.submit_sprite({pos.x, pos.y}, sprite);
+}
diff --git a/renderer/metal/metal.h b/renderer/metal/metal.h
index 9b44768..f1d1634 100644
--- a/renderer/metal/metal.h
+++ b/renderer/metal/metal.h
@@ -2,8 +2,8 @@
 // Created by Vicente Ferrari Smith on 26.02.26.
 //
 
-#ifndef M_INIT_H
-#define M_INIT_H
+#ifndef V_METAL_H
+#define V_METAL_H
 
 #include <GLFW/glfw3.h>
 #define GLFW_EXPOSE_NATIVE_COCOA
@@ -17,6 +17,18 @@ struct Device {
     MTL::Device *device;
 };
 
-void create_device();
+struct PlatformTexture { // Holds a raw MTL::Texture pointer; ownership is not visible from this header.
+    MTL::Texture *texture;
+};
 
-#endif //M_INIT_H
\ No newline at end of file
+struct Queue { // Holds a raw MTL::CommandQueue pointer; ownership is not visible from this header.
+    MTL::CommandQueue *queue;
+};
+
+void load_metal_shader(const std::string &shader_path,       // NOTE(review): declaration only; the parameter notes below are presumed — confirm against the definition
+                       const std::string &vertex_fn_name,    // presumably the vertex function to look up
+                       const std::string &fragment_fn_name,  // presumably the fragment function to look up
+                       MTL::Function **vertex_shader,        // presumably an out-parameter for the vertex function
+                       MTL::Function **fragment_shader);     // presumably an out-parameter for the fragment function
+
+#endif //V_METAL_H
\ No newline at end of file
diff --git a/renderer/metal/renderer.cpp b/renderer/metal/renderer.cpp
index 17ad577..5424707 100644
--- a/renderer/metal/renderer.cpp
+++ b/renderer/metal/renderer.cpp
@@ -2,18 +2,24 @@
 // Created by Vicente Ferrari Smith on 13.02.26.
 //
 
-#include "../graphics.h"
-
 #include <print>
-
-#include "init.h"
+#include <slang.h>
+#include <slang-com-ptr.h>
+#include "../graphics.h"
 #include "../sprite.h"
-#include <vma/vk_mem_alloc.h>
-#include <slang/slang.h>
+#include "renderer.h"
+
+#include "metal.h"
+#include "vertex_data.h"
 
 extern int32_t window_width;
 extern int32_t window_height;
 
+extern Device                    metal_device;
+extern MTL::CommandQueue        *queue;
+extern CA::MetalLayer           *metal_layer;
+extern CA::MetalDrawable        *metal_drawable;
+
 bool SortKey::operator<(const SortKey& b) const {
     if (depth != b.depth) return depth < b.depth;
     if (pipeline != b.pipeline) return pipeline < b.pipeline;
@@ -22,28 +28,40 @@ bool SortKey::operator<(const SortKey& b) const {
 
 Renderer::Renderer(GLFWwindow *window) {
 
-    create_pipeline_layout();
-    colored_quad_pipeline = create_graphics_pipeline<vertex_p2_s2_st2_col4_a1_u32>(
-        device,
-        pipelineLayout,
-        swapchain_format.format,
-        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
-        true
-        );
-    create_default_sampler();
-    create_descriptor_pool();
-    createFrameResources();
+    create_render_pipeline();
+
+    frame_semaphore = dispatch_semaphore_create(kMaxFramesInFlight);
+    current_frame = 0;
+
+    for (Frame &frame : frames) {
+        frame.vertex_buffer = metal_device.device->newBuffer(
+            4 * 1024 * 1024,
+            MTL::ResourceStorageModeShared
+            );
+
+        frame.uniform_buffer = metal_device.device->newBuffer(
+            sizeof(simd::float4x4),
+            MTL::ResourceStorageModeShared
+            );
+    }
+
+    // colored_quad_pipeline = create_graphics_pipeline<vertex_p2_s2_st2_col4_a1_u32>(
+    //     device,
+    //     pipelineLayout,
+    //     swapchain_format.format,
+    //     VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+    //     true
+    //     );
+    // create_default_sampler();
+    // create_descriptor_pool();
+    // createFrameResources();
 }
 
 void Renderer::begin_frame() {
     commands.clear();
 }
 
-void Renderer::flush() {
-
-}
-
-void Renderer::submit_quad(glm::vec2 pos, glm::vec2 scale) {
+void Renderer::submit_quad(simd::float2 pos, simd::float2 scale) {
     RenderCommand cmd {};
     cmd.pipeline = PipelineType::ColoredQuad;
     cmd.key = {
@@ -55,13 +73,13 @@ void Renderer::submit_quad(glm::vec2 pos, glm::vec2 scale) {
     cmd.colored_quad = {
         .pos = pos,
         .scale = scale,
-        .color = {0, 1, 1, 1},
+        .colour = {0, 1, 1, 1},
     };
 
     commands.push_back(cmd);
 }
 
-void Renderer::submit_sprite(glm::vec2 pos, const sprite_t &sprite) {
+void Renderer::submit_sprite(simd::float2 pos, const sprite_t &sprite) {
     RenderCommand cmd {};
     cmd.pipeline = PipelineType::TexturedQuad;
     cmd.key = {
@@ -70,13 +88,15 @@ void Renderer::submit_sprite(glm::vec2 pos, const sprite_t &sprite) {
         (uint8_t) PipelineType::TexturedQuad
     };
 
+    const Texture &texture = texture_manager.textures[sprite.texture];
+
     cmd.textured_quad = {
-        .position = pos,
-        .size = {0, 0},
-        .uvMin = {0, 0},
-        .uvMax = {0, 0},
-        .color = {1, 1, 1, 1},
-        .textureID = 0,
+        .pos = pos,
+        .scale = { sprite.scale.x, sprite.scale.y },
+        .uv0 = {0, 0},
+        .uv1 = {1, 1},
+        .colour = {1, 1, 1, 1},
+        .texture = texture.p_texture->texture,
     };
 
     commands.push_back(cmd);
@@ -101,190 +121,7 @@ void Renderer::submit_sprite(glm::vec2 pos, const sprite_t &sprite) {
     // array_add(*renderer.renderable_list, renderable);
 }
 
-void Renderer::create_pipeline_layout() {
-    std::array<VkDescriptorSetLayoutBinding, 1> bindings = {
-        {
-            {
-                .binding = 0,
-                .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                .descriptorCount = nextTextureSlot,
-                .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT
-            }
-        }
-    };
-
-    VkDescriptorBindingFlags flags[1] = {
-        VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT
-    };
-
-    VkDescriptorSetLayoutBindingFlagsCreateInfo layoutFlags{
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO,
-        .bindingCount = 1,
-        .pBindingFlags = flags
-    };
-
-    VkDescriptorSetLayoutCreateInfo dslci{
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
-        .pNext = &layoutFlags,
-        // .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT,
-        .bindingCount = bindings.size(),
-        .pBindings = bindings.data()
-    };
-
-    vkCreateDescriptorSetLayout(device, &dslci, nullptr, &descriptor_set_layout);
-
-    VkPushConstantRange push_constant{
-        .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
-        .offset = 0,
-        .size = sizeof(glm::mat4),
-    };
-
-    VkPipelineLayoutCreateInfo plci{
-        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .setLayoutCount = 1,
-        .pSetLayouts = &descriptor_set_layout,
-        .pushConstantRangeCount = 1,
-        .pPushConstantRanges = &push_constant,
-    };
-
-    vkCreatePipelineLayout(device, &plci, nullptr, &pipelineLayout);
-}
-
-void Renderer::createFrameResources() {
-
-    const VkSemaphoreCreateInfo seci{
-        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
-    };
-
-    VkFenceCreateInfo fenceInfo{
-        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
-        .flags = VK_FENCE_CREATE_SIGNALED_BIT,
-    };
-
-    VkCommandPoolCreateInfo cpci{
-        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
-        .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
-        .queueFamilyIndex = queueFamily
-    };
-
-    frames.resize(MAX_FRAMES_IN_FLIGHT);
-
-    for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) {
-        Frame &frame = frames[i];
-
-        vkCreateSemaphore(device, &seci, nullptr, &frame.imageAvailable);
-
-        vkCreateFence(device, &fenceInfo, nullptr, &frame.in_flight_fence);
-
-
-        vkCreateCommandPool(device, &cpci, nullptr, &frame.commandPool);
-
-        const VkCommandBufferAllocateInfo cbai{
-            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
-            .commandPool = frame.commandPool,
-            .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-            .commandBufferCount = 1
-        };
-
-        vkAllocateCommandBuffers(device, &cbai, &frame.command_buffer);
-
-        VkBufferCreateInfo bufferInfo = {
-            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-            .size = 1024 * 1024 * 4,
-            .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-        };
-
-        VmaAllocationCreateInfo allocCreateInfo = {};
-        allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
-        allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
-            VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
-            VMA_ALLOCATION_CREATE_MAPPED_BIT;
-
-        vmaCreateBuffer(
-            allocator,
-            &bufferInfo,
-            &allocCreateInfo,
-            &frame.vertexBuffer.buffer,
-            &frame.vertexBuffer.allocation,
-            &frame.vertexBuffer.info);
-    }
-}
-
-AllocatedBuffer Renderer::create_buffer(size_t allocSize, VkBufferUsageFlags usage, VmaMemoryUsage memoryUsage) {
-    // allocate buffer
-    VkBufferCreateInfo bufferInfo = {.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
-    bufferInfo.pNext = nullptr;
-    bufferInfo.size = allocSize;
-
-    bufferInfo.usage = usage;
-
-    VmaAllocationCreateInfo vmaallocInfo = {};
-    vmaallocInfo.usage = memoryUsage;
-    vmaallocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
-    AllocatedBuffer newBuffer{};
-
-    // allocate the buffer
-    vmaCreateBuffer(allocator, &bufferInfo, &vmaallocInfo, &newBuffer.buffer, &newBuffer.allocation, &newBuffer.info);
-
-    return newBuffer;
-}
-
-void Renderer::destroy_buffer(const AllocatedBuffer& buffer) {
-    vmaDestroyBuffer(allocator, buffer.buffer, buffer.allocation);
-}
-
-// GPUMeshBuffers Renderer::uploadMesh(std::span<uint32_t> indices, std::span<vertex_p2_st2_col4_a1_u32> vertices) {
-//     const size_t vertexBufferSize = vertices.size() * sizeof(vertex_p2_st2_col4_a1_u32);
-//     const size_t indexBufferSize = indices.size() * sizeof(uint32_t);
-//
-//     GPUMeshBuffers newSurface;
-//
-//     //create vertex buffer
-//     newSurface.vertexBuffer = create_buffer(vertexBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
-//         VMA_MEMORY_USAGE_GPU_ONLY);
-//
-//     //find the adress of the vertex buffer
-//     VkBufferDeviceAddressInfo deviceAdressInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,.buffer = newSurface.vertexBuffer.buffer };
-//     newSurface.vertexBufferAddress = vkGetBufferDeviceAddress(device, &deviceAdressInfo);
-//
-//     //create index buffer
-//     newSurface.indexBuffer = create_buffer(indexBufferSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-//         VMA_MEMORY_USAGE_GPU_ONLY);
-//
-//     AllocatedBuffer staging = create_buffer(vertexBufferSize + indexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_MEMORY_USAGE_CPU_ONLY);
-//
-//     void* data = staging.allocation->GetMappedData();
-//
-//     // copy vertex buffer
-//     memcpy(data, vertices.data(), vertexBufferSize);
-//     // copy index buffer
-//     memcpy((char*)data + vertexBufferSize, indices.data(), indexBufferSize);
-//
-//     immediate_submit([&](VkCommandBuffer cmd) {
-//         VkBufferCopy vertexCopy{ 0 };
-//         vertexCopy.dstOffset = 0;
-//         vertexCopy.srcOffset = 0;
-//         vertexCopy.size = vertexBufferSize;
-//
-//         vkCmdCopyBuffer(cmd, staging.buffer, newSurface.vertexBuffer.buffer, 1, &vertexCopy);
-//
-//         VkBufferCopy indexCopy{ 0 };
-//         indexCopy.dstOffset = 0;
-//         indexCopy.srcOffset = vertexBufferSize;
-//         indexCopy.size = indexBufferSize;
-//
-//         vkCmdCopyBuffer(cmd, staging.buffer, newSurface.indexBuffer.buffer, 1, &indexCopy);
-//     });
-//
-//     destroy_buffer(staging);
-//
-//     return newSurface;
-//
-// }
-
-VkPipeline Renderer::get_pipeline(PipelineType type) const {
+MTL::RenderPipelineState *Renderer::get_pipeline(PipelineType type) const {
     switch (type) {
     case PipelineType::TexturedQuad: return textured_quad_pipeline;
     case PipelineType::ColoredQuad:  return colored_quad_pipeline;
@@ -308,291 +145,159 @@ VkPipeline Renderer::get_pipeline(PipelineType type) const {
 //     );
 // }
 
-void Renderer::create_descriptor_pool() {
-    VkDescriptorPoolSize pool_sizes[] = {
-        { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, nextTextureSlot },
-    };
-
-    VkDescriptorPoolCreateInfo pool_info{
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
-        .flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT,
-        .maxSets = 1,
-        .poolSizeCount = 1,
-        .pPoolSizes = pool_sizes
-    };
-
-    vkCreateDescriptorPool(device, &pool_info, nullptr, &descriptorPool);
-
-    VkDescriptorSetAllocateInfo alloc_info{
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
-        .descriptorPool = descriptorPool,
-        .descriptorSetCount = 1,
-        .pSetLayouts = &descriptor_set_layout
-    };
-
-    vkAllocateDescriptorSets(device, &alloc_info, &set);
+void Renderer::end_frame(GLFWwindow *window) { // Ends the frame by handing off to send_render_command(window).
+    send_render_command(window);
 }
 
-void Renderer::update_bindless_slot(uint32_t slot, VkImageView view, VkSampler sampler) const {
-    VkDescriptorImageInfo image_info{
-        .sampler = sampler,
-        .imageView = view,
-        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
-    };
+// Compiles the Slang module "shader", turns the generated MSL into a Metal
+// library and builds textured_quad_pipeline from its vs_main / fs_main functions.
+// Every failure is fatal: the reason is printed and std::abort() is called.
+void Renderer::create_render_pipeline() {
+    Slang::ComPtr<slang::IBlob> diagnostics;
+    Slang::ComPtr<slang::IModule> module(slangSession->loadModule("shader", diagnostics.writeRef()));
 
-    VkWriteDescriptorSet write{
-        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-        .dstSet = set,
-        .dstArrayElement = slot,
-        .descriptorCount = 1,
-        .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-        .pImageInfo = &image_info
-    };
+    // Print whatever Slang reported before acting on the load result.
+    if (diagnostics) {
+        fprintf(stderr, "%s\n", (const char*) diagnostics->getBufferPointer());
+    }
+    if (!module) {
+        std::println("Failed to load Slang module 'shader'");
+        std::abort();
+    }
 
-    vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
+    Slang::ComPtr<slang::IEntryPoint> vertEntry;
+    Slang::ComPtr<slang::IEntryPoint> fragEntry;
+    module->findEntryPointByName("vs_main", vertEntry.writeRef());
+    module->findEntryPointByName("fs_main", fragEntry.writeRef());
+    if (!vertEntry || !fragEntry) {
+        std::println("Slang module 'shader' is missing vs_main or fs_main");
+        std::abort();
+    }
+
+    slang::IComponentType* components[] = { module, vertEntry, fragEntry };
+    Slang::ComPtr<slang::IComponentType> program;
+    slangSession->createCompositeComponentType(components, 3, program.writeRef());
+    if (!program) {
+        std::println("Failed to compose the Slang program");
+        std::abort();
+    }
+
+    Slang::ComPtr<slang::IBlob> code;
+    program->getTargetCode(0, code.writeRef(), diagnostics.writeRef());
+    if (diagnostics)
+        std::println("{}", (const char*)diagnostics->getBufferPointer());
+    if (!code) {
+        std::println("Slang produced no Metal source for target 0");
+        std::abort();
+    }
+
+    std::println("Generated MSL:\n{}", (const char*)code->getBufferPointer());
+
+    NS::Error *error = nullptr;
+    NS::String* source = NS::String::string((const char *) code->getBufferPointer(), NS::UTF8StringEncoding);
+    MTL::CompileOptions* opts = MTL::CompileOptions::alloc()->init();
+    MTL::Library *library = metal_device.device->newLibrary(source, opts, &error);
+    opts->release();
+
+    if (!library) {
+        if (error) {
+            std::println("Metal library compile error:");
+            std::println("Domain: {}", error->domain()->utf8String());
+            std::println("Code: {}", error->code());
+            std::println("Description:\n{}", error->localizedDescription()->utf8String());
+        }
+        std::abort();
+    }
+
+    // A null result from newFunction means the name is absent from the compiled library.
+    NS::String *vname = NS::String::string("vs_main", NS::UTF8StringEncoding);
+    NS::String *fname = NS::String::string("fs_main", NS::UTF8StringEncoding);
+    MTL::Function *vertex_shader = library->newFunction(vname);
+    MTL::Function *fragment_shader = library->newFunction(fname);
+    if (!vertex_shader) {
+        std::println("vs_main not found in generated MSL");
+        std::abort();
+    }
+    if (!fragment_shader) {
+        std::println("fs_main not found in generated MSL");
+        std::abort();
+    }
+    library->release();
+
+    MTL::VertexDescriptor *vd = vertex_p2_s2_uv2_c4_a1::vertexDescriptor();
+    MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
+    assert(renderPipelineDescriptor);
+    renderPipelineDescriptor->setLabel(NS::String::string("Triangle Rendering Pipeline", NS::ASCIIStringEncoding));
+    renderPipelineDescriptor->setVertexFunction(vertex_shader);
+    renderPipelineDescriptor->setFragmentFunction(fragment_shader);
+    renderPipelineDescriptor->setVertexDescriptor(vd);
+    const MTL::PixelFormat pixel_format = metal_layer->pixelFormat();
+    renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(pixel_format);
+
+    textured_quad_pipeline = metal_device.device->newRenderPipelineState(renderPipelineDescriptor, &error);
+    if (!textured_quad_pipeline) {
+        if (error) {
+            std::println("Pipeline error: {}", error->localizedDescription()->utf8String());
+        }
+        std::abort();
+    }
+
+    // The functions from newFunction are owned references; release them now that the pipeline state exists.
+    vertex_shader->release();
+    fragment_shader->release();
+    renderPipelineDescriptor->release();
+    vd->release();
 }
 
-void Renderer::upload_texture(
-    const int w,
-    const int h,
-    const void* pixels,
-    VkImage *image,
-    VmaAllocation *allocation,
-    VkImageView *view,
-    uint32_t *descriptor_index)
-{
-    VkDeviceSize imageSize = w * h * 4;
+float4x4 make_ortho(float left, float right, float bottom, float top, float near, float far) { // Terms of an orthographic projection matrix.
+    float sx =  2.0f / (right - left);  // x scale: the [left, right] span becomes length 2
+    float sy =  2.0f / (top - bottom);  // y scale: the [bottom, top] span becomes length 2
+    float sz =  1.0f / (far - near);    // z scale: the [near, far] span becomes length 1
 
-    // --- 1. Create Staging Buffer (CPU Visible) ---
-    VkBuffer stagingBuffer;
-    VmaAllocation stagingAlloc;
+    float tx = -(right + left) / (right - left);  // if applied as x' = sx*x + tx: left -> -1, right -> +1
+    float ty = -(bottom + top) / (top - bottom);  // if applied as y' = sy*y + ty: bottom -> -1, top -> +1
+    float tz = -near           / (far - near);    // if applied as z' = sz*z + tz: near -> 0, far -> 1
 
-    VkBufferCreateInfo stagingBufferInfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
-    stagingBufferInfo.size = imageSize;
-    stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
-
-    VmaAllocationCreateInfo stagingAllocCreateInfo = {
-        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT,
-        .usage = VMA_MEMORY_USAGE_AUTO,
-    };
-
-    VmaAllocationInfo stagingResultInfo;
-    vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocCreateInfo, &stagingBuffer, &stagingAlloc, &stagingResultInfo);
-
-    // Copy raw pixels into the mapped memory provided by VMA
-    memcpy(stagingResultInfo.pMappedData, pixels, imageSize);
-
-    // --- 2. Create GPU Image (Device Local / Tiled) ---
-    VkExtent3D imageExtent = { (uint32_t) w, (uint32_t) h, 1 };
-
-    VkImageCreateInfo imageInfo = {
-        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-        .imageType = VK_IMAGE_TYPE_2D,
-        .format = VK_FORMAT_R8G8B8A8_UNORM,
-        .extent = imageExtent,
-        .mipLevels = 1,
-        .arrayLayers = 1,
-        .samples = VK_SAMPLE_COUNT_1_BIT,
-        .tiling = VK_IMAGE_TILING_OPTIMAL,
-        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
-        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED
-    };
-
-    VmaAllocationCreateInfo imageAllocCreateInfo = {
-        .usage = VMA_MEMORY_USAGE_AUTO,
-        .priority = 1.0f,
-    };
-
-    vmaCreateImage(allocator, &imageInfo, &imageAllocCreateInfo, image, allocation, nullptr);
-
-    // --- 3. The Transfer ---
-    immediate_submit([&](VkCommandBuffer cmd) {
-        // Transition image from UNDEFINED to TRANSFER_DST
-        transition_image_layout(cmd, *image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
-
-        VkBufferImageCopy copyRegion = {};
-        copyRegion.bufferOffset = 0;
-        copyRegion.bufferRowLength = 0;
-        copyRegion.bufferImageHeight = 0;
-        copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-        copyRegion.imageSubresource.mipLevel = 0;
-        copyRegion.imageSubresource.baseArrayLayer = 0;
-        copyRegion.imageSubresource.layerCount = 1;
-        copyRegion.imageExtent = imageExtent;
-
-        vkCmdCopyBufferToImage(cmd, stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copyRegion);
-
-        // Transition image from TRANSFER_DST to SHADER_READ_ONLY
-        transition_image_layout(cmd, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-    });
-
-    // Clean up temporary staging resources
-    vmaDestroyBuffer(allocator, stagingBuffer, stagingAlloc);
-
-    // --- 4. Finalize Handles ---
-    *view = create_image_view(*image, imageInfo.format);
-
-    // Register in your Bindless Array (Set 0, Binding 0, Index N)
-    *descriptor_index = nextTextureSlot++;
-    update_bindless_slot(*descriptor_index, *view, defaultSampler);
-}
-
-void Renderer::immediate_submit(std::function<void(VkCommandBuffer)>&& func) const {
-    VkCommandBufferAllocateInfo allocInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
-    allocInfo.commandPool = frames[currentFrame].commandPool; // Use a pool created with VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
-    allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
-    allocInfo.commandBufferCount = 1;
-
-    VkCommandBuffer cmd;
-    vkAllocateCommandBuffers(device, &allocInfo, &cmd);
-
-    VkCommandBufferBeginInfo beginInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
-    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-
-    vkBeginCommandBuffer(cmd, &beginInfo);
-
-    // Execute the code passed in the lambda
-    func(cmd);
-
-    vkEndCommandBuffer(cmd);
-
-    VkSubmitInfo submit{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO };
-    submit.commandBufferCount = 1;
-    submit.pCommandBuffers = &cmd;
-
-    // Submit and wait
-    vkQueueSubmit(graphics_queue, 1, &submit, VK_NULL_HANDLE);
-    vkQueueWaitIdle(graphics_queue);
-
-    vkFreeCommandBuffers(device, frames[currentFrame].commandPool, 1, &cmd);
-}
-
-void Renderer::transition_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout) const {
-    VkImageMemoryBarrier2 barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2 };
-
-    barrier.oldLayout = oldLayout;
-    barrier.newLayout = newLayout;
-    barrier.image = image;
-    barrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
-
-    // Simple synchronization: wait for all previous commands, and block all future ones
-    // You can optimize these masks later, but this is safe for a 2D engine
-    barrier.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
-    barrier.srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT;
-    barrier.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
-    barrier.dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
-
-    VkDependencyInfo dep{ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
-    dep.imageMemoryBarrierCount = 1;
-    dep.pImageMemoryBarriers = &barrier;
-
-    vkCmdPipelineBarrier2(cmd, &dep);
-}
-
-VkImageView Renderer::create_image_view(VkImage image, VkFormat format) const {
-    VkImageViewCreateInfo viewInfo{
-        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-        .image = image,
-        .viewType = VK_IMAGE_VIEW_TYPE_2D,
-        .format = format,
-    };
-
-    // Default component mapping (R,G,B,A)
-    viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
-    viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
-    viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
-    viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
-
-    // Which part of the image to look at (Mip 0, Layer 0)
-    viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-    viewInfo.subresourceRange.baseMipLevel = 0;
-    viewInfo.subresourceRange.levelCount = 1;
-    viewInfo.subresourceRange.baseArrayLayer = 0;
-    viewInfo.subresourceRange.layerCount = 1;
-
-    VkImageView view;
-    vkCreateImageView(device, &viewInfo, nullptr, &view);
-    return view;
-}
-
-void Renderer::create_default_sampler() {
-    VkSamplerCreateInfo samplerInfo{
-        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
-        // For crisp pixel art, use NEAREST. For smooth textures, use LINEAR.
-        .magFilter = VK_FILTER_NEAREST,
-        .minFilter = VK_FILTER_NEAREST,
-
-        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
-
-        // How to handle "out of bounds" UVs
-        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-        .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-        .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-
-        // Optimization: turn off things we don't need for simple 2D
-        .anisotropyEnable = VK_FALSE,
-        .maxAnisotropy = 1.0f,
-        .compareEnable = VK_FALSE,
-        .compareOp = VK_COMPARE_OP_ALWAYS,
-        .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
-        .unnormalizedCoordinates = VK_FALSE,
-    };
-
-    vkCreateSampler(device, &samplerInfo, nullptr, &defaultSampler);
-}
-
-void Renderer::end_frame() {
-
-    Frame &frame = frames[currentFrame];
-
-    vkWaitForFences(device, 1, &frame.in_flight_fence, VK_TRUE, UINT64_MAX);
-    vkResetFences(device, 1, &frame.in_flight_fence);
-
-    uint32_t imageIndex;
-    vkAcquireNextImageKHR(
-        device,
-        swapchain,
-        UINT64_MAX,
-        frame.imageAvailable,
-        VK_NULL_HANDLE,
-        &imageIndex
+    return float4x4(
+        float4{sx,   0.0f, 0.0f, 0.0f},
+        float4{0.0f, sy,   0.0f, 0.0f},
+        float4{0.0f, 0.0f, sz,   0.0f},
+        float4{tx,   ty,   tz,   1.0f}
     );
+}
 
-    commands = counting_sort_descending(commands, [](const RenderCommand &cmd){
-        return cmd.key.depth;
-    });
+void Renderer::encode_render_command(GLFWwindow *window, MTL::RenderCommandEncoder *render_command_encoder) {
 
-    std::vector<vertex_p2_s2_st2_col4_a1_u32> vertices;
+    std::vector<vertex_p2_s2_uv2_c4_a1> vertices;
 
     for (auto& cmd : commands) {
 
-
         switch (cmd.pipeline) {
         case PipelineType::ColoredQuad: {
             const auto &q = cmd.colored_quad;
 
-            // Calculate spatial corners
-            //float x0 = q.position.x;
-            //float y0 = q.position.y;
-            //float x1 = q.position.x + q.size.x;
-            //float y1 = q.position.y + q.size.y;
+            vertex_p2_s2_uv2_c4_a1 vTL = { q.pos, q.scale, {}, q.colour };
+            vertex_p2_s2_uv2_c4_a1 vTR = { q.pos, q.scale, {}, q.colour };
+            vertex_p2_s2_uv2_c4_a1 vBL = { q.pos, q.scale, {}, q.colour };
+            vertex_p2_s2_uv2_c4_a1 vBR = { q.pos, q.scale, {}, q.colour };
 
-            // Calculate UV corners
-            // float u0 = q.uvMin.x;
-            // float v0 = q.uvMin.y;
-            // float u1 = q.uvMax.x;
-            // float v1 = q.uvMax.y;
+            vertices.push_back(vTL);
+            vertices.push_back(vBL);
+            vertices.push_back(vTR);
 
-            // Define the 4 corners of the quad
-            vertex_p2_s2_st2_col4_a1_u32 vTL = { q.pos, q.scale, {0, 0}, {1, 0, 0, 0}, 1, 0 };
-            vertex_p2_s2_st2_col4_a1_u32 vTR = { q.pos, q.scale, {0, 0}, q.color, 1, 0 };
-            vertex_p2_s2_st2_col4_a1_u32 vBL = { q.pos, q.scale, {0, 0}, q.color, 1, 0 };
-            vertex_p2_s2_st2_col4_a1_u32 vBR = { q.pos, q.scale, {0, 0}, q.color, 1, 0 };
+            vertices.push_back(vTR);
+            vertices.push_back(vBL);
+            vertices.push_back(vBR);
 
-            // vertex_p2_st2_col4_a1_u32 vTL = { {x0, y0}, {u0, v0}, q.color, 1, q.textureID };
-            // vertex_p2_st2_col4_a1_u32 vTR = { {x1, y0}, {u1, v0}, q.color, 1, q.textureID };
-            // vertex_p2_st2_col4_a1_u32 vBL = { {x0, y1}, {u0, v1}, q.color, 1, q.textureID };
-            // vertex_p2_st2_col4_a1_u32 vBR = { {x1, y1}, {u1, v1}, q.color, 1, q.textureID };
+            break;
+        }
+        case PipelineType::TexturedQuad: {
+            const auto &q = cmd.textured_quad;
+
+            vertex_p2_s2_uv2_c4_a1 vTL = { q.pos, q.scale, q.uv0, q.colour, 1.0 };
+            vertex_p2_s2_uv2_c4_a1 vTR = { q.pos, q.scale, {q.uv1.x, q.uv0.y}, q.colour, 1.0 };
+            vertex_p2_s2_uv2_c4_a1 vBL = { q.pos, q.scale, {q.uv0.x, q.uv1.y}, q.colour, 1.0 };
+            vertex_p2_s2_uv2_c4_a1 vBR = { q.pos, q.scale, q.uv1, q.colour, 1.0};
 
             vertices.push_back(vTL);
             vertices.push_back(vBL);
@@ -609,220 +314,66 @@ void Renderer::end_frame() {
         }
     }
 
-    VkCommandBuffer cmd = frame.command_buffer;
-    vkResetCommandBuffer(cmd, 0);
+    const Frame &frame = frames[current_frame];
 
-    VkCommandBufferBeginInfo cbBI {
-        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
-    };
-    vkBeginCommandBuffer(cmd, &cbBI);
+    memcpy(frame.vertex_buffer->contents(), vertices.data(), vertices.size() * sizeof(vertex_p2_s2_uv2_c4_a1));
 
-    recordCommandBuffer(
-        cmd,
-        images[imageIndex],
-        imageViews[imageIndex],
-        swapchain_extent,
-        imageLayouts[imageIndex],
-        frame,
-        vertices
-    );
+    MTL::SamplerDescriptor* sampler_desc = MTL::SamplerDescriptor::alloc()->init();
 
-    vkEndCommandBuffer(cmd);
+    sampler_desc->setMinFilter(MTL::SamplerMinMagFilterLinear);
+    sampler_desc->setMagFilter(MTL::SamplerMinMagFilterLinear);
+    MTL::SamplerState* sampler = metal_device.device->newSamplerState(sampler_desc);
 
-    imageLayouts[imageIndex] = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+    int width, height;
+    glfwGetFramebufferSize(window, &width, &height);
+    simd::float4x4 ortho = make_ortho(0.0, window_width, window_height, 0.0, 1.0, 0.0);
+    auto tm = simd::transpose(ortho);
+    memcpy(frame.uniform_buffer->contents(), &tm, sizeof(ortho));
+    render_command_encoder->setRenderPipelineState(textured_quad_pipeline);
+    render_command_encoder->setVertexBuffer(frame.vertex_buffer, 0, 0);
+    render_command_encoder->setVertexBuffer(frame.uniform_buffer, 0, 1);
+    render_command_encoder->setFragmentSamplerState(sampler, 0);
+    MTL::PrimitiveType type_triangle = MTL::PrimitiveTypeTriangle;
+    NS::UInteger vertexStart = 0;
+    NS::UInteger vertexCount = vertices.size();
+    const Texture &texture = texture_manager.textures["assets/boy.png"];
+    render_command_encoder->setFragmentTexture(texture.p_texture->texture, 0);
+    render_command_encoder->drawPrimitives(type_triangle, vertexStart, vertexCount);
 
-    VkSemaphoreSubmitInfo waitBinary{
-        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
-        .semaphore = frame.imageAvailable,
-        .stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT
-    };
-
-    VkSemaphoreSubmitInfo signalBinary{
-        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
-        .semaphore = renderFinished[imageIndex],
-        .stageMask = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT
-    };
-
-    VkCommandBufferSubmitInfo cmdInfo{
-        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
-        .commandBuffer = cmd,
-    };
-
-    const VkSubmitInfo2 submit{
-        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
-        .waitSemaphoreInfoCount = 1,
-        .pWaitSemaphoreInfos = &waitBinary,
-        .commandBufferInfoCount = 1,
-        .pCommandBufferInfos = &cmdInfo,
-        .signalSemaphoreInfoCount = 1,
-        .pSignalSemaphoreInfos = &signalBinary,
-    };
-
-    vkQueueSubmit2(graphics_queue, 1, &submit, frame.in_flight_fence);
-
-    VkPresentInfoKHR present{
-        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
-        .waitSemaphoreCount = 1,
-        .pWaitSemaphores = &renderFinished[imageIndex],
-        .swapchainCount = 1,
-        .pSwapchains = &swapchain,
-        .pImageIndices = &imageIndex,
-    };
-
-    vkQueuePresentKHR(graphics_queue, &present);
-
-    currentFrame = (currentFrame + 1) % MAX_FRAMES_IN_FLIGHT;
+    sampler_desc->release();
 }
 
-void Renderer::upload_vertex_buffer(
-    VkCommandBuffer cmd,
-    const Frame &frame,
-    std::span<const vertex_p2_s2_st2_col4_a1_u32> vertices) const
-{
-    VkMemoryPropertyFlags memPropFlags;
-    vmaGetAllocationMemoryProperties(allocator, frame.vertexBuffer.allocation, &memPropFlags);
+void Renderer::send_render_command(GLFWwindow *window) { // Encodes one frame into a fresh command buffer, presents metal_drawable and advances current_frame.
+    dispatch_semaphore_wait(frame_semaphore, DISPATCH_TIME_FOREVER); // blocks (no timeout) until frame_semaphore can be decremented
 
-    if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
-        // The Allocation ended up in a mappable memory.
-        // Calling vmaCopyMemoryToAllocation() does vmaMapMemory(), memcpy(), vmaUnmapMemory(), and vmaFlushAllocation().
-        VkResult result = vmaCopyMemoryToAllocation(allocator, vertices.data(), frame.vertexBuffer.allocation, 0, vertices.size() * sizeof(vertex_p2_s2_st2_col4_a1_u32));
-        // Check result...
+    command_buffer = queue->commandBuffer(); // stored in the Renderer member, replaced every call
 
-        VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
-        bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
-        bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT;
-        bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        bufMemBarrier.buffer = frame.vertexBuffer.buffer;
-        bufMemBarrier.offset = 0;
-        bufMemBarrier.size = VK_WHOLE_SIZE;
-
-        // It's important to insert a buffer memory barrier here to ensure writing to the buffer has finished.
-        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
-            0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
-    }
-}
-
-void Renderer::recordCommandBuffer(
-    VkCommandBuffer cmd,
-    VkImage image,
-    VkImageView imageView,
-    VkExtent2D extent,
-    VkImageLayout oldLayout,
-    const Frame &frame,
-    const std::vector<vertex_p2_s2_st2_col4_a1_u32> &vertices) const
-{
-
-    {
-        VkImageMemoryBarrier2 toColor{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2 };
-        toColor.srcStageMask  = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
-        toColor.dstStageMask  = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
-        toColor.dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
-        toColor.oldLayout     = VK_IMAGE_LAYOUT_UNDEFINED;
-        toColor.newLayout     = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-        toColor.image         = image;
-        toColor.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
-
-        VkDependencyInfo dep{
-            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
-            .imageMemoryBarrierCount = 1,
-            .pImageMemoryBarriers = &toColor
-        };
-        vkCmdPipelineBarrier2(cmd, &dep);
-    }
-
-    VkClearValue clearColor = {{{0.1f, 0.1f, 0.2f, 1.0f}}};
-    VkRenderingAttachmentInfo colorAttach{
-        .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
-        .imageView = imageView,
-        .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
-        .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
-        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-        .clearValue = clearColor
-    };
-
-    VkRenderingInfo ri{
-        .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
-        .renderArea = {{0,0}, extent},
-        .layerCount = 1,
-        .colorAttachmentCount = 1,
-        .pColorAttachments = &colorAttach
-    };
-
-    upload_vertex_buffer(cmd, frame, vertices);
-
-    vkCmdBeginRendering(cmd, &ri);
-
-    VkViewport vp{0.0f, 0.0f, (float)extent.width, (float)extent.height, 0.0f, 1.0f};
-    VkRect2D sc{{0, 0}, extent};
-    vkCmdSetViewport(cmd, 0, 1, &vp);
-    vkCmdSetScissor(cmd, 0, 1, &sc);
-
-    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, 0, nullptr);
-    VkDeviceSize vOffset{ 0 };
-    vkCmdBindVertexBuffers(cmd, 0, 1, &frame.vertexBuffer.buffer, &vOffset);
-
-    glm::mat4 projection = glm::ortho(0.0f, (float)window_width, 0.0f, (float)window_height, -1.0f, 1.0f);
-
-    vkCmdPushConstants(
-        cmd,
-        pipelineLayout,
-        VK_SHADER_STAGE_VERTEX_BIT,
-        0,
-        sizeof(glm::mat4),
-        &projection
-    );
-
-    PipelineType lastPipeline = PipelineType::None; // Track current state
-    // uint32_t vertexOffset = currentFrame * MAX_VERTICES_PER_BATCH;
-    uint32_t currentBatchVertices = 0;
-
-    for (const auto & render_command : commands) {
-        // Only switch pipelines if we have to
-        if (render_command.pipeline != lastPipeline) {
-            // If we were mid-batch, draw what we have before switching
-            if (currentBatchVertices > 0) {
-                vkCmdDraw(cmd, currentBatchVertices, 1, 0, 0);
-                // vertexOffset += currentBatchVertices;
-                currentBatchVertices = 0;
-            }
-
-            vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, get_pipeline(render_command.pipeline));
-            lastPipeline = render_command.pipeline;
-        }
-
-        currentBatchVertices += 6;
-    }
-
-    // Draw the final batch
-    if (currentBatchVertices > 0) {
-        vkCmdDraw(cmd, currentBatchVertices, 1, 0, 0);
-    }
-
-    vkCmdEndRendering(cmd);
-
-    // 3. Transition back to Present
-    {
-        VkImageMemoryBarrier2 toPresent{
-            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
-            .srcStageMask  = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
-            .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
-            .dstStageMask  = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-            .dstAccessMask = 0,
-            .oldLayout     = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
-            .newLayout     = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
-            .image         = image,
-            .subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 },
-        };
-
-        VkDependencyInfo dep{
-            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
-            .imageMemoryBarrierCount = 1,
-            .pImageMemoryBarriers = &toPresent
-        };
-        
-        vkCmdPipelineBarrier2(cmd, &dep);
-    }
+    dispatch_semaphore_t sem = frame_semaphore; // local copy so the lambda captures the handle by value instead of `this`
+    command_buffer->addCompletedHandler([sem](MTL::CommandBuffer* pBuf) { // pBuf is unused
+        dispatch_semaphore_signal(sem); // pairs with the dispatch_semaphore_wait at the top of this function
+    });
 
+    MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); // released at the end of this function
+    MTL::RenderPassColorAttachmentDescriptor *cd = renderPassDescriptor->colorAttachments()->object(0);
+    cd->setTexture(metal_drawable->texture()); // metal_drawable is not set here; NOTE(review): no null check — confirm the caller guarantees it
+    cd->setLoadAction(MTL::LoadActionClear);
+    cd->setClearColor(MTL::ClearColor( // RGB 100/149/237 scaled to 0..1, alpha 1.0
+        100.0f / 255.0f,
+        149.0f / 255.0f,
+        237.0f / 255.0f,
+        1.0
+        ));
+    cd->setStoreAction(MTL::StoreActionStore);
+
+    MTL::RenderCommandEncoder* renderCommandEncoder = command_buffer->renderCommandEncoder(renderPassDescriptor);
+    encode_render_command(window, renderCommandEncoder); // all draw state and draw calls are recorded by this helper
+    renderCommandEncoder->endEncoding();
+
+    command_buffer->presentDrawable(metal_drawable);
+    command_buffer->commit();
+    command_buffer->waitUntilCompleted(); // NOTE(review): blocks until this buffer finishes, so frames never overlap despite frame_semaphore — confirm intended
+
+    current_frame = (current_frame + 1) % kMaxFramesInFlight; // selects the next slot of frames[] for the following call
+
+    renderPassDescriptor->release();
 }
diff --git a/renderer/metal/renderer.h b/renderer/metal/renderer.h
index 2f21359..cfe011c 100644
--- a/renderer/metal/renderer.h
+++ b/renderer/metal/renderer.h
@@ -5,25 +5,13 @@
 #ifndef V_RENDERER_H
 #define V_RENDERER_H
 
-#include "init.h"
-#include <volk/volk.h>
+#include "metal.h"
 #include <GLFW/glfw3.h>
-#define GLM_FORCE_RADIANS
-#define GLM_FORCE_DEPTH_ZERO_TO_ONE
-#define GLM_ENABLE_EXPERIMENTAL
-#include <glm/glm.hpp>
-#include <glm/ext/matrix_clip_space.hpp>
-#include "glm/gtx/string_cast.hpp"
-#include <vma/vk_mem_alloc.h>
 #include "../sprite.h"
-#include "../texture.h"
 #include <misc.h>
-#include <array>
-#include <span>
-#include <slang/slang.h>
-#include <slang/slang-com-ptr.h>
+#include <simd/simd.h>
 
-inline Slang::ComPtr<slang::IGlobalSession> slangGlobalSession;
+static const int kMaxFramesInFlight = 3;
 
 enum class PROJECTION_TYPE : uint8_t {
     NONE,
@@ -34,32 +22,6 @@ enum class PROJECTION_TYPE : uint8_t {
     COUNT,
 };
 
-struct vertex_p2_s2_st2_col4_a1_u32 {
-    glm::vec2 pos;
-    glm::vec2 scale;
-    glm::vec2 uv;
-    glm::vec4 color;
-    float     alpha;
-    uint32_t  textureID;
-
-    static VkVertexInputBindingDescription getBindingDescription() {
-        return {0, sizeof(vertex_p2_s2_st2_col4_a1_u32), VK_VERTEX_INPUT_RATE_VERTEX};
-    }
-
-    static std::array<VkVertexInputAttributeDescription, 6> getAttributeDescriptions() {
-        return {
-            {
-                {0, 0, VK_FORMAT_R32G32_SFLOAT, offsetof(vertex_p2_s2_st2_col4_a1_u32, pos)},
-                {1, 0, VK_FORMAT_R32G32_SFLOAT, offsetof(vertex_p2_s2_st2_col4_a1_u32, scale)},
-                {2, 0, VK_FORMAT_R32G32_SFLOAT, offsetof(vertex_p2_s2_st2_col4_a1_u32, uv)},
-                {3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(vertex_p2_s2_st2_col4_a1_u32, color)},
-                {4, 0, VK_FORMAT_R32_SFLOAT, offsetof(vertex_p2_s2_st2_col4_a1_u32, alpha)},
-                {5, 0, VK_FORMAT_R32_UINT, offsetof(vertex_p2_s2_st2_col4_a1_u32, textureID)},
-            }
-        };
-    }
-};
-
 // commands
 
 enum class PipelineType : uint8_t {
@@ -72,24 +34,24 @@ enum class PipelineType : uint8_t {
 };
 
 struct TexturedQuadCmd {
-    glm::vec2 position;
-    glm::vec2 size;
-    glm::vec2 uvMin;
-    glm::vec2 uvMax;
-    glm::vec4 color;
-    uint16_t textureID;
+    simd::float2 pos;      // copied unchanged into all four corner vertices when the quad is expanded
+    simd::float2 scale;    // likewise copied unchanged into every corner vertex
+    simd::float2 uv0;      // UV of the top-left corner (vTL)
+    simd::float2 uv1;      // UV of the bottom-right corner (vBR); TR and BL mix components of uv0 and uv1
+    simd::float4 colour;   // same colour value is written to all four corner vertices
+    MTL::Texture *texture; // NOTE(review): the visible draw path binds a hard-coded texture, not this pointer — confirm intent
 };
 
 struct ColoredQuadCmd {
-    glm::vec2 pos;
-    glm::vec2 scale;
-    glm::vec4 color;
+    simd::float2 pos;    // presumably forwarded to every vertex of the quad, as for TexturedQuadCmd::pos — confirm in the renderer
+    simd::float2 scale;  // presumably forwarded to every vertex as well — confirm in the renderer
+    simd::float4 colour; // spelled `colour` here and in TexturedQuadCmd; LineCmd uses `color`
 };
 
 struct LineCmd {
-    glm::vec2 start;
-    glm::vec2 end;
-    glm::vec4 color;
+    simd::float2 start; // first endpoint of the line
+    simd::float2 end;   // second endpoint of the line
+    simd::float4 color; // NOTE: spelled `color`, unlike `colour` in TexturedQuadCmd and ColoredQuadCmd
 };
 
 // struct TextCmd {
@@ -99,11 +61,11 @@ struct LineCmd {
 //     glm::vec4 color;
 // };
 
-struct ChunkCmd {
-    VkBuffer vertexBuffer;
-    VkBuffer indexBuffer;
-    uint32_t indexCount;
-};
+//struct ChunkCmd {
+//    VkBuffer vertexBuffer;
+//    VkBuffer indexBuffer;
+//    uint32_t indexCount;
+//};
 
 struct SortKey {
     uint16_t depth; // world Z or Y-sorted depth
@@ -123,280 +85,101 @@ struct RenderCommand {
         ColoredQuadCmd  colored_quad;
         LineCmd         line;
         // TextCmd         text;
-        ChunkCmd        chunk;
+        // ChunkCmd        chunk;
     };
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-struct AllocatedBuffer {
-    VkBuffer buffer;
-    VmaAllocation allocation;
-    VmaAllocationInfo info;
-};
+// struct AllocatedBuffer {
+//     VkBuffer buffer;
+//     VmaAllocation allocation;
+//     VmaAllocationInfo info;
+// };
 
-struct GPUMeshBuffers {
-    AllocatedBuffer indexBuffer;
-    AllocatedBuffer vertexBuffer;
-    VkDeviceAddress vertexBufferAddress;
-};
+// struct GPUMeshBuffers {
+//     AllocatedBuffer indexBuffer;
+//     AllocatedBuffer vertexBuffer;
+//     VkDeviceAddress vertexBufferAddress;
+// };
 
 struct Renderer {
     std::vector<RenderCommand> commands{};
 
-    VkDescriptorSetLayout descriptor_set_layout{};
-    VkPipelineLayout pipelineLayout{};
-    VkPipeline textured_quad_pipeline{};
-    VkPipeline colored_quad_pipeline{};
-    VkPipeline line_pipeline{};
-    VkPipeline text_pipeline{};
-    VkPipeline chunk_pipeline{};
-    VkDescriptorSet set{};
-
-    VkSampler defaultSampler{};
+    MTL::RenderPipelineState *textured_quad_pipeline{};
+    MTL::RenderPipelineState *colored_quad_pipeline{};
+    MTL::RenderPipelineState *line_pipeline{};
+    MTL::RenderPipelineState *text_pipeline{};
+    MTL::RenderPipelineState *chunk_pipeline{};
 
     uint32_t nextTextureSlot = 0;
 
+    MTL::CommandBuffer *command_buffer{};
+
     struct Frame {
-        VkCommandPool commandPool{};
-        VkCommandBuffer command_buffer{};
-
-        VkSemaphore imageAvailable{};
-        VkFence in_flight_fence{};
-
-        AllocatedBuffer vertexBuffer{};
+        MTL::Buffer *vertex_buffer{};
+        MTL::Buffer *uniform_buffer{};
     };
 
-    std::vector<Frame> frames;
-    uint32_t currentFrame = 0;
+    Frame frames[kMaxFramesInFlight];
 
-    VkDescriptorPool descriptorPool{};
-    std::vector<VkDescriptorSet> textureSets{};
+    dispatch_semaphore_t frame_semaphore;
+    uint8_t current_frame = 0;
 
+//     struct Frame {
+//         VkCommandPool commandPool{};
+//         VkCommandBuffer command_buffer{};
+//
+//         VkSemaphore imageAvailable{};
+//         VkFence in_flight_fence{};
+//
+//         AllocatedBuffer vertexBuffer{};
+//     };
+//
+//     std::vector<Frame> frames;
+//     uint32_t currentFrame = 0;
+//
+//     VkDescriptorPool descriptorPool{};
+//     std::vector<VkDescriptorSet> textureSets{};
+//
     void begin_frame();
-    void end_frame();
+    void end_frame(GLFWwindow *window);
     void flush();
 
-    void submit_sprite(glm::vec2 pos, const sprite_t &sprite);
-    void submit_quad(glm::vec2 pos, glm::vec2 scale);
+    void submit_sprite(simd::float2 pos, const sprite_t &sprite);
+    void submit_quad(simd::float2 pos, simd::float2 scale);
 
+    Renderer() = default;
     explicit Renderer(GLFWwindow *window);
-    void create_pipeline_layout();
-    void createFrameResources();
-    void create_default_sampler();
-    void recordCommandBuffer(
-        VkCommandBuffer cmd,
-        VkImage image,
-        VkImageView imageView,
-        VkExtent2D extent,
-        VkImageLayout oldLayout,
-        const Frame &frame,
-        const std::vector<vertex_p2_s2_st2_col4_a1_u32> &vertices) const;
-    void immediate_submit(std::function<void(VkCommandBuffer)>&& func) const;
-    void transition_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout) const;
-    VkImageView create_image_view(VkImage image, VkFormat format) const;
-    AllocatedBuffer create_buffer(size_t allocSize, VkBufferUsageFlags usage, VmaMemoryUsage memoryUsage);
-    void destroy_buffer(const AllocatedBuffer& buffer);
-    // GPUMeshBuffers uploadMesh(std::span<uint32_t> indices, std::span<vertex_p2_st2_col4_a1_u32> vertices);
-    void upload_vertex_buffer(
-        VkCommandBuffer cmd,
-        const Frame &frame,
-        std::span<const vertex_p2_s2_st2_col4_a1_u32> vertices) const;
-
-    [[nodiscard]] VkPipeline get_pipeline(PipelineType type) const;
-    // void bind_material(VkCommandBuffer cmd, uint16_t materialID);
-    void create_descriptor_pool();
-    void update_bindless_slot(uint32_t slot, VkImageView view, VkSampler sampler) const;
-
-    // Returns the resource info so the Manager can store it
-    void upload_texture(
-        int w,
-        int h,
-        const void* pixels,
-        VkImage *image,
-        VmaAllocation *allocation,
-        VkImageView *view,
-        uint32_t *descriptor_index);
-
-    template <typename T>
-    VkPipeline create_graphics_pipeline(
-        VkDevice device,
-        VkPipelineLayout layout,
-        VkFormat colorFormat,
-        // VkShaderModule vertShader,
-        // VkShaderModule fragShader,
-        VkPrimitiveTopology topology,
-        bool enableBlending)
-    {
-
-        auto slangTargets{ std::to_array<slang::TargetDesc>({ {
-            .format = SLANG_SPIRV,
-            .profile = slangGlobalSession->findProfile("spirv_1_4")
-        } })};
-        auto slangOptions{ std::to_array<slang::CompilerOptionEntry>({ {
-            slang::CompilerOptionName::EmitSpirvDirectly,
-            {slang::CompilerOptionValueKind::Int, 1}
-        } })};
-        slang::SessionDesc slangSessionDesc{
-            .targets = slangTargets.data(),
-            .targetCount = SlangInt(slangTargets.size()),
-            .defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_COLUMN_MAJOR,
-            .compilerOptionEntries = slangOptions.data(),
-            .compilerOptionEntryCount = uint32_t(slangOptions.size())
-        };
-        Slang::ComPtr<slang::ISession> slangSession;
-        slangGlobalSession->createSession(slangSessionDesc, slangSession.writeRef());
-
-        Slang::ComPtr<slang::IModule> slangModule{
-            slangSession->loadModuleFromSource("triangle", "shaders/shader.slang", nullptr, nullptr)
-        };
-        Slang::ComPtr<ISlangBlob> spirv;
-        slangModule->getTargetCode(0, spirv.writeRef());
-
-        VkShaderModuleCreateInfo shaderModuleCI{
-            .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
-            .codeSize = spirv->getBufferSize(),
-            .pCode = (uint32_t*)spirv->getBufferPointer()
-        };
-        VkShaderModule shaderModule{};
-        vkCreateShaderModule(device, &shaderModuleCI, nullptr, &shaderModule);
-
-        auto vsCode = loadFile("shaders/triangle.vert.spv");
-        auto fsCode = loadFile("shaders/triangle.frag.spv");
-
-        VkShaderModuleCreateInfo smci{
-            .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO
-        };
-
-        smci.codeSize = vsCode.size();
-        smci.pCode = reinterpret_cast<uint32_t*>(vsCode.data());
-        // VkShaderModule vs;
-        // vkCreateShaderModule(device, &smci, nullptr, &vs);
-
-        smci.codeSize = fsCode.size();
-        smci.pCode = reinterpret_cast<uint32_t*>(fsCode.data());
-        // VkShaderModule fs;
-        // vkCreateShaderModule(device, &smci, nullptr, &fs);
-
-        // --- Shaders ---
-        std::vector<VkPipelineShaderStageCreateInfo> shaderStages{
-        { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-          .stage = VK_SHADER_STAGE_VERTEX_BIT,
-          .module = shaderModule, .pName = "main"},
-        { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-          .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-          .module = shaderModule, .pName = "main" }
-        };
-
-        // --- Vertex Input (Generic) ---
-        auto binding = T::getBindingDescription();
-        auto attrs = T::getAttributeDescriptions();
-
-        // --- Vertex Input (Matching our vertex_p2_st2_col4 struct) ---
-        VkPipelineVertexInputStateCreateInfo vi{
-            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-            .vertexBindingDescriptionCount = 1,
-            .pVertexBindingDescriptions = &binding,
-            .vertexAttributeDescriptionCount = attrs.size(),
-            .pVertexAttributeDescriptions = attrs.data(),
-        };
-
-        // --- Input Assembly (Changes based on Topology parameter) ---
-        VkPipelineInputAssemblyStateCreateInfo ia{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO};
-        ia.topology = topology;
-
-        // --- Blending (Changes based on enableBlending parameter) ---
-        VkPipelineColorBlendAttachmentState colorBlend{
-            .blendEnable = enableBlending ? VK_TRUE : VK_FALSE,
-            .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
-            .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
-            .colorBlendOp = VK_BLEND_OP_ADD,
-            .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
-            .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
-            .alphaBlendOp = VK_BLEND_OP_ADD,
-            .colorWriteMask = 0xF
-        };
-
-        // --- Boilerplate (Standard 2D Defaults) ---
-        VkPipelineViewportStateCreateInfo vp{
-            VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-            nullptr,
-            0,
-            1,
-            nullptr,
-            1,
-            nullptr
-        };
-
-        VkPipelineRasterizationStateCreateInfo rs{
-            VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-            nullptr,
-            0,
-            0,
-            0,
-            VK_POLYGON_MODE_FILL,
-            VK_CULL_MODE_NONE,
-            VK_FRONT_FACE_COUNTER_CLOCKWISE,
-            0,
-            0,
-            0,
-            0,
-            1.0f
-        };
-
-        VkPipelineMultisampleStateCreateInfo ms{
-            VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-            nullptr,
-            0,
-            VK_SAMPLE_COUNT_1_BIT
-        };
-
-        VkPipelineColorBlendStateCreateInfo cb{
-            VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-            nullptr,
-            0,
-            0,
-            VK_LOGIC_OP_AND,
-            1,
-            &colorBlend
-        };
-
-        VkDynamicState dyns[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR };
-        VkPipelineDynamicStateCreateInfo ds{
-            VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-            nullptr,
-            0,
-            2,
-            dyns
-        };
-
-        VkPipelineRenderingCreateInfo rci{
-            VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
-            nullptr,
-            0,
-            1,
-            &colorFormat
-        };
-
-        VkGraphicsPipelineCreateInfo gpci{
-            .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-            .pNext = &rci,
-            .stageCount = (uint32_t) shaderStages.size(),
-            .pStages = shaderStages.data(),
-            .pVertexInputState = &vi,
-            .pInputAssemblyState = &ia,
-            .pViewportState = &vp,
-            .pRasterizationState = &rs,
-            .pMultisampleState = &ms,
-            .pColorBlendState = &cb,
-            .pDynamicState = &ds,
-            .layout = layout
-        };
-
-        VkPipeline pipeline;
-        vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gpci, nullptr, &pipeline);
-        return pipeline;
-    }
+    void create_render_pipeline();
+    void send_render_command(GLFWwindow *window);
+    void encode_render_command(GLFWwindow *window, MTL::RenderCommandEncoder *render_command_encoder);
+//     void create_pipeline_layout();
+//     void createFrameResources();
+//     void create_default_sampler();
+//     void recordCommandBuffer(
+//         VkCommandBuffer cmd,
+//         VkImage image,
+//         VkImageView imageView,
+//         VkExtent2D extent,
+//         VkImageLayout oldLayout,
+//         const Frame &frame,
+//         const std::vector<vertex_p2_s2_st2_col4_a1_u32> &vertices) const;
+//     void immediate_submit(std::function<void(VkCommandBuffer)>&& func) const;
+//     void transition_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout) const;
+//     VkImageView create_image_view(VkImage image, VkFormat format) const;
+//     AllocatedBuffer create_buffer(size_t allocSize, VkBufferUsageFlags usage, VmaMemoryUsage memoryUsage);
+//     void destroy_buffer(const AllocatedBuffer& buffer);
+//     // GPUMeshBuffers uploadMesh(std::span<uint32_t> indices, std::span<vertex_p2_st2_col4_a1_u32> vertices);
+//     void upload_vertex_buffer(
+//         VkCommandBuffer cmd,
+//         const Frame &frame,
+//         std::span<const vertex_p2_s2_st2_col4_a1_u32> vertices) const;
+//
+    [[nodiscard]] MTL::RenderPipelineState *get_pipeline(PipelineType type) const;
+//     // void bind_material(VkCommandBuffer cmd, uint16_t materialID);
+//     void create_descriptor_pool();
+//     void update_bindless_slot(uint32_t slot, VkImageView view, VkSampler sampler) const;
 };
 
 #endif //V_RENDERER_H
diff --git a/renderer/metal/vertex_data.h b/renderer/metal/vertex_data.h
index 39d1eae..fed95c8 100644
--- a/renderer/metal/vertex_data.h
+++ b/renderer/metal/vertex_data.h
@@ -2,7 +2,52 @@
 // Created by Vicente Ferrari Smith on 27.02.26.
 //
 
-#ifndef V_VERTEX_DATA_H
-#define V_VERTEX_DATA_H
+#pragma once
+#include <simd/simd.h>
 
-#endif //V_VERTEX_DATA_H
+using namespace simd;
+
+// Interleaved per-vertex record: position, scale, texture coordinate, colour and alpha.
+// vertexDescriptor() describes this exact member layout to Metal.
+struct vertex_p2_s2_uv2_c4_a1 {
+    float2 pos;
+    float2 scale;
+    float2 uv;
+    float4 color;
+    float  alpha;
+
+    // Creates a vertex descriptor with one attribute per member (indices 0-4, in declaration
+    // order), all sourced from vertex buffer slot 0 and stepped once per vertex.
+    // The descriptor comes from alloc()->init() and is not released here.
+    static MTL::VertexDescriptor* vertexDescriptor() {
+        MTL::VertexDescriptor* descriptor = MTL::VertexDescriptor::alloc()->init();
+
+        // Fills in a single attribute slot; every attribute reads from buffer 0.
+        const auto describe = [descriptor](auto index, auto format, auto offset) {
+            auto* attribute = descriptor->attributes()->object(index);
+            attribute->setFormat(format);
+            attribute->setOffset(offset);
+            attribute->setBufferIndex(0);
+        };
+
+        describe(0, MTL::VertexFormatFloat2, offsetof(vertex_p2_s2_uv2_c4_a1, pos));
+        describe(1, MTL::VertexFormatFloat2, offsetof(vertex_p2_s2_uv2_c4_a1, scale));
+        describe(2, MTL::VertexFormatFloat2, offsetof(vertex_p2_s2_uv2_c4_a1, uv));
+        describe(3, MTL::VertexFormatFloat4, offsetof(vertex_p2_s2_uv2_c4_a1, color));
+        describe(4, MTL::VertexFormatFloat,  offsetof(vertex_p2_s2_uv2_c4_a1, alpha));
+
+        // Buffer 0 advances by one whole struct for each vertex.
+        auto* layout = descriptor->layouts()->object(0);
+        layout->setStride(sizeof(vertex_p2_s2_uv2_c4_a1));
+        layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
+
+        return descriptor;
+    }
+};
+
+// Bundle of three 4x4 matrices: model, view and perspective.
+struct TransformationData {
+    float4x4 model_matrix;
+    float4x4 view_matrix;
+    float4x4 perspective_matrix;
+};
diff --git a/renderer/sprite.cpp b/renderer/sprite.cpp
index dfab6ac..fb9b965 100644
--- a/renderer/sprite.cpp
+++ b/renderer/sprite.cpp
@@ -2,4 +2,4 @@
 // Created by Vicente Ferrari Smith on 14.02.26.
 //
 
-#include "../sprite.h"
+#include "sprite.h"
diff --git a/renderer/sprite.h b/renderer/sprite.h
index a272c77..1911172 100644
--- a/renderer/sprite.h
+++ b/renderer/sprite.h
@@ -2,8 +2,8 @@
 // Created by Vicente Ferrari Smith on 14.02.26.
 //
 
-#ifndef V_SPRITE_H
-#define V_SPRITE_H
+#ifndef SPRITE_H
+#define SPRITE_H
 
 #include <glm/glm.hpp>
 #include "texture_sheet.h"
@@ -17,8 +17,7 @@ struct sprite_t {
     bool window_space;
     bool maintain_ar;
 
-    texture_sheet_id texture_sheet;
-    texture_cell_id texture_cell;
+    texture_id texture;
 };
 
-#endif //V_SPRITE_H
\ No newline at end of file
+#endif //SPRITE_H
\ No newline at end of file
diff --git a/renderer/texture.cpp b/renderer/texture.cpp
index 43aac3d..10e30d0 100644
--- a/renderer/texture.cpp
+++ b/renderer/texture.cpp
@@ -1,3 +1,49 @@
 //
 // Created by Vicente Ferrari Smith on 01.03.26.
 //
+
+#include "texture.h"
+#include "graphics.h"
+
+TextureManager::TextureManager() {
+
+}
+
+// Decodes the image at `path` as RGBA, uploads it through upload_texture() and caches the
+// result in `textures` under `path`.
+// Returns the cached entry when `path` was already uploaded. If the file cannot be decoded,
+// returns a Texture with only id/path set and uploaded == false; nothing is cached.
+Texture TextureManager::load(const std::string& path) {
+    // Dedup: don't decode and upload the same file twice. Entries that were never uploaded
+    // (for example ones default-inserted by textures[...]) are still loaded below.
+    if (const auto cached = textures.find(path); cached != textures.end() && cached->second.uploaded) {
+        return cached->second;
+    }
+
+    int w = 0, h = 0, ch = 0;
+    unsigned char* data = stbi_load(path.c_str(), &w, &h, &ch, STBI_rgb_alpha);
+
+    Texture res{};
+    res.id = path;
+    res.path = path;
+
+    // stbi_load returns null on failure; bail out before the upload instead of handing a
+    // null pixel pointer and meaningless dimensions to upload_texture().
+    if (data == nullptr) {
+        return res;
+    }
+
+    // Pixels were force-converted to 4 channels above, so record that rather than `ch`.
+    res.width = w;
+    res.height = h;
+    res.channels = STBI_rgb_alpha;
+    res.srgb = true;
+    upload_texture(w, h, data, &res);
+
+    stbi_image_free(data);
+
+    res.uploaded = true;
+    textures[path] = res;
+
+    return res;
+}
diff --git a/renderer/texture.h b/renderer/texture.h
index ad9f0ee..3d272e3 100644
--- a/renderer/texture.h
+++ b/renderer/texture.h
@@ -2,15 +2,15 @@
 // Created by Vicente Ferrari Smith on 14.02.26.
 //
 
-#ifndef V_TEXTURE_H
-#define V_TEXTURE_H
+#ifndef TEXTURE_H
+#define TEXTURE_H
 
 #include <string>
 #include <unordered_map>
-#include <volk/volk.h>
-#include <vma/vk_mem_alloc.h>
+#include <stb_image.h>
 
 struct Renderer;
+struct PlatformTexture;
 
 typedef std::string texture_id;
 
@@ -26,9 +26,7 @@ struct Texture {
     bool srgb;
     bool uploaded;
 
-    VkImage image;
-    VmaAllocation allocation;
-    VkImageView view;
+    PlatformTexture *p_texture;
     uint32_t descriptor_index;
 };
 
@@ -36,9 +34,9 @@ struct TextureManager {
     std::unordered_map<texture_id, Texture> textures;
 
     TextureManager();
-    Texture load(const std::string& path, Renderer &renderer);
+    Texture load(const std::string& path);
 };
 
 inline TextureManager texture_manager;
 
-#endif //V_TEXTURE_H
\ No newline at end of file
+#endif //TEXTURE_H
\ No newline at end of file
diff --git a/renderer/texture_sheet.cpp b/renderer/texture_sheet.cpp
index c984cd1..54342e6 100644
--- a/renderer/texture_sheet.cpp
+++ b/renderer/texture_sheet.cpp
@@ -2,4 +2,4 @@
 // Created by Vicente Ferrari Smith on 14.02.26.
 //
 
-#include "../texture_sheet.h"
+#include "texture_sheet.h"
diff --git a/renderer/texture_sheet.h b/renderer/texture_sheet.h
index fc0aae4..5adc593 100644
--- a/renderer/texture_sheet.h
+++ b/renderer/texture_sheet.h
@@ -7,7 +7,6 @@
 
 #include <string>
 #include <glm/glm.hpp>
-
 #include "texture.h"
 
 inline const std::string TEXTURE_SHEETS_PATH = "data/texture_sheets";
@@ -26,7 +25,7 @@ struct TextureCell {
     int64_t cell_x;
     int64_t cell_y;
 
-    glm::vec2 st0;
+    glm::vec2 st0 = {0.0, 0.0};
     glm::vec2 st1 = {1.0, 1.0};
 };
 
diff --git a/renderer/vulkan/renderer.cpp b/renderer/vulkan/renderer.cpp
index 5dfbfc1..de93da8 100644
--- a/renderer/vulkan/renderer.cpp
+++ b/renderer/vulkan/renderer.cpp
@@ -2,11 +2,12 @@
 // Created by Vicente Ferrari Smith on 13.02.26.
 //
 
-#include "../Grpahics.h"
+#include "renderer.h"
+#include "../graphics.h"
 
 #include <print>
 
-#include "init.h"
+#include "vulkan.h"
 #include "../sprite.h"
 #include <vma/vk_mem_alloc.h>
 #include <slang/slang.h>
@@ -152,7 +153,7 @@ void Renderer::create_pipeline_layout() {
     vkCreatePipelineLayout(device, &plci, nullptr, &pipelineLayout);
 }
 
-void Renderer::createFrameResources() {
+void Renderer::create_frame_resources() {
 
     const VkSemaphoreCreateInfo seci{
         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
@@ -352,91 +353,6 @@ void Renderer::update_bindless_slot(uint32_t slot, VkImageView view, VkSampler s
     vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
 }
 
-void Renderer::upload_texture(
-    const int w,
-    const int h,
-    const void* pixels,
-    VkImage *image,
-    VmaAllocation *allocation,
-    VkImageView *view,
-    uint32_t *descriptor_index)
-{
-    VkDeviceSize imageSize = w * h * 4;
-
-    // --- 1. Create Staging Buffer (CPU Visible) ---
-    VkBuffer stagingBuffer;
-    VmaAllocation stagingAlloc;
-
-    VkBufferCreateInfo stagingBufferInfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
-    stagingBufferInfo.size = imageSize;
-    stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
-
-    VmaAllocationCreateInfo stagingAllocCreateInfo = {
-        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT,
-        .usage = VMA_MEMORY_USAGE_AUTO,
-    };
-
-    VmaAllocationInfo stagingResultInfo;
-    vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocCreateInfo, &stagingBuffer, &stagingAlloc, &stagingResultInfo);
-
-    // Copy raw pixels into the mapped memory provided by VMA
-    memcpy(stagingResultInfo.pMappedData, pixels, imageSize);
-
-    // --- 2. Create GPU Image (Device Local / Tiled) ---
-    VkExtent3D imageExtent = { (uint32_t) w, (uint32_t) h, 1 };
-
-    VkImageCreateInfo imageInfo = {
-        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-        .imageType = VK_IMAGE_TYPE_2D,
-        .format = VK_FORMAT_R8G8B8A8_UNORM,
-        .extent = imageExtent,
-        .mipLevels = 1,
-        .arrayLayers = 1,
-        .samples = VK_SAMPLE_COUNT_1_BIT,
-        .tiling = VK_IMAGE_TILING_OPTIMAL,
-        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
-        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED
-    };
-
-    VmaAllocationCreateInfo imageAllocCreateInfo = {
-        .usage = VMA_MEMORY_USAGE_AUTO,
-        .priority = 1.0f,
-    };
-
-    vmaCreateImage(allocator, &imageInfo, &imageAllocCreateInfo, image, allocation, nullptr);
-
-    // --- 3. The Transfer ---
-    immediate_submit([&](VkCommandBuffer cmd) {
-        // Transition image from UNDEFINED to TRANSFER_DST
-        transition_image_layout(cmd, *image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
-
-        VkBufferImageCopy copyRegion = {};
-        copyRegion.bufferOffset = 0;
-        copyRegion.bufferRowLength = 0;
-        copyRegion.bufferImageHeight = 0;
-        copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-        copyRegion.imageSubresource.mipLevel = 0;
-        copyRegion.imageSubresource.baseArrayLayer = 0;
-        copyRegion.imageSubresource.layerCount = 1;
-        copyRegion.imageExtent = imageExtent;
-
-        vkCmdCopyBufferToImage(cmd, stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copyRegion);
-
-        // Transition image from TRANSFER_DST to SHADER_READ_ONLY
-        transition_image_layout(cmd, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-    });
-
-    // Clean up temporary staging resources
-    vmaDestroyBuffer(allocator, stagingBuffer, stagingAlloc);
-
-    // --- 4. Finalize Handles ---
-    *view = create_image_view(*image, imageInfo.format);
-
-    // Register in your Bindless Array (Set 0, Binding 0, Index N)
-    *descriptor_index = nextTextureSlot++;
-    update_bindless_slot(*descriptor_index, *view, defaultSampler);
-}
-
 void Renderer::immediate_submit(std::function<void(VkCommandBuffer)>&& func) const {
     VkCommandBufferAllocateInfo allocInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
     allocInfo.commandPool = frames[currentFrame].commandPool; // Use a pool created with VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
diff --git a/renderer/vulkan/renderer.h b/renderer/vulkan/renderer.h
index 91c455a..866d84c 100644
--- a/renderer/vulkan/renderer.h
+++ b/renderer/vulkan/renderer.h
@@ -5,9 +5,13 @@
 #ifndef V_RENDERER_H
 #define V_RENDERER_H
 
-#include "init.h"
+#include "vulkan.h"
 #include <volk/volk.h>
+#ifdef __EMSCRIPTEN__
+#include <GLFW/emscripten_glfw3.h>
+#else
 #include <GLFW/glfw3.h>
+#endif
 #define GLM_FORCE_RADIANS
 #define GLM_FORCE_DEPTH_ZERO_TO_ONE
 #define GLM_ENABLE_EXPERIMENTAL
@@ -16,14 +20,10 @@
 #include "glm/gtx/string_cast.hpp"
 #include <vma/vk_mem_alloc.h>
 #include "../sprite.h"
-#include "texture.h"
+#include "../texture.h"
 #include <misc.h>
 #include <array>
 #include <span>
-#include <slang/slang.h>
-#include <slang/slang-com-ptr.h>
-
-inline Slang::ComPtr<slang::IGlobalSession> slangGlobalSession;
 
 enum class PROJECTION_TYPE : uint8_t {
     NONE,
@@ -208,16 +208,6 @@ struct Renderer {
     void create_descriptor_pool();
     void update_bindless_slot(uint32_t slot, VkImageView view, VkSampler sampler) const;
 
-    // Returns the resource info so the Manager can store it
-    void upload_texture(
-        int w,
-        int h,
-        const void* pixels,
-        VkImage *image,
-        VmaAllocation *allocation,
-        VkImageView *view,
-        uint32_t *descriptor_index);
-
     template <typename T>
     VkPipeline create_graphics_pipeline(
         VkDevice device,
diff --git a/renderer/vulkan/vulkan.cpp b/renderer/vulkan/vulkan.cpp
index cfc45de..f0e4171 100644
--- a/renderer/vulkan/vulkan.cpp
+++ b/renderer/vulkan/vulkan.cpp
@@ -7,7 +7,7 @@
 #define VMA_IMPLEMENTATION
 #include <vma/vk_mem_alloc.h>
 
-#include "init.h"
+#include "vulkan.h"
 #include <print>
 #include <vector>
 
@@ -39,6 +39,88 @@ std::vector<VkImage> images;
 std::vector<VkImageView> imageViews;
 std::vector<VkImageLayout> imageLayouts;
 
+// Copies `w` x `h` pixels (4 bytes each, per imageSize) into a new sampled VkImage
+// through a temporary VMA staging buffer, then registers it in a bindless slot.
+// NOTE(review): the body still uses `image`, `allocation`, `view` and
+// `descriptor_index` — parameters of the old Renderer::upload_texture that do not
+// exist here — and never writes to `texture`. Confirm how these should map.
+void upload_texture(
+    const int w,
+    const int h,
+    const void* pixels,
+    Texture *texture)
+{
+    VkDeviceSize imageSize = w * h * 4; // NOTE(review): multiplied as int before widening
+    // --- 1. Create Staging Buffer (CPU Visible) ---
+    VkBuffer stagingBuffer;
+    VmaAllocation stagingAlloc;
+    VkBufferCreateInfo stagingBufferInfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+    stagingBufferInfo.size = imageSize;
+    stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+    VmaAllocationCreateInfo stagingAllocCreateInfo = {
+        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT,
+        .usage = VMA_MEMORY_USAGE_AUTO,
+    };
+    VmaAllocationInfo stagingResultInfo;
+    vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocCreateInfo, &stagingBuffer, &stagingAlloc, &stagingResultInfo);
+
+    // Copy raw pixels into the mapped memory provided by VMA
+    // (NOTE(review): the vmaCreateBuffer result above is not checked first).
+    memcpy(stagingResultInfo.pMappedData, pixels, imageSize);
+
+    // --- 2. Create GPU Image (Device Local / Tiled) ---
+    VkExtent3D imageExtent = { (uint32_t) w, (uint32_t) h, 1 };
+    VkImageCreateInfo imageInfo = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .format = VK_FORMAT_R8G8B8A8_UNORM,
+        .extent = imageExtent,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .tiling = VK_IMAGE_TILING_OPTIMAL,
+        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED
+    };
+    VmaAllocationCreateInfo imageAllocCreateInfo = {
+        .usage = VMA_MEMORY_USAGE_AUTO,
+        .priority = 1.0f,
+    };
+    vmaCreateImage(allocator, &imageInfo, &imageAllocCreateInfo, image, allocation, nullptr);
+
+    // --- 3. The Transfer ---
+    immediate_submit([&](VkCommandBuffer cmd) {
+        // Transition image from UNDEFINED to TRANSFER_DST
+        transition_image_layout(cmd, *image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+
+        VkBufferImageCopy copyRegion = {};
+        copyRegion.bufferOffset = 0;
+        copyRegion.bufferRowLength = 0;
+        copyRegion.bufferImageHeight = 0;
+        copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        copyRegion.imageSubresource.mipLevel = 0;
+        copyRegion.imageSubresource.baseArrayLayer = 0;
+        copyRegion.imageSubresource.layerCount = 1;
+        copyRegion.imageExtent = imageExtent;
+
+        vkCmdCopyBufferToImage(cmd, stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copyRegion);
+
+        // Transition image from TRANSFER_DST to SHADER_READ_ONLY
+        transition_image_layout(cmd, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+    });
+
+    // Clean up temporary staging resources
+    vmaDestroyBuffer(allocator, stagingBuffer, stagingAlloc);
+
+    // --- 4. Finalize Handles ---
+    *view = create_image_view(*image, imageInfo.format);
+
+    // Register in your Bindless Array (Set 0, Binding 0, Index N)
+    *descriptor_index = nextTextureSlot++;
+    update_bindless_slot(*descriptor_index, *view, defaultSampler);
+}
+
 void graphics_init() {
     createInstance(window);
     createSurface(window);
@@ -46,10 +128,7 @@ void graphics_init() {
 
     createSwapchain(window);
 
-    slang::createGlobalSession(slangGlobalSession.writeRef());
-
     Renderer renderer(window);
-    texture_manager.load("assets/boy.png", renderer);
 }
 
 void graphics_deinit() {
diff --git a/renderer/vulkan/vulkan.h b/renderer/vulkan/vulkan.h
index bc280d3..eae667c 100644
--- a/renderer/vulkan/vulkan.h
+++ b/renderer/vulkan/vulkan.h
@@ -2,8 +2,8 @@
 // Created by Vicente Ferrari Smith on 12.02.26.
 //
 
-#ifndef V_INIT_H
-#define V_INIT_H
+#ifndef V_VULKAN_H
+#define V_VULKAN_H
 
 #include <volk/volk.h>
 #include <GLFW/glfw3.h>
@@ -22,4 +22,4 @@ void pickPhysicalDevice();
 void createDevice();
 
 
-#endif //V_INIT_H
+#endif //V_VULKAN_H
diff --git a/renderer/webgpu/renderer.cpp b/renderer/webgpu/renderer.cpp
index e9348e2..8d09b3f 100644
--- a/renderer/webgpu/renderer.cpp
+++ b/renderer/webgpu/renderer.cpp
@@ -3,3 +3,278 @@
 //
 
 #include "renderer.h"
+
+#include <iostream>
+
+#include "webgpu.h"
+#include "../graphics.h"
+#include <misc.h>
+#include <print>
+
+extern WGPUInstance instance;
+
+extern Device        wgpu_device;
+extern Queue         wgpu_queue;
+
+// Creates both pipelines, then the input / output / staging buffers (each
+// elementCount floats), and seeds the input buffer with 0.0, 0.1, 0.2, ...
+// `window` is not used by this constructor.
+Renderer::Renderer(GLFWwindow *window) {
+    create_compute_pipeline();
+    create_render_pipeline();
+
+    // The three buffers differ only in label, usage flags and initial mapping.
+    using usage_flags = decltype(WGPUBufferDescriptor::usage);
+    const auto make_buffer = [this](const char *label, usage_flags usage, bool mapped) {
+        WGPUBufferDescriptor desc = WGPU_BUFFER_DESCRIPTOR_INIT;
+        desc.label = toWgpuStringView(label);
+        desc.size = elementCount * sizeof(float);
+        desc.usage = usage;
+        if (mapped) desc.mappedAtCreation = true;
+        return wgpuDeviceCreateBuffer(wgpu_device.device, &desc);
+    };
+
+    // Created already mapped so it can be filled below without a map request.
+    input_buffer = make_buffer("Input Buffer", WGPUBufferUsage_Storage, true);
+    output_buffer = make_buffer(
+        "Output Buffer", WGPUBufferUsage_Storage | WGPUBufferUsage_CopySrc, false);
+    staging_buffer = make_buffer(
+        "Staging Buffer", WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead, false);
+
+    // Write i * 0.1f into element i of the mapped input buffer, then unmap it.
+    float *values = static_cast<float*>(
+        wgpuBufferGetMappedRange(input_buffer, 0, WGPU_WHOLE_MAP_SIZE)
+    );
+    for (size_t idx = 0; idx != elementCount; ++idx) {
+        values[idx] = static_cast<float>(idx) * 0.1f;
+    }
+    wgpuBufferUnmap(input_buffer);
+}
+
+// Builds `compute_pipeline` from the WGSL in shader/compute.wgsl. When the source
+// cannot be read, logs the failure and leaves `compute_pipeline` null.
+void Renderer::create_compute_pipeline() {
+    std::string shader_source = read_entire_file("shader/compute.wgsl");
+    if (shader_source.empty()) {
+        std::println("Couldn't load compute shader source");
+        compute_pipeline = nullptr; // never leave the handle indeterminate for later use
+        return;
+    }
+    WGPUShaderSourceWGSL wgslSourceDesc = WGPU_SHADER_SOURCE_WGSL_INIT;
+    wgslSourceDesc.code = toWgpuStringView(shader_source);
+    WGPUShaderModuleDescriptor moduleDesc = WGPU_SHADER_MODULE_DESCRIPTOR_INIT;
+    moduleDesc.nextInChain = &wgslSourceDesc.chain;
+    moduleDesc.label = toWgpuStringView("Our first compute shader");
+    WGPUShaderModule shaderModule = wgpuDeviceCreateShaderModule(wgpu_device.device, &moduleDesc);
+
+    WGPUComputePipelineDescriptor desc = WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT;
+    desc.label = toWgpuStringView("Our simple pipeline");
+    desc.compute.module = shaderModule;
+    desc.compute.entryPoint = toWgpuStringView("main");
+    compute_pipeline = wgpuDeviceCreateComputePipeline(wgpu_device.device, &desc);
+    wgpuShaderModuleRelease(shaderModule); // the pipeline holds its own reference; drop ours
+}
+
+void Renderer::create_render_pipeline() {
+    // Intentionally empty for now: `textured_quad_pipeline` is not created here.
+}
+
+// Queues a textured-quad command for `sprite` at `pos`. Sprites whose texture id
+// is not loaded, or whose texture has no platform texture yet, are skipped.
+void Renderer::submit_sprite(glm::vec2 pos, const sprite_t &sprite) {
+    // find() rather than operator[]: a lookup must not insert an empty Texture.
+    const auto found = texture_manager.textures.find(sprite.texture);
+    if (found == texture_manager.textures.end()) return;
+    const Texture &texture = found->second;
+    if (texture.p_texture == nullptr) return; // dereferenced below, so it must be set
+
+    RenderCommand cmd {};
+    cmd.pipeline = PipelineType::TexturedQuad;
+    cmd.key = { (uint16_t) pos.y, 0, (uint8_t) PipelineType::TexturedQuad };
+    cmd.textured_quad = {
+        .pos = pos,
+        .scale = { sprite.scale.x, sprite.scale.y },
+        .uv0 = {0, 0},
+        .uv1 = {1, 1},
+        .colour = {1, 1, 1, 1},
+        .texture = texture.p_texture->texture,
+    };
+    commands.push_back(cmd);
+
+    // assert(started == true, "You can't submit without having started the renderer first.");
+    // renderable : Renderable;
+    // renderable.type = .Sprite;
+    //
+    // if sprite.window_space
+    //     renderable.projection_type = .ORTHOGRAPHIC_WINDOW;
+    // else
+    //     renderable.projection_type = .ORTHOGRAPHIC_WORLD;
+    //
+    // renderable.pos                  = pos;
+    // renderable.sprite.texture_sheet = sprite.texture_sheet;
+    // renderable.sprite.texture_cell  = sprite.texture_cell;
+    // renderable.sprite.origin        = sprite.origin;
+    // renderable.sprite.scale         = sprite.scale;
+    // renderable.sprite.colour        = sprite.colour;
+    // renderable.sprite.alpha         = alpha;
+    //
+    // array_add(*renderer.renderable_list, renderable);
+}
+
+void Renderer::begin_frame() {
+    commands.clear(); // start the frame with an empty command list
+}
+
+uint32_t divideAndCeil(uint32_t p, uint32_t q) { // ceil(p / q); q must be non-zero
+    return p / q + (p % q != 0 ? 1u : 0u); // unlike (p + q - 1) / q, this cannot wrap
+}
+
+// Maps `bufferB` for reading, blocks (polling the instance every 200 ms) until the
+// map request completes, passes the mapped bytes to `processBufferData`, then unmaps.
+void fetchBufferDataSync(
+    WGPUInstance instance,
+    WGPUBuffer bufferB,
+    std::function<void(const void*)> processBufferData
+) {
+    // Context passed to `onBufferBMapped` through the userdata pointer:
+    struct OnBufferBMappedContext {
+        bool operationEnded = false; // Turned true as soon as the callback is invoked
+        bool mappingIsSuccessful = false; // Turned true only if mapping succeeded
+    };
+
+    // This function has the type WGPUBufferMapCallback as defined in webgpu.h
+    auto onBufferBMapped = [](
+        WGPUMapAsyncStatus status,
+        struct WGPUStringView message,
+        void* userdata1,
+        void* /* userdata2 */
+    ) {
+        OnBufferBMappedContext& context = *reinterpret_cast<OnBufferBMappedContext*>(userdata1);
+        context.operationEnded = true;
+        if (status == WGPUMapAsyncStatus_Success) {
+            context.mappingIsSuccessful = true;
+        } else {
+            std::cout << "Could not map buffer B! Status: " << status << ", message: " << toStdStringView(message) << std::endl;
+        }
+    };
+
+    OnBufferBMappedContext context; // shared with `onBufferBMapped` via userdata1
+    // And we build the callback info:
+    WGPUBufferMapCallbackInfo callbackInfo = WGPU_BUFFER_MAP_CALLBACK_INFO_INIT;
+    callbackInfo.mode = WGPUCallbackMode_AllowProcessEvents;
+    callbackInfo.callback = onBufferBMapped;
+    callbackInfo.userdata1 = &context;
+
+    // And finally we launch the asynchronous operation
+    wgpuBufferMapAsync(
+        bufferB,
+        WGPUMapMode_Read,
+        0, // offset
+        WGPU_WHOLE_MAP_SIZE,
+        callbackInfo
+    );
+
+    // Process events until the map operation ended
+    wgpuInstanceProcessEvents(instance);
+    while (!context.operationEnded) {
+        sleepForMilliseconds(200);
+        wgpuInstanceProcessEvents(instance);
+    }
+
+    if (context.mappingIsSuccessful) {
+        const void* bufferData = wgpuBufferGetConstMappedRange(bufferB, 0, WGPU_WHOLE_MAP_SIZE);
+        processBufferData(bufferData);
+        wgpuBufferUnmap(bufferB); // a buffer left mapped cannot be used by the next submit
+    }
+}
+
+// Runs the compute pass over `input_buffer`, copies `output_buffer` into
+// `staging_buffer`, submits the work and prints the result. `window` is unused.
+void Renderer::end_frame(GLFWwindow *window) {
+    if (compute_pipeline == nullptr) return; // nothing to dispatch without a pipeline
+    std::vector<WGPUBindGroupEntry> bindGroupEntries(2, WGPU_BIND_GROUP_ENTRY_INIT);
+    bindGroupEntries[0].binding = 0;
+    bindGroupEntries[0].buffer = input_buffer;
+    bindGroupEntries[1].binding = 1;
+    bindGroupEntries[1].buffer = output_buffer;
+
+    WGPUBindGroupDescriptor bindGroupDesc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
+    bindGroupDesc.entries = bindGroupEntries.data();
+    bindGroupDesc.entryCount = bindGroupEntries.size();
+    bindGroupDesc.layout = wgpuComputePipelineGetBindGroupLayout(compute_pipeline, 0);
+    WGPUBindGroup bindGroup = wgpuDeviceCreateBindGroup(wgpu_device.device, &bindGroupDesc);
+    wgpuBindGroupLayoutRelease(bindGroupDesc.layout);
+
+    WGPUCommandEncoderDescriptor encoderDesc = WGPU_COMMAND_ENCODER_DESCRIPTOR_INIT;
+    encoderDesc.label = toWgpuStringView("My command encoder");
+    WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(wgpu_device.device, &encoderDesc);
+    WGPUComputePassEncoder computePass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+    wgpuComputePassEncoderSetPipeline(computePass, compute_pipeline);
+    wgpuComputePassEncoderSetBindGroup(computePass, 0, bindGroup, 0, nullptr);
+    uint32_t workgroupSizeX = 32; // the value specified in @workgroup_size(...)
+    uint32_t workgroupCountX = divideAndCeil((uint32_t)elementCount, workgroupSizeX);
+    wgpuComputePassEncoderDispatchWorkgroups(computePass, workgroupCountX, 1, 1);
+    wgpuComputePassEncoderEnd(computePass);
+    wgpuComputePassEncoderRelease(computePass);
+    // After the end of the compute pass, we copy the whole output buffer into the staging buffer
+    wgpuCommandEncoderCopyBufferToBuffer(encoder, output_buffer, 0, staging_buffer, 0, elementCount * sizeof(float));
+
+    WGPUCommandBufferDescriptor cmdBufferDescriptor = WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT;
+    cmdBufferDescriptor.label = toWgpuStringView("Command buffer");
+    WGPUCommandBuffer command = wgpuCommandEncoderFinish(encoder, &cmdBufferDescriptor);
+    wgpuCommandEncoderRelease(encoder); // release encoder after it's finished
+    // Finally submit the command queue
+    std::cout << "Submitting command..." << std::endl;
+    wgpuQueueSubmit(wgpu_queue.queue, 1, &command);
+    wgpuCommandBufferRelease(command);
+    wgpuBindGroupRelease(bindGroup); // created every frame, so it must be released every frame
+    std::cout << "Command submitted." << std::endl;
+    fetchBufferDataSync(instance, staging_buffer, [&](const void* data) {
+        const float* floatData = static_cast<const float*>(data);
+        std::cout << "Result: [";
+        for (size_t i = 0 ; i < elementCount ; ++i) {
+            if (i > 0) std::cout << ", ";
+            std::cout << floatData[i];
+        }
+        std::cout << "]" << std::endl;
+    });
+
+//     // Get the next target texture view
+//     WGPUTextureView target_view = get_next_surface_view();
+//     if (!target_view) return; // no surface texture, we skip this frame
+//
+//     WGPUCommandEncoderDescriptor encoderDesc = WGPU_COMMAND_ENCODER_DESCRIPTOR_INIT;
+//     encoderDesc.label = toWgpuStringView("My command encoder");
+//     WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(wgpu_device.device, &encoderDesc);
+//     WGPURenderPassDescriptor renderPassDesc = WGPU_RENDER_PASS_DESCRIPTOR_INIT;
+//     WGPURenderPassColorAttachment colorAttachment = WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT;
+//
+//     colorAttachment.view = target_view;
+//     colorAttachment.loadOp = WGPULoadOp_Clear;
+//     colorAttachment.storeOp = WGPUStoreOp_Store;
+//     colorAttachment.clearValue = WGPUColor{ 100.0 / 255.0, 149.0 / 255.0, 237.0 / 255.0, 1.0 };
+//
+//     renderPassDesc.colorAttachmentCount = 1;
+//     renderPassDesc.colorAttachments = &colorAttachment;
+//
+//     WGPURenderPassEncoder renderPass = wgpuCommandEncoderBeginRenderPass(encoder, &renderPassDesc);
+//     // Use the render pass here (we do nothing with the render pass for now)
+//     wgpuRenderPassEncoderEnd(renderPass);
+//     wgpuRenderPassEncoderRelease(renderPass);
+//     WGPUCommandBufferDescriptor cmdBufferDescriptor = WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT;
+//     cmdBufferDescriptor.label = toWgpuStringView("Command buffer");
+//     WGPUCommandBuffer command = wgpuCommandEncoderFinish(encoder, &cmdBufferDescriptor);
+//     wgpuCommandEncoderRelease(encoder); // release encoder after it's finished
+//
+//     // Finally submit the command queue
+//     std::println("Submitting command...");
+//     wgpuQueueSubmit(wgpu_queue.queue, 1, &command);
+//     wgpuCommandBufferRelease(command);
+//     std::println("Command submitted.");
+//
+//     // At the end of the frame
+//     wgpuTextureViewRelease(target_view);
+// #ifndef __EMSCRIPTEN__
+//     wgpuSurfacePresent(wgpu_surface.surface);
+// #endif
+}
diff --git a/renderer/webgpu/renderer.h b/renderer/webgpu/renderer.h
index be8cbcf..bcfd047 100644
--- a/renderer/webgpu/renderer.h
+++ b/renderer/webgpu/renderer.h
@@ -5,12 +5,102 @@
 #ifndef V_RENDERER_H
 #define V_RENDERER_H
 
+#ifdef __EMSCRIPTEN__
+#include <GLFW/emscripten_glfw3.h>
+#else
+#include <GLFW/glfw3.h>
+#endif
+#include <glm/glm.hpp>
+#include <webgpu/webgpu.h>
 
+#include "../sprite.h"
 
-class renderer {
+enum class PROJECTION_TYPE : uint8_t { // projection selector, stored in one byte
+    NONE,
+    ORTHOGRAPHIC_WORLD,
+    ORTHOGRAPHIC_WINDOW,
+    PERSPECTIVE_WORLD,
+    PERSPECTIVE_WINDOW,
+    COUNT, // presumably the number of entries above; keep last — TODO confirm
+};
 
+// commands
+
+enum class PipelineType : uint8_t { // kind of draw a RenderCommand represents
+    None,
+    TexturedQuad, // the value Renderer::submit_sprite assigns to its commands
+    ColoredQuad,
+    Line,
+    Text,
+    Chunk
+};
+
+struct TexturedQuadCmd { // payload of a PipelineType::TexturedQuad command
+    glm::vec2 pos;
+    glm::vec2 scale;
+    glm::vec2 uv0; // submit_sprite currently always passes {0, 0}
+    glm::vec2 uv1; // submit_sprite currently always passes {1, 1}
+    glm::vec4 colour;
+    WGPUTextureDescriptor texture; // NOTE(review): a descriptor, not a texture handle — confirm intent
+};
+
+struct ColoredQuadCmd { // untextured quad; same fields as TexturedQuadCmd minus uv/texture
+    glm::vec2 pos;
+    glm::vec2 scale;
+    glm::vec4 colour;
+};
+
+struct LineCmd {
+    glm::vec2 start;
+    glm::vec2 end;
+    glm::vec4 color;
 };
 
 
+struct SortKey {
+    uint16_t depth; // world Z or Y-sorted depth
+    uint16_t materialID; // texture sheet, font atlas, etc.
+    uint8_t pipeline; // PipelineType
 
-#endif //V_RENDERER_H
+    bool operator<(const SortKey& b) const;
+};
+
+struct RenderCommand { // one queued draw; Renderer::commands stores these
+    // NOTE(review): `pipeline` presumably tells which union member is live — confirm.
+    SortKey      key;
+    PipelineType pipeline;
+
+    union {
+        TexturedQuadCmd textured_quad;
+        ColoredQuadCmd  colored_quad;
+        LineCmd         line;
+        // TextCmd         text;
+        // ChunkCmd        chunk;
+    };
+};
+
+struct Renderer { // WebGPU backend state; every handle is null until something creates it
+    std::vector<RenderCommand> commands{}; // filled by submit_sprite, cleared by begin_frame
+
+    WGPURenderPipeline textured_quad_pipeline = nullptr;
+
+    size_t elementCount = 64; // number of floats in each of the three buffers below
+    WGPUComputePipeline compute_pipeline = nullptr;
+
+    WGPUBuffer input_buffer = nullptr;
+    WGPUBuffer output_buffer = nullptr;
+    WGPUBuffer staging_buffer = nullptr;
+
+    Renderer() = default; // creates no GPU objects; the handles above stay null
+    Renderer(GLFWwindow *window); // creates the pipelines and the three buffers
+
+    void begin_frame();
+    void end_frame(GLFWwindow *window);
+    void submit_sprite(glm::vec2 pos, const sprite_t &sprite);
+
+    void create_render_pipeline();
+    void create_compute_pipeline();
+};
+
+
+#endif //V_RENDERER_H
\ No newline at end of file
diff --git a/renderer/webgpu/utils_emscripten.h b/renderer/webgpu/utils_emscripten.h
index a6e37d9..8f0f140 100644
--- a/renderer/webgpu/utils_emscripten.h
+++ b/renderer/webgpu/utils_emscripten.h
@@ -33,47 +33,8 @@
 #error "utils_emscripten.cpp: This file requires EMSCRIPTEN to be defined."
 #endif  // !defined(EMSCRIPTEN)
 
-#include <memory>
-
 #include "GLFW/glfw3.h"
-#include "emscripten/emscripten.h"
 #include "webgpu/webgpu_glfw.h"
 
 WGPU_GLFW_EXPORT WGPUSurface wgpuGlfwCreateSurfaceForWindow(const WGPUInstance instance,
-                                                            GLFWwindow* window) {
-    wgpu::Surface s = wgpu::glfw::CreateSurfaceForWindow(instance, window);
-    return s.MoveToCHandle();
-}
-
-namespace wgpu::glfw {
-
-wgpu::Surface CreateSurfaceForWindow(const wgpu::Instance& instance, GLFWwindow* window) {
-    auto chainedDescriptor = SetupWindowAndGetSurfaceDescriptor(window);
-
-    wgpu::SurfaceDescriptor descriptor;
-    descriptor.nextInChain = chainedDescriptor.get();
-    wgpu::Surface surface = instance.CreateSurface(&descriptor);
-
-    return surface;
-}
-
-std::unique_ptr<wgpu::ChainedStruct, void (*)(wgpu::ChainedStruct*)>
-SetupWindowAndGetSurfaceDescriptor(GLFWwindow* window) {
-    if (glfwGetWindowAttrib(window, GLFW_CLIENT_API) != GLFW_NO_API) {
-        emscripten_log(EM_LOG_ERROR,
-                       "GL context was created on the window. Disable context creation by "
-                       "setting the GLFW_CLIENT_API hint to GLFW_NO_API.");
-        return {nullptr, [](wgpu::ChainedStruct*) {}};
-    }
-
-    wgpu::EmscriptenSurfaceSourceCanvasHTMLSelector* desc =
-        new wgpu::EmscriptenSurfaceSourceCanvasHTMLSelector();
-    // Map "!canvas" CSS selector to the canvas held in the Module.canvas object.
-    EM_ASM({self.specialHTMLTargets && (specialHTMLTargets["!canvas"] = Module.canvas)});
-    desc->selector = "!canvas";
-    return {desc, [](wgpu::ChainedStruct* desc) {
-                delete reinterpret_cast<wgpu::EmscriptenSurfaceSourceCanvasHTMLSelector*>(desc);
-            }};
-}
-
-}  // namespace wgpu::glfw
+                                                            GLFWwindow* window);
diff --git a/renderer/webgpu/webgpu.cpp b/renderer/webgpu/webgpu.cpp
index d7c02a4..495f85d 100644
--- a/renderer/webgpu/webgpu.cpp
+++ b/renderer/webgpu/webgpu.cpp
@@ -3,3 +3,425 @@
 //
 
 #include "webgpu.h"
+#include <webgpu/webgpu.h>
+#include <dawn/webgpu_cpp_print.h>
+#include "renderer.h"
+#include "../graphics.h"
+#include <emscripten.h>
+#include <emscripten/emscripten.h>
+#include <iostream>
+#include "utils_emscripten.h"
+
+WGPUInstance instance;
+WGPUAdapter adapter;
+Device   wgpu_device;
+Queue    wgpu_queue;
+Surface  wgpu_surface;
+
+Renderer renderer;
+
+void upload_texture(
+    const int w,
+    const int h,
+    const void *pixels,
+    Texture *texture)
+{
+    // NOTE(review): stub — the body is empty, so none of the parameters are used and nothing is uploaded.
+}
+
+std::string_view toStdStringView(WGPUStringView wgpuStringView) {
+    // A null data pointer means "no string": map it to an empty view.
+    if (wgpuStringView.data == nullptr) return std::string_view();
+    // A length of WGPU_STRLEN means the length is not stored: measure up to the NUL terminator.
+    if (wgpuStringView.length == WGPU_STRLEN) return std::string_view(wgpuStringView.data);
+    // Otherwise the view carries an explicit length.
+    return std::string_view(wgpuStringView.data, wgpuStringView.length);
+}
+
+WGPUStringView toWgpuStringView(std::string_view stdStringView) {
+    return { stdStringView.data(), stdStringView.size() }; // explicit length: the data need not be NUL-terminated
+}
+WGPUStringView toWgpuStringView(const char* cString) {
+    return { cString, WGPU_STRLEN }; // WGPU_STRLEN as length: toStdStringView() reads such a view as a NUL-terminated string
+}
+
+void sleepForMilliseconds(unsigned int milliseconds) {
+#ifdef __EMSCRIPTEN__
+    emscripten_sleep(milliseconds); // NOTE(review): presumably needs Asyncify enabled at link time — confirm build flags
+#else // any non-Emscripten build
+    std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); // NOTE(review): <thread>/<chrono> are not included directly by this file — confirm
+#endif // __EMSCRIPTEN__
+}
+
+/**
+ * Blocking helper to get a WebGPU adapter (nullptr on failure), so that
+ *     WGPUAdapter adapter = requestAdapterSync(instance, options);
+ * is roughly equivalent to the JavaScript
+ *     const adapter = await navigator.gpu.requestAdapter(options);
+ */
+WGPUAdapter requestAdapterSync(WGPUInstance instance, WGPURequestAdapterOptions const * options) {
+    // A simple structure holding the local information shared with the
+    // onAdapterRequestEnded callback.
+    struct UserData {
+        WGPUAdapter adapter = nullptr;
+        bool requestEnded = false;
+    };
+    UserData userData;
+
+    // Callback called by wgpuInstanceRequestAdapter when the request returns
+    // This is a C++ lambda function, but could be any function defined in the
+    // global scope. It must be non-capturing (the brackets [] are empty) so
+    // that it behaves like a regular C function pointer, which is what
+    // wgpuInstanceRequestAdapter expects (WebGPU being a C API). The workaround
+    // is to convey what we want to capture through the userdata1 pointer,
+    // provided as the last argument of wgpuInstanceRequestAdapter and received
+    // by the callback as its last argument.
+    auto onAdapterRequestEnded = [](
+        WGPURequestAdapterStatus status,
+        WGPUAdapter adapter,
+        WGPUStringView message,
+        void* userdata1,
+        void* /* userdata2 */
+    ) {
+        UserData& userData = *reinterpret_cast<UserData*>(userdata1);
+        if (status == WGPURequestAdapterStatus_Success) {
+            userData.adapter = adapter;
+        } else {
+            std::cerr << "Error while requesting adapter: " << toStdStringView(message) << std::endl;
+        }
+        userData.requestEnded = true;
+    };
+
+    // Build the callback info
+    WGPURequestAdapterCallbackInfo callbackInfo = {
+        /* nextInChain = */ nullptr,
+        /* mode = */ WGPUCallbackMode_AllowProcessEvents,
+        /* callback = */ onAdapterRequestEnded,
+        /* userdata1 = */ &userData,
+        /* userdata2 = */ nullptr
+    };
+
+    // Call to the WebGPU request adapter procedure
+    wgpuInstanceRequestAdapter(instance, options, callbackInfo);
+
+    // We wait until userData.requestEnded becomes true
+
+    // Hand the execution to the WebGPU instance so that it can check for
+    // pending async operations, in which case it invokes our callbacks.
+    // NB: We test once before the loop not to wait for 200ms in case it is
+    // already ready
+    wgpuInstanceProcessEvents(instance);
+
+    while (!userData.requestEnded) {
+        // Waiting for 200 ms to avoid asking too often to process events
+        sleepForMilliseconds(200);
+
+        wgpuInstanceProcessEvents(instance);
+    }
+
+    return userData.adapter;
+}
+
+/**
+ * Blocking helper to get a WebGPU device (nullptr on failure), so that
+ *     WGPUDevice device = requestDeviceSync(instance, adapter, descriptor);
+ * is roughly equivalent to
+ *     const device = await adapter.requestDevice(descriptor);
+ * It is very similar to requestAdapterSync
+ */
+WGPUDevice requestDeviceSync(WGPUInstance instance, WGPUAdapter adapter, WGPUDeviceDescriptor const * descriptor) {
+    struct UserData {
+        WGPUDevice device = nullptr;
+        bool requestEnded = false;
+    };
+    UserData userData;
+
+    // The callback: stores the device on success, logs the message otherwise
+    auto onDeviceRequestEnded = [](
+        WGPURequestDeviceStatus status,
+        WGPUDevice device,
+        WGPUStringView message,
+        void* userdata1,
+        void* /* userdata2 */
+    ) {
+        UserData& userData = *reinterpret_cast<UserData*>(userdata1);
+        if (status == WGPURequestDeviceStatus_Success) {
+            userData.device = device;
+        } else {
+            std::cerr << "Error while requesting device: " << toStdStringView(message) << std::endl;
+        }
+        userData.requestEnded = true;
+    };
+
+    // Build the callback info
+    WGPURequestDeviceCallbackInfo callbackInfo = {
+        /* nextInChain = */ nullptr,
+        /* mode = */ WGPUCallbackMode_AllowProcessEvents,
+        /* callback = */ onDeviceRequestEnded,
+        /* userdata1 = */ &userData,
+        /* userdata2 = */ nullptr
+    };
+
+    // Call to the WebGPU request device procedure
+    wgpuAdapterRequestDevice(adapter, descriptor, callbackInfo);
+
+    // Hand the execution to the WebGPU instance until the request ended
+    wgpuInstanceProcessEvents(instance);
+    while (!userData.requestEnded) {
+        sleepForMilliseconds(200);
+        wgpuInstanceProcessEvents(instance);
+    }
+
+    return userData.device;
+}
+
+// Prints the limits, features and info of `adapter` to std::cout (diagnostic helper).
+void inspectAdapter(WGPUAdapter adapter) {
+    WGPULimits supportedLimits = WGPU_LIMITS_INIT;
+
+    bool success = wgpuAdapterGetLimits(adapter, &supportedLimits) == WGPUStatus_Success;
+
+    if (success) {
+        std::cout << "Adapter limits:" << std::endl;
+        std::cout << " - maxTextureDimension1D: " << supportedLimits.maxTextureDimension1D << std::endl;
+        std::cout << " - maxTextureDimension2D: " << supportedLimits.maxTextureDimension2D << std::endl;
+        std::cout << " - maxTextureDimension3D: " << supportedLimits.maxTextureDimension3D << std::endl;
+        std::cout << " - maxTextureArrayLayers: " << supportedLimits.maxTextureArrayLayers << std::endl;
+    }
+
+    // Initialised (as in inspectDevice) so the count/pointer are well defined before being filled and freed
+    WGPUSupportedFeatures features = WGPU_SUPPORTED_FEATURES_INIT;
+
+    // Get adapter features. This may allocate memory that we must later free with wgpuSupportedFeaturesFreeMembers()
+    wgpuAdapterGetFeatures(adapter, &features);
+
+    std::cout << "Adapter features:" << std::endl;
+    std::cout << std::hex; // Write integers as hexadecimal to ease comparison with webgpu.h literals
+    for (size_t i = 0; i < features.featureCount; ++i) {
+        std::cout << " - 0x" << features.features[i] << std::endl;
+    }
+    std::cout << std::dec; // Restore decimal numbers
+
+    // Free the memory that had potentially been allocated by wgpuAdapterGetFeatures()
+    wgpuSupportedFeaturesFreeMembers(features);
+    // One shall no longer use features beyond this line.
+
+    // Initialise every member so the prints and FreeMembers below never read garbage if GetInfo fails
+    WGPUAdapterInfo properties = WGPU_ADAPTER_INFO_INIT;
+    wgpuAdapterGetInfo(adapter, &properties);
+    std::cout << "Adapter properties:" << std::endl;
+    std::cout << " - vendorID: " << properties.vendorID << std::endl;
+    std::cout << " - vendorName: " << toStdStringView(properties.vendor) << std::endl;
+    std::cout << " - architecture: " << toStdStringView(properties.architecture) << std::endl;
+    std::cout << " - deviceID: " << properties.deviceID << std::endl;
+    std::cout << " - name: " << toStdStringView(properties.device) << std::endl;
+    std::cout << " - driverDescription: " << toStdStringView(properties.description) << std::endl;
+    std::cout << std::hex;
+    std::cout << " - adapterType: 0x" << properties.adapterType << std::endl;
+    std::cout << " - backendType: 0x" << properties.backendType << std::endl;
+    std::cout << std::dec; // Restore decimal numbers
+    wgpuAdapterInfoFreeMembers(properties);
+}
+
+// Prints the features and a selection of the limits of `device` to std::cout (diagnostic helper).
+void inspectDevice(WGPUDevice device) {
+    // Features first; the list filled by the call is freed by wgpuSupportedFeaturesFreeMembers() below.
+    WGPUSupportedFeatures features = WGPU_SUPPORTED_FEATURES_INIT;
+    wgpuDeviceGetFeatures(device, &features);
+    std::cout << "Device features:" << std::endl;
+    std::cout << std::hex;
+    for (size_t i = 0; i < features.featureCount; ++i) {
+        std::cout << " - 0x" << features.features[i] << std::endl;
+    }
+    std::cout << std::dec;
+    wgpuSupportedFeaturesFreeMembers(features);
+
+    WGPULimits limits = WGPU_LIMITS_INIT;
+    bool success = wgpuDeviceGetLimits(device, &limits) == WGPUStatus_Success;
+
+    if (success) {
+		std::cout << "Device limits:" << std::endl;
+		std::cout << " - maxTextureDimension1D: " << limits.maxTextureDimension1D << std::endl;
+		std::cout << " - maxTextureDimension2D: " << limits.maxTextureDimension2D << std::endl;
+		std::cout << " - maxTextureDimension3D: " << limits.maxTextureDimension3D << std::endl;
+		std::cout << " - maxTextureArrayLayers: " << limits.maxTextureArrayLayers << std::endl;
+		std::cout << " - maxBindGroups: " << limits.maxBindGroups << std::endl;
+		std::cout << " - maxBindGroupsPlusVertexBuffers: " << limits.maxBindGroupsPlusVertexBuffers << std::endl;
+		std::cout << " - maxBindingsPerBindGroup: " << limits.maxBindingsPerBindGroup << std::endl;
+		std::cout << " - maxDynamicUniformBuffersPerPipelineLayout: " << limits.maxDynamicUniformBuffersPerPipelineLayout << std::endl;
+		std::cout << " - maxDynamicStorageBuffersPerPipelineLayout: " << limits.maxDynamicStorageBuffersPerPipelineLayout << std::endl;
+		std::cout << " - maxSampledTexturesPerShaderStage: " << limits.maxSampledTexturesPerShaderStage << std::endl;
+		std::cout << " - maxSamplersPerShaderStage: " << limits.maxSamplersPerShaderStage << std::endl;
+		std::cout << " - maxStorageBuffersPerShaderStage: " << limits.maxStorageBuffersPerShaderStage << std::endl;
+		std::cout << " - maxStorageTexturesPerShaderStage: " << limits.maxStorageTexturesPerShaderStage << std::endl;
+		std::cout << " - maxUniformBuffersPerShaderStage: " << limits.maxUniformBuffersPerShaderStage << std::endl;
+		std::cout << " - maxUniformBufferBindingSize: " << limits.maxUniformBufferBindingSize << std::endl;
+		std::cout << " - maxStorageBufferBindingSize: " << limits.maxStorageBufferBindingSize << std::endl;
+		std::cout << " - minUniformBufferOffsetAlignment: " << limits.minUniformBufferOffsetAlignment << std::endl;
+		std::cout << " - minStorageBufferOffsetAlignment: " << limits.minStorageBufferOffsetAlignment << std::endl;
+		std::cout << " - maxVertexBuffers: " << limits.maxVertexBuffers << std::endl;
+		std::cout << " - maxBufferSize: " << limits.maxBufferSize << std::endl;
+		std::cout << " - maxVertexAttributes: " << limits.maxVertexAttributes << std::endl;
+		std::cout << " - maxVertexBufferArrayStride: " << limits.maxVertexBufferArrayStride << std::endl;
+		std::cout << " - maxInterStageShaderVariables: " << limits.maxInterStageShaderVariables << std::endl;
+		std::cout << " - maxColorAttachments: " << limits.maxColorAttachments << std::endl;
+		std::cout << " - maxColorAttachmentBytesPerSample: " << limits.maxColorAttachmentBytesPerSample << std::endl;
+		std::cout << " - maxComputeWorkgroupStorageSize: " << limits.maxComputeWorkgroupStorageSize << std::endl;
+		std::cout << " - maxComputeInvocationsPerWorkgroup: " << limits.maxComputeInvocationsPerWorkgroup << std::endl;
+		std::cout << " - maxComputeWorkgroupSizeX: " << limits.maxComputeWorkgroupSizeX << std::endl;
+		std::cout << " - maxComputeWorkgroupSizeY: " << limits.maxComputeWorkgroupSizeY << std::endl;
+		std::cout << " - maxComputeWorkgroupSizeZ: " << limits.maxComputeWorkgroupSizeZ << std::endl;
+		std::cout << " - maxComputeWorkgroupsPerDimension: " << limits.maxComputeWorkgroupsPerDimension << std::endl;
+		// std::cout << " - maxStorageBuffersInVertexStage: " << limits.maxStorageBuffersInVertexStage << std::endl;
+		// std::cout << " - maxStorageTexturesInVertexStage: " << limits.maxStorageTexturesInVertexStage << std::endl;
+		// std::cout << " - maxStorageBuffersInFragmentStage: " << limits.maxStorageBuffersInFragmentStage << std::endl;
+		// std::cout << " - maxStorageTexturesInFragmentStage: " << limits.maxStorageTexturesInFragmentStage << std::endl;
+	}
+}
+
+// Creates the surface for `window` and configures it with the first format reported for the adapter.
+void create_surface(GLFWwindow *window) {
+    wgpu_surface.surface = wgpuGlfwCreateSurfaceForWindow(instance, window);
+
+    WGPUSurfaceConfiguration config = WGPU_SURFACE_CONFIGURATION_INIT;
+    config.width = 640; // NOTE(review): size is hard-coded; presumably it should follow the window's framebuffer — confirm
+    config.height = 480;
+    config.device = wgpu_device.device;
+
+    // Query what the (surface, adapter) pair supports; on success this populates `capabilities`.
+    WGPUSurfaceCapabilities capabilities = WGPU_SURFACE_CAPABILITIES_INIT;
+    WGPUStatus status = wgpuSurfaceGetCapabilities(wgpu_surface.surface, adapter, &capabilities);
+    if (status != WGPUStatus_Success || capabilities.formatCount == 0) {
+        // Report the failure instead of silently leaving the surface unconfigured.
+        std::cerr << "Could not query surface capabilities; surface left unconfigured" << std::endl;
+        if (status == WGPUStatus_Success) wgpuSurfaceCapabilitiesFreeMembers(capabilities);
+        return;
+    }
+
+    // From the capabilities, we get the preferred format: it is always the first one.
+    config.format = capabilities.formats[0];
+
+    // We no longer need to access the capabilities, so we release their memory.
+    wgpuSurfaceCapabilitiesFreeMembers(capabilities);
+
+    wgpuSurfaceConfigure(wgpu_surface.surface, &config);
+}
+
+// Returns a 2D view of the surface's current texture, or nullptr when no texture could be acquired.
+WGPUTextureView get_next_surface_view() {
+    WGPUSurfaceTexture surfaceTexture = WGPU_SURFACE_TEXTURE_INIT;
+    wgpuSurfaceGetCurrentTexture(wgpu_surface.surface, &surfaceTexture);
+    // Acquisition can fail and leave `texture` null; a view must not be created from (nor release called on) null.
+    if (surfaceTexture.texture == nullptr) return nullptr;
+    WGPUTextureViewDescriptor viewDescriptor = WGPU_TEXTURE_VIEW_DESCRIPTOR_INIT;
+    viewDescriptor.label = toWgpuStringView("Surface texture view");
+    viewDescriptor.dimension = WGPUTextureViewDimension_2D; // not to confuse with 2DArray
+    WGPUTextureView target_view = wgpuTextureCreateView(surfaceTexture.texture, &viewDescriptor);
+    // Only the view is needed from here on, so we drop our reference to the texture itself.
+    wgpuTextureRelease(surfaceTexture.texture);
+    return target_view;
+}
+
+// Requests the adapter and device (blocking), prints their capabilities and stores the device's queue.
+void create_device() {
+    std::cout << "Requesting adapter..." << std::endl;
+    WGPURequestAdapterOptions adapterOpts = {};
+    adapter = requestAdapterSync(instance, &adapterOpts);
+    if (adapter == nullptr) {
+        // On failure requestAdapterSync has already printed the error; a null adapter must not be used.
+        return;
+    }
+    std::cout << "Got adapter: " << adapter << std::endl;
+
+    inspectAdapter(adapter);
+
+    std::cout << "Requesting device..." << std::endl;
+
+    WGPUDeviceDescriptor deviceDesc = WGPU_DEVICE_DESCRIPTOR_INIT;
+    deviceDesc.label = toWgpuStringView("My Device");
+    std::vector<WGPUFeatureName> features;
+    // No required feature for now
+    deviceDesc.requiredFeatureCount = features.size();
+    deviceDesc.requiredFeatures = features.data();
+    WGPULimits requiredLimits = WGPU_LIMITS_INIT;
+    // We leave 'requiredLimits' untouched for now
+    deviceDesc.requiredLimits = &requiredLimits;
+    // 'features' and 'requiredLimits' must stay alive until requestDeviceSync() below has returned.
+    deviceDesc.defaultQueue.label = toWgpuStringView("The Default Queue");
+    auto onDeviceLost = [](
+        WGPUDevice const * device,
+        WGPUDeviceLostReason reason,
+        struct WGPUStringView message,
+        void* /* userdata1 */,
+        void* /* userdata2 */
+    ) {
+        // All we do is display a message when the device is lost
+        std::cout
+            << "Device " << device << " was lost: reason " << reason
+            << " (" << toStdStringView(message) << ")"
+            << std::endl;
+    };
+    deviceDesc.deviceLostCallbackInfo.callback = onDeviceLost;
+    deviceDesc.deviceLostCallbackInfo.mode = WGPUCallbackMode_AllowProcessEvents;
+    auto onDeviceError = [](
+        WGPUDevice const * device,
+        WGPUErrorType type,
+        struct WGPUStringView message,
+        void* /* userdata1 */,
+        void* /* userdata2 */
+    ) {
+        std::cout
+            << "Uncaptured error in device " << device << ": type " << type
+            << " (" << toStdStringView(message) << ")"
+            << std::endl;
+    };
+    deviceDesc.uncapturedErrorCallbackInfo.callback = onDeviceError;
+    wgpu_device.device = requestDeviceSync(instance, adapter, &deviceDesc);
+    if (wgpu_device.device == nullptr) {
+        return; // request failed (already reported): there is no device to inspect or take a queue from
+    }
+    std::cout << "Got device: " << wgpu_device.device << std::endl;
+    // Do NOT release the adapter here: create_surface() still passes it to wgpuSurfaceGetCapabilities().
+    inspectDevice(wgpu_device.device);
+    wgpu_queue.queue = wgpuDeviceGetQueue(wgpu_device.device);
+}
+
+// Creates the global WebGPU `instance`.
+void create_instance() {
+#ifdef __EMSCRIPTEN__
+    // Emscripten build: the instance is created with a null descriptor.
+    instance = wgpuCreateInstance(nullptr);
+#else
+    // Other builds: pass a default-initialised descriptor with an empty extension chain.
+    WGPUInstanceDescriptor instanceDescriptor = WGPU_INSTANCE_DESCRIPTOR_INIT;
+    instanceDescriptor.nextInChain = nullptr;
+    instance = wgpuCreateInstance(&instanceDescriptor);
+#endif
+}
+
+void platform_graphics_init(GLFWwindow *window) {
+    create_instance();
+    // The adapter/device requests below go through the global `instance` set by create_instance().
+    create_device();
+    // create_surface() configures the surface with the device and adapter obtained by create_device().
+    create_surface(window);
+    // Build the renderer last; it is constructed from the window only.
+    renderer = Renderer(window);
+}
+
+void graphics_deinit() {
+    // NOTE(review): empty — the global instance, adapter, device, queue and surface are never released here.
+}
+
+void begin_frame() {
+    renderer.begin_frame(); // forwards to the file-level Renderer instance
+}
+
+void end_frame(GLFWwindow *window) {
+    // Forwards to the file-level Renderer instance, handing it the window.
+    renderer.end_frame(window);
+}
+
+void submit_sprite(glm::vec2 pos, const sprite_t &sprite) {
+    renderer.submit_sprite({pos.x, pos.y}, sprite); // position is re-packed as a brace-initialised {x, y} for the Renderer
+}
diff --git a/renderer/webgpu/webgpu.h b/renderer/webgpu/webgpu.h
index 733cee9..20d5d8e 100644
--- a/renderer/webgpu/webgpu.h
+++ b/renderer/webgpu/webgpu.h
@@ -5,12 +5,34 @@
 #ifndef V_WEBGPU_H
 #define V_WEBGPU_H
 
+#include <string_view>
+#include <webgpu/webgpu.h>
 
-
-class webgpu {
-
+struct Device { // thin wrapper around the C API device handle
+    WGPUDevice device; // assigned in create_device() from requestDeviceSync()
 };
 
+struct PlatformTexture { // backend-specific texture data
+    WGPUTextureDescriptor texture; // NOTE(review): this is a descriptor, not a WGPUTexture handle — confirm that is intended
+};
+
+struct Queue { // thin wrapper around the C API queue handle
+    WGPUQueue queue; // assigned in create_device() from wgpuDeviceGetQueue()
+};
+
+struct Surface { // thin wrapper around the C API surface handle
+    WGPUSurface surface; // assigned in create_surface() from wgpuGlfwCreateSurfaceForWindow()
+};
+
+std::string_view toStdStringView(WGPUStringView wgpuStringView);
+
+WGPUStringView toWgpuStringView(std::string_view stdStringView);
+
+WGPUStringView toWgpuStringView(const char* cString);
+
+WGPUTextureView get_next_surface_view();
+
+void sleepForMilliseconds(unsigned int milliseconds);
 
 
-#endif //V_WEBGPU_H
+#endif //V_WEBGPU_H
\ No newline at end of file
diff --git a/shaders/compute.slang b/shaders/compute.slang
index e69de29..1e3fb68 100644
--- a/shaders/compute.slang
+++ b/shaders/compute.slang
@@ -0,0 +1,9 @@
+StructuredBuffer<float> input_buffer;
+RWStructuredBuffer<float> output_buffer;
+
+[shader("compute")] // compute entry point
+[numthreads(32,1,1)] // 32 threads per group along x
+void main(uint3 thread_id : SV_DispatchThreadID)
+{
+    output_buffer[thread_id.x] = 2.0 * input_buffer[thread_id.x]; // out[i] = 2 * in[i]; NOTE(review): no explicit check of i against the buffer length
+}
\ No newline at end of file
diff --git a/shaders/shader.metal b/shaders/shader.metal
index e69de29..5dcd5d2 100644
--- a/shaders/shader.metal
+++ b/shaders/shader.metal
@@ -0,0 +1,52 @@
+#include <metal_stdlib>
+#include <vertex_data.h>
+
+using namespace metal;
+
+struct Vertex { // element type of the constant `vertices` corner table
+    float2 pos; // corner position, scaled and offset per sprite in vertex_main
+    float2 uv;  // texture coordinate of the corner
+};
+
+constant Vertex vertices[6] = { // two triangles covering the square [-0.5, 0.5] x [-0.5, 0.5]
+    {{-0.5,  0.5}, {0.0, 1.0}},
+    {{-0.5, -0.5}, {0.0, 0.0}},
+    {{ 0.5,  0.5}, {1.0, 1.0}}, // end of first triangle
+
+    {{ 0.5,  0.5}, {1.0, 1.0}},
+    {{-0.5, -0.5}, {0.0, 0.0}},
+    {{ 0.5, -0.5}, {1.0, 0.0}}, // end of second triangle
+};
+
+struct VertexOut { // vertex_main output, consumed by fragment_main as [[stage_in]]
+    float4 pos [[position]]; // position after the `ortho` matrix has been applied
+    float2 uv;               // texture coordinate used for sampling in fragment_main
+    float4 color;            // per-sprite colour copied from the input buffer
+};
+
+vertex VertexOut vertex_main(
+    uint vertexID [[vertex_id]],
+    constant vertex_p2_s2_uv2_c4_a1* in,
+    constant simd::float4x4 *ortho)
+{
+    Vertex v = vertices[vertexID % 6];
+    // `v` is the quad corner for this vertex (the table has 6 entries, hence the modulo).
+    VertexOut out;
+    out.pos = (*ortho) * (float4((v.pos * in[vertexID].scale) + in[vertexID].pos, 0.0, 1.0));
+    out.uv = v.uv;
+    out.color = in[vertexID].color;
+    // NOTE(review): uv comes from the corner table rather than from in[vertexID] — confirm that is intended.
+    return out;
+}
+
+fragment float4 fragment_main(VertexOut in [[stage_in]], texture2d<float> colorTexture [[texture(0)]]) {
+    constexpr sampler textureSampler (mag_filter::linear, min_filter::linear);
+    // NOTE(review): in.color is not applied here; the function returns the raw texture sample.
+    // Sample the texture to obtain a color
+    const float4 colorSample = colorTexture.sample(textureSampler, in.uv);
+    //float2 srgbOut = select(1.292 * in.uv,
+    //                        1.055 * pow(in.uv, 1.0/2.4) - 0.055,
+    //                        in.uv > 0.0031308);
+    return colorSample;
+    //return float4(srgbOut, 0.0, in.color.a);
+}
diff --git a/shaders/shader.slang b/shaders/shader.slang
index 03f4032..a748de9 100644
--- a/shaders/shader.slang
+++ b/shaders/shader.slang
@@ -3,8 +3,7 @@ struct VSInput {
     float2 scale;
     float2 uv;
     float4 color;
-    float alpha;
-    uint32_t textureID;
+    float  alpha;
 
     uint vertex_index : SV_VertexID;
 };
@@ -14,10 +13,17 @@ struct VSOutput {
     float2 uv;
     float4 color;
     float alpha;
-    uint32_t tex_id;
+    //uint32_t tex_id;
 };
 
-Sampler2D textures[];
+struct Uniforms {
+    float4x4 proj;
+};
+
+ConstantBuffer<Uniforms> uniforms : register(b1);
+
+Texture2D colorTexture : register(t0);
+SamplerState samplerState;
 
 static const float2 square[6] = {
     float2(-0.5, -0.5), // Top-left
@@ -30,23 +36,23 @@ static const float2 square[6] = {
 };
 
 [shader ("vertex")]
-VSOutput main(VSInput input, uniform float4x4 proj) {
+VSOutput vs_main(VSInput input) {
     VSOutput output;
 
     float2 vertex_pos = square[input.vertex_index % 6];
     float2 final_pos = (vertex_pos * input.scale) + input.pos;
 
-    output.pos = mul(proj, float4(final_pos, 0.0, 1.0));
+    output.pos = mul(uniforms.proj, float4(final_pos, 0.0, 1.0));
     output.uv = input.uv;
     output.color = input.color;
     output.alpha = input.alpha;
-    output.tex_id = input.textureID;
+    //output.tex_id = input.textureID;
 
     return output;
 }
 
 [shader("fragment")]
-float4 main(VSOutput input) {
-
-    return float4(input.color.rgb, input.alpha);
+float4 fs_main(VSOutput input) {
+    float4 texColor = colorTexture.Sample(samplerState, input.uv);
+    return texColor * float4(input.color.rgb, input.alpha);
 }