-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathaxpy.cc
More file actions
45 lines (39 loc) · 1.38 KB
/
axpy.cc
File metadata and controls
45 lines (39 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#define NOCPP
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define REL_WRITE 0
#define REL_READ 1
#include <aie_api/aie.hpp>
extern "C" {
/// Vectorized AXPY kernel: writes z = a * x + y element-wise.
///
/// @param x           Input buffer that is scaled by `a`.
/// @param y           Input buffer that is added to the scaled `x`.
/// @param a           Scale factor; converted to bfloat16 before the multiply.
/// @param z           Output buffer receiving the bfloat16 results.
/// @param vector_size Number of elements to process. The loop works in whole
///                    64-element chunks, so when this is not a multiple of 64
///                    the last iteration still loads and stores a full chunk;
///                    callers must size the buffers accordingly.
///
/// NOTE(review): aie::load_v / aie::store_v presumably require pointers aligned
/// for a 64-lane bfloat16 vector -- confirm against the AIE API documentation.
void saxpy(bfloat16 *restrict x, bfloat16 *restrict y, const float a, bfloat16 *restrict z, const int32_t vector_size)
{
    // Number of bfloat16 elements consumed and produced per loop iteration.
    constexpr int kLanes = 64;
    using bf16_vec = ::aie::vector<bfloat16, kLanes>;
    using f32_acc = ::aie::accum<accfloat, kLanes>;

    event0(); // start marker (presumably for tracing/profiling -- defined elsewhere)

    // Convert the scale factor to bfloat16 once and replicate it across all lanes.
    bf16_vec scale = ::aie::broadcast<bfloat16, kLanes>(::aie::to_float<bfloat16>(a, 0));

    // #pragma clang loop min_iteration_count(4)
    for (int offset = 0; offset < vector_size; offset += kLanes) {
        bf16_vec x_chunk = ::aie::load_v<kLanes>(x + offset);
        bf16_vec y_chunk = ::aie::load_v<kLanes>(y + offset);

        // Multiply into an accfloat accumulator, add y there, and only then
        // convert the result back to bfloat16 for the store.
        f32_acc scaled = ::aie::mul(x_chunk, scale);
        f32_acc summed = ::aie::add(scaled, y_chunk);
        bf16_vec result = summed.to_vector<bfloat16>();

        ::aie::store_v(z + offset, result);
    }

    event1(); // end marker matching event0()
}
/// Scalar reference AXPY: writes z[i] = a * x[i] + y[i] one element at a time.
///
/// @param x           Input buffer that is scaled by `a`.
/// @param y           Input buffer that is added to the scaled `x`.
/// @param a           Scale factor; widened to float once before the loop.
/// @param z           Output buffer receiving the results.
/// @param vector_size Number of elements to process; nothing is written when
///                    it is zero or negative.
void saxpy_scalar(bfloat16 *x, bfloat16 *y, const bfloat16 a, bfloat16 *z, const int32_t vector_size)
{
    event0(); // start marker (presumably for tracing/profiling -- defined elsewhere)

    // Hoist the bfloat16 -> float conversion of the scale factor out of the loop.
    float scale = a;

    int idx = 0;
    while (idx < vector_size) {
        // Kept as a single expression so the arithmetic matches the original exactly.
        z[idx] = scale * x[idx] + y[idx];
        ++idx;
    }

    event1(); // end marker matching event0()
}
}