Skip to content

Commit d12594d

Browse files
committed
wip, setting up kit
1 parent dc9148b commit d12594d

File tree

3 files changed

+17
-22
lines changed

3 files changed

+17
-22
lines changed

linalg/src/x86_64_fma/mmm.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ use crate::frame::PackedFormat;
33
use crate::mmm::MMMKit;
44
use crate::mmm::MatMatMulKer;
55
use crate::Ops;
6-
use panel_extract::packed_32_f16_to_f32;
7-
use panel_extract::packed_32_q40_to_f32;
6+
use panel_extract::fma_packed_32_f16_to_f32;
7+
use panel_extract::fma_packed_32_q40_to_f32;
88
use tract_data::internal::*;
99
use DatumType::*;
1010

@@ -63,17 +63,17 @@ pub fn plug(ops: &mut Ops) {
6363
ops.mmm_kits.push(MMMKit::new_for_mmm(fma_mmm_f32_32x1.mmm(), 1).with_extracting(
6464
fma_mmm_f32_32x3.mmm(),
6565
0,
66-
packed_32_q40_to_f32.clone(),
66+
fma_packed_32_q40_to_f32.clone(),
6767
));
6868
ops.mmm_kits.push(MMMKit::new_for_mmm(fma_mmm_f32_32x1.mmm(), 2).with_extracting(
6969
fma_mmm_f32_32x3.mmm(),
7070
1,
71-
packed_32_q40_to_f32.clone(),
71+
fma_packed_32_q40_to_f32.clone(),
7272
));
7373
ops.mmm_kits.push(
7474
MMMKit::new(F16, F32, F16, &PackedFormat::new(F16, 32, 32))
7575
.with_native(fma_mmm_f32_32x1.mmm(), 3)
76-
.with_extracting(fma_mmm_f32_32x3.mmm(), 1, packed_32_f16_to_f32.clone()),
76+
.with_extracting(fma_mmm_f32_32x3.mmm(), 1, fma_packed_32_f16_to_f32.clone()),
7777
);
7878
}
7979
}

linalg/src/x86_64_fma/panel_extract.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,25 @@ use crate::Ops;
44
use tract_data::internal::*;
55

66
pub fn plug(ops: &mut Ops) {
7-
ops.panel_extractors.extend([packed_32_q40_to_f32.clone(), packed_32_f16_to_f32.clone()]);
7+
ops.panel_extractors.extend([
8+
fma_packed_32_q40_to_f32.clone(),
9+
fma_packed_32_f16_to_f32.clone(),
10+
avx512_packed_128_q40_to_f32.clone(),
11+
]);
812
}
913

10-
panel_extractor!(kernel_packed_32_q40_to_f32 as packed_32_q40_to_f32(
14+
panel_extractor!(kernel_packed_32_q40_to_f32 as fma_packed_32_q40_to_f32(
1115
Box::new(super::mmm::pq40_r32()),
1216
PackedFormat::new(f32::datum_type(), 32, 32)
1317
) where(AVX2));
1418

15-
panel_extractor!(kernel_packed_32_f16_to_f32 as packed_32_f16_to_f32(
19+
panel_extractor!(kernel_packed_32_f16_to_f32 as fma_packed_32_f16_to_f32(
1620
Box::new(PackedFormat::new(f16::datum_type(), 32, 32)),
1721
PackedFormat::new(f32::datum_type(), 32, 32)
1822
) where(AVX2));
1923

20-
panel_extractor!(kernel_packed_128_q40_to_f32::kernel as packed_128_q40_to_f32(
21-
Box::new(super::mmm::PQ40_R128),
24+
panel_extractor!(kernel_packed_128_q40_to_f32::kernel as avx512_packed_128_q40_to_f32(
25+
Box::new(super::mmm::pq40_r128()),
2226
PackedFormat::new(f32::datum_type(), 128, 32)
2327
) where(AVX512F));
2428

linalg/x86_64/avx512/avx512_packed_128_q40_to_f32.tmpl

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,13 @@ avx512_packed_128_q40_to_f32_{{suffix}} proc
5454
push rdi
5555
push rsi
5656

57+
// Windows x64 ABI arguments: rcx = input, rdx = output, r8 = k
5758
mov rdi, rcx
59+
mov rsi, rdx
60+
mov rdx, r8
5861

5962
{% endif %}
6063

61-
push rbx
62-
push r12
63-
push r13
64-
push r14
65-
push r15
66-
6764
sub rsp, 8
6865
{% if family == "unix" %}
6966
.cfi_def_cfa_offset 64
@@ -153,12 +150,6 @@ avx512_packed_128_q40_to_f32_{{suffix}} proc
153150
ldmxcsr [rsp + 4]
154151
add rsp, 8
155152

156-
pop r15
157-
pop r14
158-
pop r13
159-
pop r12
160-
pop rbx
161-
162153
{% if family == "windows" %}
163154
pop rsi
164155
pop rdi

0 commit comments

Comments
 (0)