Skip to content

Commit 876be27

Browse files
[fix] Fix block header decompression for inflate with indexing on asynchronous path (#803)
Co-authored-by: Meng Cao <[email protected]>
1 parent a614ed8 commit 876be27

File tree

5 files changed

+46
-18
lines changed

5 files changed

+46
-18
lines changed

doc/source/documentation/dev_guide_docs/c_use_cases/deflate/c_deflate_indexing_usage.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,12 @@ After the job completes, the returned CRC value (in job crc) should
171171
match the CRC value from the end of the mini-block, in this example 10.
172172
If it does not match, then the mini-block is not decompressed properly.
173173

174+
.. note::
175+
176+
The state of decompression with mini-blocks is not saved from job
177+
to job. Namely, :c:member:`qpl_job.total_out` does not store the
178+
cumulative number of bytes written, but only the number of bytes
179+
written for that job's particular mini-block.
174180

175181
Single Block Usage
176182
==================

sources/c_api/legacy_hw_path/hardware_defs.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,13 @@ extern "C" {
8888
#define IS_RND_ACCESS_BODY(flag) \
8989
(((flag) & (QPL_FLAG_RND_ACCESS | QPL_FLAG_FIRST)) == QPL_FLAG_RND_ACCESS)
9090

91+
92+
/**
93+
* Random Access Header: both QPL_FLAG_RND_ACCESS and QPL_FLAG_FIRST are set
94+
*/
95+
#define IS_RND_ACCESS_HDR(flag) \
96+
(((flag) & (QPL_FLAG_RND_ACCESS | QPL_FLAG_FIRST)) == (QPL_FLAG_RND_ACCESS | QPL_FLAG_FIRST))
97+
9198
/**
9299
* @todo
93100
*/

sources/c_api/legacy_hw_path/qpl_hw_inflate_job.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,13 @@ extern "C" qpl_status hw_submit_decompress_job(qpl_job *qpl_job_ptr,
5454
auto *const state_ptr = reinterpret_cast<qpl_hw_state *>(job::get_state(qpl_job_ptr));
5555

5656
hw_iaa_analytics_descriptor *const desc_ptr = &state_ptr->desc_ptr;
57-
hw_iaa_aecs_analytic *const aecs_ptr = GET_DCFG(state_ptr);
57+
hw_iaa_aecs_analytic *aecs_ptr = GET_DCFG(state_ptr);
5858

59+
if (IS_RND_ACCESS_HDR(qpl_job_ptr->flags)) {
60+
aecs_ptr = (hw_iaa_aecs_analytic *) ((uint8_t *) (state_ptr->dcfg) + HW_AECS_FILTER_AND_DECOMPRESS_WA_HB);
61+
}
62+
63+
HW_IMMEDIATELY_RET_NULLPTR(aecs_ptr);
5964
last_job = (available_in > MAX_BUF_SIZE) ? 0u : last_job;
6065

6166
// Descriptor buffers set
@@ -79,7 +84,6 @@ extern "C" qpl_status hw_submit_decompress_job(qpl_job *qpl_job_ptr,
7984
const bool is_dictionary_mode = (qpl_job_ptr->flags & QPL_FLAG_FIRST ||
8085
!state_ptr->execution_history.first_job_has_been_submitted) &&
8186
qpl_job_ptr->dictionary != NULL;
82-
8387
bool is_aecs_format2_expected = qpl::ml::util::are_iaa_gen_2_min_capabilities_present();
8488

8589
if (state_ptr->execution_history.first_job_has_been_submitted) {
@@ -92,13 +96,6 @@ extern "C" qpl_status hw_submit_decompress_job(qpl_job *qpl_job_ptr,
9296
operation_flags |= ADOF_READ_SRC2(AD_RDSRC2_AECS);
9397
}
9498

95-
// Decompress random header
96-
if (qpl_job_ptr->ignore_start_bits != 0u) {
97-
operation_flags |= ADOF_READ_SRC2(AD_RDSRC2_AECS);
98-
core_sw::util::set_zeros((uint8_t *) aecs_ptr, sizeof(hw_iaa_aecs_analytic));
99-
aecs_ptr->inflate_options.decompress_state = DEF_STATE_HDR;
100-
}
101-
10299
// Decompress huffman only
103100
if (qpl_job_ptr->flags & QPL_FLAG_NO_HDRS) {
104101
operation_flags |= ADOF_READ_SRC2(AD_RDSRC2_AECS);
@@ -131,13 +128,14 @@ extern "C" qpl_status hw_submit_decompress_job(qpl_job *qpl_job_ptr,
131128

132129
}
133130

134-
// Set the input accum alignment if we're doing random access
135-
if ((qpl_job_ptr->flags & QPL_FLAG_RND_ACCESS)
136-
&& (hw_iaa_aecs_decompress_is_empty_input_accumulator(&aecs_ptr->inflate_options))) {
137-
aecs_ptr->inflate_options.idx_bit_offset = 7u & qpl_job_ptr->ignore_start_bits;
138-
}
139131

140132
if (0u != qpl_job_ptr->ignore_start_bits) {
133+
if (IS_RND_ACCESS_HDR(qpl_job_ptr->flags)) {
134+
operation_flags |= ADOF_READ_SRC2(AD_RDSRC2_AECS);
135+
core_sw::util::set_zeros(reinterpret_cast<uint8_t *>(aecs_ptr), HW_AECS_FILTER_AND_DECOMPRESS_WA_HB);
136+
aecs_ptr->inflate_options.idx_bit_offset = 7u & qpl_job_ptr->ignore_start_bits;
137+
aecs_ptr->inflate_options.decompress_state = DEF_STATE_HDR;
138+
}
141139
auto status = hw_iaa_aecs_decompress_set_input_accumulator(&aecs_ptr->inflate_options,
142140
desc_ptr->src1_ptr,
143141
qpl_job_ptr->available_in,
@@ -149,10 +147,12 @@ extern "C" qpl_status hw_submit_decompress_job(qpl_job *qpl_job_ptr,
149147
desc_ptr->src1_ptr = ++qpl_job_ptr->next_in_ptr;
150148
desc_ptr->src1_size = --qpl_job_ptr->available_in;
151149
}
152-
153150
hw_iaa_aecs_decompress_state_set_aecs_format(&aecs_ptr->inflate_options, is_aecs_format2_expected);
154151

155152
// AECS Write policy
153+
if (IS_RND_ACCESS_HDR(qpl_job_ptr->flags)) {
154+
operation_flags |= ADOF_AECS_SEL;
155+
}
156156
if (IS_RND_ACCESS_BODY(qpl_job_ptr->flags)) {
157157
decompression_flags |= ADDF_FLUSH_OUTPUT;
158158
hw_iaa_aecs_decompress_set_crc_seed(aecs_ptr, qpl_job_ptr->crc);

sources/c_api/legacy_hw_path/qpl_hw_submit_job.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ extern "C" qpl_status hw_submit_job (qpl_job * qpl_job_ptr) {
400400
QPL_STS_OPERATION_ERR)
401401
return hw_submit_analytic_task(qpl_job_ptr);
402402

403-
case qpl_op_decompress:
403+
case qpl_op_decompress: {
404404
if (qpl_job_ptr->dictionary != NULL && qpl_job_ptr->flags & QPL_FLAG_CANNED_MODE) {
405405
// dictionary with canned mode
406406
// TODO: remove once it's supported
@@ -416,9 +416,16 @@ extern "C" qpl_status hw_submit_job (qpl_job * qpl_job_ptr) {
416416
break; // Workaround for header reading
417417
}
418418

419+
uint32_t crc = qpl_job_ptr->crc;
419420
job::reset<qpl_op_decompress>(qpl_job_ptr);
421+
422+
if (flags & QPL_FLAG_RND_ACCESS){
423+
own_hw_state_reset(state_ptr);
424+
qpl_job_ptr->crc = crc;
425+
}
420426
state_ptr->aecs_size = HW_AECS_FILTER_AND_DECOMPRESS_WA_HB;
421427
return hw_submit_task(qpl_job_ptr);
428+
}
422429
case qpl_op_compress:
423430
if (flags & QPL_FLAG_FIRST) {
424431
job::reset<qpl_op_compress>(qpl_job_ptr);
@@ -483,9 +490,10 @@ extern "C" qpl_status hw_submit_job (qpl_job * qpl_job_ptr) {
483490
uint32_t source_size = qpl_job_ptr->available_in;
484491

485492
qpl_buffer *const accumulator_ptr = &state_ptr->accumulation_buffer;
486-
bool is_last_job = flags & QPL_FLAG_LAST;
493+
bool is_last_job = flags & QPL_FLAG_LAST;
494+
bool is_indexing_mode = flags & QPL_FLAG_RND_ACCESS;
487495

488-
if ((!is_last_job)
496+
if ((!is_last_job && !is_indexing_mode)
489497
&& own_qpl_buffer_touch(accumulator_ptr, source_size)) {
490498
own_qpl_buffer_fill(accumulator_ptr, source_ptr, source_size);
491499
hw_iaa_completion_record_init_trivial_completion(&state_ptr->comp_ptr, source_size);

sources/core-iaa/sources/include/own_hw_definitions.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,13 @@ extern "C" {
113113
#define IS_RND_ACCESS_BODY(flag) \
114114
(((flag) & (QPL_FLAG_RND_ACCESS | QPL_FLAG_FIRST)) == QPL_FLAG_RND_ACCESS)
115115

116+
/**
117+
* Random Access Header: both QPL_FLAG_RND_ACCESS and QPL_FLAG_FIRST are set
118+
*/
119+
#define IS_RND_ACCESS_HDR(flag) \
120+
(((flag) & (QPL_FLAG_RND_ACCESS | QPL_FLAG_FIRST)) == (QPL_FLAG_RND_ACCESS | QPL_FLAG_FIRST))
121+
122+
116123
#ifdef __GNUC__
117124
#define UNREFERENCED_PARAMETER(p) p __attribute__((unused)) /**< Unreferenced parameter - warning removal */
118125
#else

0 commit comments

Comments
 (0)