From c59f0a680d9d7d38229cb1d22ea366bda5b60a96 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 29 Jul 2025 16:39:28 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=AE=8C=E6=88=90=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E4=BD=9C=E4=B8=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 3rd-party/googletest | 2 +- src/operators/concat.cc | 9 +++++++ src/operators/matmul.cc | 31 ++++++++++++++++++++++- src/operators/transpose.cc | 10 +++++++- src/operators/unary.cc | 12 ++++++--- src/utils/operator_utils.cc | 50 +++++++++++++++++++++++++++++++++++-- 6 files changed, 106 insertions(+), 8 deletions(-) diff --git a/3rd-party/googletest b/3rd-party/googletest index 3e3b44c..32f9f4c 160000 --- a/3rd-party/googletest +++ b/3rd-party/googletest @@ -1 +1 @@ -Subproject commit 3e3b44c300b21eb996a2957782421bc0f157af18 +Subproject commit 32f9f4c82afa4249af66b55278df15c16b3031ea diff --git a/src/operators/concat.cc b/src/operators/concat.cc index d196330..1f02bf1 100644 --- a/src/operators/concat.cc +++ b/src/operators/concat.cc @@ -11,11 +11,20 @@ ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim) optional> ConcatObj::inferShape(const TensorVec &inputs) { Shape dims = inputs[0]->getDims(); + Shape dims_h = inputs[1]->getDims(); auto rank = inputs[0]->getRank(); // =================================== 作业 =================================== // TODO:修改 dims,返回正确的 concat 后的 shape // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13 + for(size_t i = 0;i < rank;i++) + { + if(dims[i]!=dims_h[i]) + { + dims[i] += dims_h[i]; + break; + } + } // =================================== 作业 =================================== return {{dims}}; diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc index 7a16ca2..b777a6f 100644 --- a/src/operators/matmul.cc +++ b/src/operators/matmul.cc @@ -26,8 +26,37 @@ namespace infini // =================================== 作业 =================================== // TODO:返回经过 matmul 操作后的 shape // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm + const auto A = inputs[0]; + const auto B = inputs[1]; + Shape dimA = A->getDims(); + Shape dimB = B->getDims(); + Shape out_shape = dimA; + int tempA = dimA.size() - 1; + int tempB = dimA.size() - 1; + if(dimA[dimA.size()-1] == dimB[dimB.size()-1]) + { + tempA = dimA.size()-2; + tempB = dimB.size()-2; + } + else if(dimA[dimA.size()-1] == dimB[dimB.size()-2]) + { + tempA = dimA.size()-2; + tempB = dimB.size()-1; + } + else if(dimA[dimA.size()-2] == dimB[dimB.size()-1]) + { + tempA = dimA.size()-1; + tempB = dimB.size()-2; + } + else if(dimA[dimA.size()-2] == dimB[dimB.size()-2]) + { + tempA = dimA.size()-1; + tempB = dimB.size()-1; + } + out_shape[dimA.size()-2] = dimA[tempA]; + out_shape[dimA.size()-1] = dimB[tempB]; // =================================== 作业 =================================== - return std::nullopt; + return {{out_shape}}; } } // namespace infini \ No newline at end of file diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc index faab2b6..8d0b92d 100644 --- a/src/operators/transpose.cc +++ b/src/operators/transpose.cc @@ -32,9 +32,17 @@ namespace infini // =================================== 作业 =================================== // TODO:修改 output_dim,返回正确的 transpose 后的 shape // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21 + if(rank > 1) + { + for(int i=0;igetDims(); + auto output_dim = input_dim; // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13 // =================================== 作业 =================================== - return std::nullopt; + return {{output_dim}}; } std::string ClipObj::toString() const @@ -66,7 +69,7 @@ namespace infini // REF_FILE: src/core/operator.cc // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 // =================================== 作业 =================================== - return {}; + return {{getOutputDataType()}}; } optional> CastObj::inferShape(const TensorVec &inputs) @@ -74,8 +77,11 @@ namespace infini // =================================== 作业 =================================== // TODO:返回经过 cast 操作后的 shape // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 + const auto A = inputs[0]; + auto input_dim = A->getDims(); + auto output_dim = input_dim; // =================================== 作业 =================================== - return std::nullopt; + return {{output_dim}}; } std::string CastObj::toString() const diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc index edbd2c8..22ba511 100644 --- a/src/utils/operator_utils.cc +++ b/src/utils/operator_utils.cc @@ -8,9 +8,55 @@ Shape infer_broadcast(const Shape &A, const Shape &B) { // =================================== 作业 =================================== // TODO:对 A 和 B 进行双向广播,返回广播后的形状。 // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md + std::vector nB(B); + std::vector nA(A); + if(A.size() > B.size()) + { + for(size_t i=0;i<(A.size()-B.size());i++) + { + nB.insert(nB.begin(), 1); + // std::cout << "B" << std::endl; + } + + } + if(B.size() > A.size()) + { + for(size_t i=0;i<(B.size()-A.size());i++) + { + nA.insert(nA.begin(), 1); + // std::cout << "A" << std::endl; + } + + } + for(size_t i=0;i Date: Sat, 16 Aug 2025 00:10:33 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E5=AE=8C=E6=88=90naive=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=86=85=E5=AD=98=E7=AE=A1=E7=90=86=EF=BC=8C=E9=80=9A=E8=BF=87?= =?UTF-8?q?=E5=85=A8=E9=83=A8=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/core/allocator.h | 25 +++ include/core/graph.h | 4 +- src/core/allocator.cc | 67 ++++++++- src/core/graph.cc | 142 ++++++++++++++++++ src/operators/concat.cc | 18 ++- .../nativecpu/test_nativecpu_concat.cc | 2 + test/operators/test_concat.cc | 1 + 7 files changed, 251 insertions(+), 8 deletions(-) diff --git a/include/core/allocator.h b/include/core/allocator.h index 002601d..f51f933 100644 --- a/include/core/allocator.h +++ b/include/core/allocator.h @@ -25,6 +25,31 @@ namespace infini { // =================================== 作业 =================================== // TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并 + + // from InfiniTensor + struct freeBlockInfo { + size_t addr; + size_t blockSize; + }; + + struct cmpFreeBlockInfo { + bool operator()(const freeBlockInfo &a, const freeBlockInfo &b) const { + return (a.blockSize != b.blockSize) ? (a.blockSize < b.blockSize) + : (a.addr < b.addr); + } + }; + + // free balanced tree, maintains all free memory blocks + std::set freeBlocks; + + // key: head address offset of the free memory block + // value: blockSize of the block + std::unordered_map headAddrToBlockSize; + + // key: tail address offset of the free memory block + // value: blockSize of the block + std::unordered_map tailAddrToBlockSize; + // HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小 // =================================== 作业 =================================== diff --git a/include/core/graph.h b/include/core/graph.h index c45580c..771fc45 100644 --- a/include/core/graph.h +++ b/include/core/graph.h @@ -58,14 +58,16 @@ namespace infini void dataMalloc(); /** - * @brief Add an operator and create its outputs. Output tensor arguments + * @brief Add an operator and create its outputs. Output tensor arguments should be empty Refs (e.g., nullptr). * should be empty Refs (e.g., nullptr). */ template Ref addOp(Args &&...args) { Ref op = infini::make_ref(this, std::forward(args)...); + // this->print(); addOperatorAndConnect(op); + // this->print(); return op; } diff --git a/src/core/allocator.cc b/src/core/allocator.cc index ff593ae..90c74db 100644 --- a/src/core/allocator.cc +++ b/src/core/allocator.cc @@ -31,9 +31,45 @@ namespace infini // =================================== 作业 =================================== // TODO: 设计一个算法来分配内存,返回起始地址偏移量 + auto it = this->freeBlocks.lower_bound(freeBlockInfo{(size_t)0, size}); + size_t retAddr = this->peak; + if(it != freeBlocks.end()) //find free block + { + size_t blockSize = it->blockSize; + retAddr = it->addr; + size_t tailAddr = retAddr + size; + this->headAddrToBlockSize.erase(retAddr); + this->tailAddrToBlockSize.erase(tailAddr); + if(blockSize > size) + { + freeBlockInfo newBlock = {tailAddr, blockSize - size}; + this->headAddrToBlockSize[tailAddr] = newBlock.blockSize; + this->tailAddrToBlockSize[tailAddr + newBlock.blockSize] = newBlock.blockSize; + this->freeBlocks.insert(newBlock); + } + this->freeBlocks.erase(it); + } + else + { + auto blockTailWithPeak = this->tailAddrToBlockSize.find(this->peak); + if(blockTailWithPeak != this->tailAddrToBlockSize.end()) + { + retAddr = this->peak - blockTailWithPeak->second; + this->peak += (size - blockTailWithPeak->second); + freeBlockInfo endBlock = {retAddr, blockTailWithPeak->second}; + this->freeBlocks.erase(endBlock); + this->headAddrToBlockSize.erase(endBlock.addr); + this->tailAddrToBlockSize.erase(endBlock.addr + endBlock.blockSize); + } + else + { + this->peak += size; + } + } + this->used += size; // =================================== 作业 =================================== - return 0; + return retAddr; } void Allocator::free(size_t addr, size_t size) @@ -43,6 +79,35 @@ namespace infini // =================================== 作业 =================================== // TODO: 设计一个算法来回收内存 + auto tailAddr = addr + size; + freeBlockInfo block = {addr, size}; + this->headAddrToBlockSize[block.addr] = block.blockSize; + this->tailAddrToBlockSize[tailAddr] = block.blockSize; + auto preFreeBlockIter = this->tailAddrToBlockSize.find(addr); + auto subFreeBlockIter = this->headAddrToBlockSize.find(tailAddr); + if(preFreeBlockIter != this->tailAddrToBlockSize.end()) + { + size_t preBlockSize = preFreeBlockIter->second; + this->headAddrToBlockSize.erase(block.addr); + this->headAddrToBlockSize[block.addr - preBlockSize] += block.blockSize; + this->tailAddrToBlockSize.erase(block.addr); + this->tailAddrToBlockSize[tailAddr] += preBlockSize; + block.addr -= preBlockSize; + block.blockSize += preBlockSize; + this->freeBlocks.erase(freeBlockInfo({block.addr, preBlockSize})); + } + if(subFreeBlockIter != this->headAddrToBlockSize.end()) + { + size_t subBlockSize = preFreeBlockIter->second; + this->headAddrToBlockSize.erase(tailAddr); + this->headAddrToBlockSize[block.addr] += subBlockSize; + this->tailAddrToBlockSize.erase(tailAddr); + this->tailAddrToBlockSize[tailAddr + subBlockSize] += block.blockSize; + block.blockSize += subBlockSize; + this->freeBlocks.erase(freeBlockInfo({tailAddr, subBlockSize})); + } + this->freeBlocks.insert(block); + this->used -= size; // =================================== 作业 =================================== } diff --git a/src/core/graph.cc b/src/core/graph.cc index 3a90637..2ca3599 100644 --- a/src/core/graph.cc +++ b/src/core/graph.cc @@ -1,4 +1,5 @@ #include "core/graph.h" +#include "operators/matmul.h" #include #include #include @@ -105,6 +106,130 @@ namespace infini // 图优化规则如下: // 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除) // 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去) + Operator last_opt; + // OpVec delop; + // TensorVec save_tensors; + for(size_t i = 0; i < ops.size();) + { + auto op = ops[i]; + if(op->getOpType().underlying() == 10) + { + auto predecessors = op->getPredecessors(); + for(auto predecessor : predecessors) + { + if(predecessor->getOpType().underlying() == 10) + { + TensorVec Outputs = op->getOutputs(); + TensorVec Inputs = predecessor->getInputs(); + Tensor input = Inputs[0]; + Tensor output = Outputs[0]; + if(output->getDims() == input->getDims()) + { + std::cout << "transpose delete" << std::endl; + this->removeOperator(op); + this->removeOperator(predecessor); + this->removeTensor(op->getInputs()[0]); + this->removeTensor(op->getOutputs()[0]); + + if (auto pred = input->getSource()) + { + pred->removeSuccessors(predecessor); + for (auto &succ : output->getTargets()) + { + pred->addSuccessors(succ); + } + } + input->removeTarget(predecessor); + for (auto &succ : output->getTargets()) + { + input->addTarget(succ); + succ->replaceInput(output, input); + succ->removePredecessors(op); + for (auto &predop : predecessor->getPredecessors()) + { + succ->addPredecessors(predop); + } + } + i--; + continue; + } + } + } + } + else if(op->getOpType().underlying() == 7) + { + Tensor tensorA = op->getInputs()[0]; + Tensor tensorB = op->getInputs()[1]; + if(const auto &source = tensorA->getSource()) + { + if(source->getOpType().underlying() == 10) + { + Tensor input = source->getInputs()[0]; + Tensor output = source->getOutputs()[0]; + auto input_dim = input->getDims(); + auto output_dim = output->getDims(); + if(input_dim[input_dim.size()-1] == output_dim[output_dim.size()-2]) + { + std::cout << "transpose merge A" << std::endl; + Tensor input = source->getInputs()[0]; + Tensor output = source->getOutputs()[0]; + + // update op info + for (auto &predop : source->getPredecessors()) + { + predop->removeSuccessors(source); + predop->addSuccessors(op); + op->removePredecessors(source); + op->addPredecessors(predop); + } + input->removeTarget(source); + input->addTarget(op); + op->replaceInput(output, input); + auto* matmulOp = dynamic_cast(source.get()); + matmulOp->setTransA(true); + continue; + } + } + } + if(const auto &source = tensorB->getSource()) + { + if(source->getOpType().underlying() == 10) + { + Tensor input = source->getInputs()[0]; + Tensor output = source->getOutputs()[0]; + auto input_dim = input->getDims(); + auto output_dim = output->getDims(); + if(input_dim[input_dim.size()-1] == output_dim[output_dim.size()-2]) + { + std::cout << "transpose merge B" << std::endl; + // std::cout << input << std::endl; + Tensor input = source->getInputs()[0]; + Tensor output = source->getOutputs()[0]; + // update op info + op->removePredecessors(source); + for (auto &predop : source->getPredecessors()) + { + predop->removeSuccessors(source); + predop->addSuccessors(op); + op->addPredecessors(predop); + } + input->removeTarget(source); + input->addTarget(op); + op->replaceInput(output, input); + auto* matmulOp = dynamic_cast(op.get()); + matmulOp->setTransB(true); + this->removeOperator(source); + this->removeTensor(output); + // this->print(); + continue; + } + } + } + } + i++; + } + std::cout << "Optimize complete!" << std::endl << std::endl; + // =================================== 作业 =================================== } @@ -151,6 +276,23 @@ namespace infini // =================================== 作业 =================================== // TODO:利用 allocator 给计算图分配内存 // HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存 + // Naive Version + std::unordered_map, size_t> tensorToOffset; + for(auto tensor : tensors) + { + tensorToOffset[tensor] = allocator.alloc(tensor->getBytes()); + // std::cout << "loop1end" << std::endl; + } + for(auto tensor : tensors) + { + tensor->setDataBlob(make_ref + ( + tensor->runtime, + static_cast(allocator.getPtr()) + + tensorToOffset[tensor] + ) + ); + } // =================================== 作业 =================================== allocator.info(); diff --git a/src/operators/concat.cc b/src/operators/concat.cc index 1f02bf1..c05edf3 100644 --- a/src/operators/concat.cc +++ b/src/operators/concat.cc @@ -1,17 +1,20 @@ #include "operators/concat.h" #include "utils/operator_utils.h" +#include "core/graph.h" namespace infini { ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim) : OperatorObj(OpType::Concat, inputs, {output}) { + // TensorVec a = {nullptr}; + // std::cout << "outputsize:" << a.size() << std::endl; //这个空元素竟然也会size+1 int rank = inputs[0]->getRank(); dim = get_real_axis(_dim, rank); - IT_ASSERT(checkValid(graph)); + IT_ASSERT(checkValid(graph)); //outputs established in here } optional> ConcatObj::inferShape(const TensorVec &inputs) { Shape dims = inputs[0]->getDims(); - Shape dims_h = inputs[1]->getDims(); + Shape ans = inputs[0]->getDims(); auto rank = inputs[0]->getRank(); // =================================== 作业 =================================== @@ -19,15 +22,18 @@ optional> ConcatObj::inferShape(const TensorVec &inputs) { // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13 for(size_t i = 0;i < rank;i++) { - if(dims[i]!=dims_h[i]) + for(size_t j = 1;j < inputs.size();j++) { - dims[i] += dims_h[i]; - break; + // inputs[j]->print(); + if(dims[i] != inputs[j]->getDims()[i]) + { + ans[i] += inputs[j]->getDims()[i]; + } } } // =================================== 作业 =================================== - return {{dims}}; + return {{ans}}; } std::string ConcatObj::toString() const { diff --git a/test/kernels/nativecpu/test_nativecpu_concat.cc b/test/kernels/nativecpu/test_nativecpu_concat.cc index fc87fb1..b6bf2d8 100644 --- a/test/kernels/nativecpu/test_nativecpu_concat.cc +++ b/test/kernels/nativecpu/test_nativecpu_concat.cc @@ -14,10 +14,12 @@ TEST(Concat, NativeCpu) { auto t2 = g->addTensor({2, 2, 1, 1}, DataType::Float32); auto t3 = g->addTensor({2, 2, 2, 1}, DataType::Float32); auto op = g->addOp(TensorVec{t1, t2, t3}, nullptr, 2); + g->print(); g->dataMalloc(); t1->setData(IncrementalGenerator()); t2->setData(OneGenerator()); t3->setData(OneGenerator()); + g->print(); runtime->run(g); EXPECT_TRUE(op->getOutput()->equalData( diff --git a/test/operators/test_concat.cc b/test/operators/test_concat.cc index 8984b9f..834222c 100644 --- a/test/operators/test_concat.cc +++ b/test/operators/test_concat.cc @@ -11,6 +11,7 @@ TEST(Concat, ShapeInfer) { auto t2 = g->addTensor({1, 3, 2, 5}, DataType::Float32); auto op = g->addOp(TensorVec{t1, t2}, nullptr, 3); + // g->print(); EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 9})); } } // namespace infini