From c59f0a680d9d7d38229cb1d22ea366bda5b60a96 Mon Sep 17 00:00:00 2001
From: root <root@DESKTOP-9MUAJVQ.localdomain>
Date: Tue, 29 Jul 2025 16:39:28 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E5=AE=8C=E6=88=90=E7=AE=97=E5=AD=90?=
 =?UTF-8?q?=E7=9B=B8=E5=85=B3=E4=BD=9C=E4=B8=9A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 3rd-party/googletest        |  2 +-
 src/operators/concat.cc     |  9 +++++++
 src/operators/matmul.cc     | 31 ++++++++++++++++++++++-
 src/operators/transpose.cc  | 10 +++++++-
 src/operators/unary.cc      | 12 ++++++---
 src/utils/operator_utils.cc | 50 +++++++++++++++++++++++++++++++++++--
 6 files changed, 106 insertions(+), 8 deletions(-)
diff --git a/3rd-party/googletest b/3rd-party/googletest
index 3e3b44c..32f9f4c 160000
--- a/3rd-party/googletest
+++ b/3rd-party/googletest
@@ -1 +1 @@
-Subproject commit 3e3b44c300b21eb996a2957782421bc0f157af18
+Subproject commit 32f9f4c82afa4249af66b55278df15c16b3031ea
diff --git a/src/operators/concat.cc b/src/operators/concat.cc
index d196330..1f02bf1 100644
--- a/src/operators/concat.cc
+++ b/src/operators/concat.cc
@@ -11,11 +11,20 @@ ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim)
 
 optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
     Shape dims = inputs[0]->getDims();
+    Shape dims_h = inputs[1]->getDims();
     auto rank = inputs[0]->getRank();
 
     // =================================== 作业 ===================================
     // TODO：修改 dims，返回正确的 concat 后的 shape
     // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
+    for(size_t i = 0;i < rank;i++)
+    {
+        if(dims[i]!=dims_h[i])
+        {
+            dims[i] += dims_h[i];
+            break;
+        }
+    }
     // =================================== 作业 ===================================
 
     return {{dims}};
diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc
index 7a16ca2..b777a6f 100644
--- a/src/operators/matmul.cc
+++ b/src/operators/matmul.cc
@@ -26,8 +26,37 @@ namespace infini
         // =================================== 作业 ===================================
         // TODO：返回经过 matmul 操作后的 shape
         // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
+        const auto A = inputs[0]; // shape inference below works from the operands' extents alone
+        const auto B = inputs[1];
+        Shape dimA = A->getDims();
+        Shape dimB = B->getDims();
+        Shape out_shape = dimA; // start from A's shape; only the last two entries are overwritten below
+        int tempA = dimA.size() - 1; // index into dimA of the extent copied to the output's second-to-last dim
+        int tempB = dimA.size() - 1; // index into dimB of the extent copied to the output's last dim. NOTE(review): initialised from dimA.size(), probably meant dimB.size() - confirm
+        if(dimA[dimA.size()-1] == dimB[dimB.size()-1]) // A's last extent equals B's last extent. NOTE(review): branches are tried in order and the first match wins; no transA/transB flag is read here - confirm intended
+        {
+            tempA = dimA.size()-2;
+            tempB = dimB.size()-2;
+        }
+        else if(dimA[dimA.size()-1] == dimB[dimB.size()-2]) // A's last extent equals B's second-to-last extent
+        {
+            tempA = dimA.size()-2;
+            tempB = dimB.size()-1; // output takes A's second-to-last and B's last extent
+        }
+        else if(dimA[dimA.size()-2] == dimB[dimB.size()-1]) // A's second-to-last extent equals B's last extent
+        {
+            tempA = dimA.size()-1;
+            tempB = dimB.size()-2; // output takes A's last and B's second-to-last extent
+        }
+        else if(dimA[dimA.size()-2] == dimB[dimB.size()-2]) // both second-to-last extents are equal
+        {
+            tempA = dimA.size()-1;
+            tempB = dimB.size()-1; // output takes the last extent of both operands
+        }
+        out_shape[dimA.size()-2] = dimA[tempA]; // if no branch matched, tempA/tempB still hold their initial values
+        out_shape[dimA.size()-1] = dimB[tempB];
         // =================================== 作业 ===================================
-        return std::nullopt;
+        return {{out_shape}};
     }
 
 } // namespace infini
\ No newline at end of file
diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc
index faab2b6..8d0b92d 100644
--- a/src/operators/transpose.cc
+++ b/src/operators/transpose.cc
@@ -32,9 +32,17 @@ namespace infini
         // =================================== 作业 ===================================
         // TODO：修改 output_dim，返回正确的 transpose 后的 shape
         // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
+        if(rank > 1) // for rank 0 or 1 this block leaves output_dim untouched
+        {
+            for(int i=0;i<rank;i++)
+            {
+                output_dim[i] = input_dim[transposePermute[i]]; // output axis i takes the extent of input axis transposePermute[i]
+            }
+        }
         // =================================== 作业 ===================================
 
-        return std::nullopt;
+        //return std::nullopt;
+        return {{output_dim}};
     }
 
     std::string TransposeObj::toString() const
diff --git a/src/operators/unary.cc b/src/operators/unary.cc
index 3daad36..7dc3f35 100644
--- a/src/operators/unary.cc
+++ b/src/operators/unary.cc
@@ -37,9 +37,12 @@ namespace infini
     {
         // =================================== 作业 ===================================
         // TODO：返回经过 clip 操作后的 shape
+        const auto A = inputs[0]; // only the first input is consulted
+        auto input_dim = A->getDims();
+        auto output_dim = input_dim; // the returned shape is a copy of the input's shape
         // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13
         // =================================== 作业 ===================================
-        return std::nullopt;
+        return {{output_dim}};
     }
 
     std::string ClipObj::toString() const
@@ -66,7 +69,7 @@ namespace infini
         // REF_FILE: src/core/operator.cc
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
         // =================================== 作业 ===================================
-        return {};
+        return {{getOutputDataType()}};
     }
 
     optional<vector<Shape>> CastObj::inferShape(const TensorVec &inputs)
@@ -74,8 +77,11 @@ namespace infini
         // =================================== 作业 ===================================
         // TODO：返回经过 cast 操作后的 shape
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
+        const auto A = inputs[0]; // only the first input is consulted
+        auto input_dim = A->getDims();
+        auto output_dim = input_dim; // the returned shape is a copy of the input's shape
         // =================================== 作业 ===================================
-        return std::nullopt;
+        return {{output_dim}};
     }
 
     std::string CastObj::toString() const
diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc
index edbd2c8..22ba511 100644
--- a/src/utils/operator_utils.cc
+++ b/src/utils/operator_utils.cc
@@ -8,9 +8,24 @@ Shape infer_broadcast(const Shape &A, const Shape &B) {
     // =================================== 作业 ===================================
     // TODO：对 A 和 B 进行双向广播，返回广播后的形状。
     // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md
+    // Bidirectional broadcast. Both shapes are right-aligned by treating the
+    // shorter one as if it had leading 1s; each output extent is B's wherever
+    // A's is 1 and A's otherwise.
+    // NOTE(review): extents that differ while neither is 1 are not rejected;
+    // A's extent is returned for such an axis.
+    const size_t rank = A.size() > B.size() ? A.size() : B.size();
+    const size_t padA = rank - A.size();
+    const size_t padB = rank - B.size();
+    // Named nA because the return statement after this section returns nA.
+    Shape nA(rank, 1);
+    for (size_t axis = 0; axis < rank; ++axis)
+    {
+        const int extentA = axis < padA ? 1 : A[axis - padA];
+        const int extentB = axis < padB ? 1 : B[axis - padB];
+        nA[axis] = (extentA == 1) ? extentB : extentA;
+    }
     // =================================== 作业 ===================================
-    
-    return {};
+    return {nA};
 }
 
 int get_real_axis(const int &axis, const int &rank) {

From ba71fa9ec7bacb997b2350e7a8f8c59b7211568f Mon Sep 17 00:00:00 2001
From: wudizhr <18701263118@163.com>
Date: Sat, 16 Aug 2025 00:10:33 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E5=AE=8C=E6=88=90naive=E7=89=88=E6=9C=AC?=
 =?UTF-8?q?=E5=86=85=E5=AD=98=E7=AE=A1=E7=90=86=EF=BC=8C=E9=80=9A=E8=BF=87?=
 =?UTF-8?q?=E5=85=A8=E9=83=A8=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/core/allocator.h                      |  25 +++
 include/core/graph.h                          |   4 +-
 src/core/allocator.cc                         |  67 ++++++++-
 src/core/graph.cc                             | 142 ++++++++++++++++++
 src/operators/concat.cc                       |  18 ++-
 .../nativecpu/test_nativecpu_concat.cc        |   2 +
 test/operators/test_concat.cc                 |   1 +
 7 files changed, 251 insertions(+), 8 deletions(-)

diff --git a/include/core/allocator.h b/include/core/allocator.h
index 002601d..f51f933 100644
--- a/include/core/allocator.h
+++ b/include/core/allocator.h
@@ -25,6 +25,31 @@ namespace infini {
 
     // =================================== 作业 ===================================
     // TODO：可能需要设计一个数据结构来存储free block，以便于管理和合并
+    
+    // Free-block bookkeeping, adapted from InfiniTensor.
+    struct freeBlockInfo {
+        size_t addr;      // head address offset of the free block
+        size_t blockSize; // size of the free block
+    };
+
+    struct cmpFreeBlockInfo {
+        bool operator()(const freeBlockInfo &a, const freeBlockInfo &b) const {
+            return (a.blockSize != b.blockSize) ? (a.blockSize < b.blockSize)
+                                                : (a.addr < b.addr); // order by size first, then by address
+        }
+    };
+
+    // Ordered set of all free memory blocks, sorted by (blockSize, addr).
+    std::set<freeBlockInfo, cmpFreeBlockInfo> freeBlocks;
+
+    // key: head address offset of a free memory block
+    // value: blockSize of that block
+    std::unordered_map<size_t, size_t> headAddrToBlockSize;
+
+    // key: tail address offset (head + blockSize) of a free memory block
+    // value: blockSize of that block
+    std::unordered_map<size_t, size_t> tailAddrToBlockSize;
+
     // HINT: 可以使用一个 map 来存储 free block，key 为 block 的起始/结尾地址，value 为 block 的大小
     // =================================== 作业 ===================================
 
diff --git a/include/core/graph.h b/include/core/graph.h
index c45580c..771fc45 100644
--- a/include/core/graph.h
+++ b/include/core/graph.h
@@ -58,14 +58,16 @@ namespace infini
         void dataMalloc();
 
         /**
          * @brief Add an operator and create its outputs. Output tensor arguments
          * should be empty Refs (e.g., nullptr).
          */
         template <typename T, typename... Args>
         Ref<T> addOp(Args &&...args)
         {
             Ref<T> op = infini::make_ref<T>(this, std::forward<Args>(args)...);
+            // this->print();
             addOperatorAndConnect(op);
+            // this->print();
             return op;
         }
 
diff --git a/src/core/allocator.cc b/src/core/allocator.cc
index ff593ae..90c74db 100644
--- a/src/core/allocator.cc
+++ b/src/core/allocator.cc
@@ -31,9 +31,45 @@ namespace infini
 
         // =================================== 作业 ===================================
         // TODO: 设计一个算法来分配内存，返回起始地址偏移量
+        auto it = this->freeBlocks.lower_bound(freeBlockInfo{(size_t)0, size}); // first free block, in (blockSize, addr) order, with blockSize >= size
+        size_t retAddr = this->peak; // default: place the request at the current peak
+        if(it != freeBlocks.end()) // a free block is large enough: carve the request from its head
+        {
+            size_t blockSize = it->blockSize;
+            retAddr = it->addr;
+            size_t tailAddr = retAddr + size; // end of the part handed out
+            this->headAddrToBlockSize.erase(retAddr);
+            this->tailAddrToBlockSize.erase(tailAddr); // equals the block's tail key only when blockSize == size; otherwise that key is overwritten below
+            if(blockSize > size) // split: the remainder stays free
+            {
+                freeBlockInfo newBlock = {tailAddr, blockSize - size};
+                this->headAddrToBlockSize[tailAddr] = newBlock.blockSize;
+                this->tailAddrToBlockSize[tailAddr + newBlock.blockSize] = newBlock.blockSize;
+                this->freeBlocks.insert(newBlock);
+            }
+            this->freeBlocks.erase(it);
+        }
+        else // no free block fits: the request extends past the current peak
+        {
+            auto blockTailWithPeak = this->tailAddrToBlockSize.find(this->peak); // a free block that ends exactly at peak, if any
+            if(blockTailWithPeak != this->tailAddrToBlockSize.end())
+            {
+                retAddr = this->peak - blockTailWithPeak->second; // start at that block's head
+                this->peak += (size - blockTailWithPeak->second); // grow only by the shortfall
+                freeBlockInfo endBlock = {retAddr, blockTailWithPeak->second};
+                this->freeBlocks.erase(endBlock);
+                this->headAddrToBlockSize.erase(endBlock.addr);
+                this->tailAddrToBlockSize.erase(endBlock.addr + endBlock.blockSize);
+            }
+            else
+            {
+                this->peak += size;
+            }
+        }
+        this->used += size;
         // =================================== 作业 ===================================
 
-        return 0;
+        return retAddr;
     }
 
     void Allocator::free(size_t addr, size_t size)
@@ -43,6 +79,35 @@ namespace infini
 
         // =================================== 作业 ===================================
         // TODO: 设计一个算法来回收内存
+        // Return [addr, addr + size) to the free pool, coalescing it with free neighbours.
+        freeBlockInfo block = {addr, size};
+        const size_t tailAddr = addr + size;
+        // Left neighbour: a free block whose tail is exactly `addr`.
+        auto preFreeBlockIter = this->tailAddrToBlockSize.find(addr);
+        if(preFreeBlockIter != this->tailAddrToBlockSize.end())
+        {
+            const size_t preBlockSize = preFreeBlockIter->second;
+            block.addr -= preBlockSize;
+            block.blockSize += preBlockSize;
+            this->freeBlocks.erase(freeBlockInfo{block.addr, preBlockSize});
+            this->headAddrToBlockSize.erase(block.addr);
+            this->tailAddrToBlockSize.erase(preFreeBlockIter);
+        }
+        // Right neighbour: a free block whose head is exactly `tailAddr`.
+        auto subFreeBlockIter = this->headAddrToBlockSize.find(tailAddr);
+        if(subFreeBlockIter != this->headAddrToBlockSize.end())
+        {
+            const size_t subBlockSize = subFreeBlockIter->second;
+            block.blockSize += subBlockSize;
+            this->freeBlocks.erase(freeBlockInfo{tailAddr, subBlockSize});
+            this->tailAddrToBlockSize.erase(tailAddr + subBlockSize);
+            this->headAddrToBlockSize.erase(subFreeBlockIter);
+        }
+        // Index the (possibly merged) block only once its final extent is known.
+        this->headAddrToBlockSize[block.addr] = block.blockSize;
+        this->tailAddrToBlockSize[block.addr + block.blockSize] = block.blockSize;
+        this->freeBlocks.insert(block);
+        this->used -= size;
         // =================================== 作业 ===================================
     }
 
diff --git a/src/core/graph.cc b/src/core/graph.cc
index 3a90637..2ca3599 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -1,4 +1,5 @@
 #include "core/graph.h"
+#include "operators/matmul.h"
 #include <algorithm>
 #include <numeric>
 #include <queue>
@@ -105,6 +106,130 @@ namespace infini
         // 图优化规则如下：
         // 1. 去除冗余的算子（例如，两个相邻的算子都是 transpose 算子，且做的是相反的操作，可以将其全部删除）
         // 2. 合并算子（例如，矩阵乘算子中含有属性transA、transB，如果其输入存在transpose，且对最后两个维度做交换，就可以将transpose融入到矩阵乘算子的属性中去）
+        // NOTE(review): 10 and 7 are assumed to be the underlying values of
+        // OpType::Transpose and OpType::MatMul; prefer the named enumerators
+        // once that is confirmed.
+        constexpr int kTransposeType = 10;
+        constexpr int kMatmulType = 7;
+        // True when `out` is `in` with its last two extents exchanged. Shape-only
+        // test: it cannot tell a last-two-axes swap from another permutation
+        // that happens to produce the same shape.
+        auto swapsLastTwoDims = [](const Shape &in, const Shape &out) {
+            const size_t rank = in.size();
+            if (rank < 2 || out.size() != rank)
+                return false;
+            for (size_t d = 0; d + 2 < rank; ++d)
+            {
+                if (in[d] != out[d])
+                    return false;
+            }
+            return in[rank - 1] == out[rank - 2] && in[rank - 2] == out[rank - 1];
+        };
+
+        // Rule 1: when Transpose `op` consumes the output of another Transpose
+        // and the pair restores the original shape, splice both out of the
+        // graph. Returns true when the graph was modified.
+        // NOTE(review): equal shapes are taken as proof that the two
+        // permutations cancel; confirm against the permute attributes.
+        auto removeTransposePair = [&](const Operator &op) {
+            const auto predecessors = op->getPredecessors();
+            for (auto predecessor : predecessors)
+            {
+                if (predecessor->getOpType().underlying() != kTransposeType)
+                    continue;
+                Tensor input = predecessor->getInputs()[0];
+                Tensor middle = op->getInputs()[0];
+                Tensor output = op->getOutputs()[0];
+                if (output->getDims() != input->getDims() || middle->getTargets().size() != 1)
+                    continue;
+                std::cout << "transpose delete" << std::endl;
+                this->removeOperator(op);
+                this->removeOperator(predecessor);
+                this->removeTensor(middle);
+                this->removeTensor(output);
+                // Reconnect the producer of `input` to the consumers of `output`.
+                if (auto pred = input->getSource())
+                {
+                    pred->removeSuccessors(predecessor);
+                    for (auto &succ : output->getTargets())
+                        pred->addSuccessors(succ);
+                }
+                input->removeTarget(predecessor);
+                for (auto &succ : output->getTargets())
+                {
+                    input->addTarget(succ);
+                    succ->replaceInput(output, input);
+                    succ->removePredecessors(op);
+                    for (auto &predop : predecessor->getPredecessors())
+                        succ->addPredecessors(predop);
+                }
+                return true;
+            }
+            return false;
+        };
+
+        // Rule 2: when input `idx` (0 = A, 1 = B) of Matmul `op` comes from a
+        // Transpose of the last two axes that has no other consumer, read the
+        // Transpose's input directly and set the matching trans flag instead.
+        // Returns true when the graph was modified.
+        auto fuseTransposeIntoMatmul = [&](const Operator &op, size_t idx) {
+            auto *matmulOp = dynamic_cast<MatmulObj *>(op.get());
+            if (matmulOp == nullptr)
+                return false;
+            auto source = op->getInputs()[idx]->getSource();
+            if (!source || source->getOpType().underlying() != kTransposeType)
+                return false;
+            Tensor input = source->getInputs()[0];
+            Tensor output = source->getOutputs()[0];
+            if (!swapsLastTwoDims(input->getDims(), output->getDims()))
+                return false;
+            if (output->getTargets().size() != 1)
+                return false;
+            std::cout << (idx == 0 ? "transpose merge A" : "transpose merge B") << std::endl;
+            // Bypass the Transpose: its producers now feed the Matmul directly.
+            op->removePredecessors(source);
+            for (auto &predop : source->getPredecessors())
+            {
+                predop->removeSuccessors(source);
+                predop->addSuccessors(op);
+                op->addPredecessors(predop);
+            }
+            input->removeTarget(source);
+            input->addTarget(op);
+            op->replaceInput(output, input);
+            // NOTE(review): assumes the flag was false before the fusion.
+            if (idx == 0)
+                matmulOp->setTransA(true);
+            else
+                matmulOp->setTransB(true);
+            this->removeOperator(source);
+            this->removeTensor(output);
+            return true;
+        };
+
+        // Apply the rules until a full pass leaves the graph untouched. Every
+        // rewrite erases entries from `ops`, so the scan restarts after one
+        // instead of continuing with indices that may have shifted.
+        bool changed = true;
+        while (changed)
+        {
+            changed = false;
+            for (size_t i = 0; i < ops.size() && !changed; i++)
+            {
+                auto op = ops[i];
+                const auto type = op->getOpType().underlying();
+                if (type == kTransposeType)
+                    changed = removeTransposePair(op);
+                else if (type == kMatmulType)
+                {
+                    const bool fusedA = fuseTransposeIntoMatmul(op, 0);
+                    const bool fusedB = fuseTransposeIntoMatmul(op, 1);
+                    changed = fusedA || fusedB;
+                }
+            }
+        }
+        std::cout << "Optimize complete!" << std::endl << std::endl;
+
         // =================================== 作业 ===================================
     }
 
@@ -151,6 +276,23 @@ namespace infini
         // =================================== 作业 ===================================
         // TODO：利用 allocator 给计算图分配内存
         // HINT: 获取分配好的内存指针后，可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
+        // Naive version: every tensor gets its own offset from the allocator.
+        // Pass 1 requests one offset per tensor, in iteration order.
+        std::vector<size_t> offsets;
+        offsets.reserve(tensors.size());
+        for (const auto &tensor : tensors)
+        {
+            offsets.push_back(allocator.alloc(tensor->getBytes()));
+        }
+        // Pass 2 binds each tensor to allocator.getPtr() plus its offset.
+        size_t index = 0;
+        for (const auto &tensor : tensors)
+        {
+            auto *base = static_cast<uint8_t *>(allocator.getPtr());
+            const size_t offset = offsets[index++];
+            tensor->setDataBlob(
+                make_ref<BlobObj>(tensor->runtime, base + offset));
+        }
         // =================================== 作业 ===================================
 
         allocator.info();
diff --git a/src/operators/concat.cc b/src/operators/concat.cc
index 1f02bf1..c05edf3 100644
--- a/src/operators/concat.cc
+++ b/src/operators/concat.cc
@@ -1,17 +1,20 @@
 #include "operators/concat.h"
 #include "utils/operator_utils.h"
+#include "core/graph.h"
 
 namespace infini {
 ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim)
     : OperatorObj(OpType::Concat, inputs, {output}) {
+        // Debug leftover: TensorVec a = {nullptr};
+        // std::cout << "outputsize:"  << a.size() << std::endl; // observed: the single null element still counts towards size()
     int rank = inputs[0]->getRank();
     dim = get_real_axis(_dim, rank);
-    IT_ASSERT(checkValid(graph));
+    IT_ASSERT(checkValid(graph)); // per the original note, the outputs are established inside this call - TODO confirm
 }
 
 optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
     Shape dims = inputs[0]->getDims();
-    Shape dims_h = inputs[1]->getDims();
+    Shape ans = inputs[0]->getDims();
     auto rank = inputs[0]->getRank();
 
     // =================================== 作业 ===================================
@@ -19,15 +22,18 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
     // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
     for(size_t i = 0;i < rank;i++)
     {
-        if(dims[i]!=dims_h[i])
+        for(size_t j = 1;j < inputs.size();j++)
         {
-            dims[i] += dims_h[i];
-            break;
+            // Concatenate along `dim` only: sum the extents on that axis and
+            // require every other axis (and the rank) to match inputs[0].
+            const Shape other = inputs[j]->getDims();
+            IT_ASSERT(other.size() == rank && (i == static_cast<size_t>(dim) || other[i] == dims[i]));
+            if(i == static_cast<size_t>(dim)) ans[i] += other[i];
         }
     }
     // =================================== 作业 ===================================
 
-    return {{dims}};
+    return {{ans}};
 }
 
 std::string ConcatObj::toString() const {
diff --git a/test/kernels/nativecpu/test_nativecpu_concat.cc b/test/kernels/nativecpu/test_nativecpu_concat.cc
index fc87fb1..b6bf2d8 100644
--- a/test/kernels/nativecpu/test_nativecpu_concat.cc
+++ b/test/kernels/nativecpu/test_nativecpu_concat.cc
@@ -14,10 +14,12 @@ TEST(Concat, NativeCpu) {
     auto t2 = g->addTensor({2, 2, 1, 1}, DataType::Float32);
     auto t3 = g->addTensor({2, 2, 2, 1}, DataType::Float32);
     auto op = g->addOp<ConcatObj>(TensorVec{t1, t2, t3}, nullptr, 2);
+    g->print();
     g->dataMalloc();
     t1->setData(IncrementalGenerator());
     t2->setData(OneGenerator());
     t3->setData(OneGenerator());
+    g->print();
 
     runtime->run(g);
     EXPECT_TRUE(op->getOutput()->equalData(
diff --git a/test/operators/test_concat.cc b/test/operators/test_concat.cc
index 8984b9f..834222c 100644
--- a/test/operators/test_concat.cc
+++ b/test/operators/test_concat.cc
@@ -11,6 +11,7 @@ TEST(Concat, ShapeInfer) {
     auto t2 = g->addTensor({1, 3, 2, 5}, DataType::Float32);
 
     auto op = g->addOp<ConcatObj>(TensorVec{t1, t2}, nullptr, 3);
+    // g->print();
     EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 9}));
 }
 } // namespace infini