Skip to content

Commit 430b312

Browse files
authored
fix ncnn2int8 top_blob int8 scales saving for dequant only style (#6425)
1 parent 5da919f commit 430b312

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

docs/how-to-use-and-FAQ/quantized-int8-inference.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Example with mobilenet, just need three steps.
88

99
### 1. Optimize model
1010

11+
NOTE: **If your model is converted via pnnx, skip this step.**
12+
1113
```shell
1214
./ncnnoptimize mobilenet.param mobilenet.bin mobilenet-opt.param mobilenet-opt.bin 0
1315
```

tools/modelwriter.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -900,7 +900,10 @@ int ModelWriter::save(const char* parampath, const char* binpath)
900900
{
901901
fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
902902
fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
903-
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
903+
if (op->int8_scale_term > 100)
904+
{
905+
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
906+
}
904907
}
905908
#endif // NCNN_INT8
906909
}
@@ -1073,7 +1076,10 @@ int ModelWriter::save(const char* parampath, const char* binpath)
10731076
{
10741077
fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
10751078
fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
1076-
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
1079+
if (op->int8_scale_term > 100)
1080+
{
1081+
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
1082+
}
10771083
}
10781084
#endif // NCNN_INT8
10791085
}

0 commit comments

Comments (0)