srugano · srugano · Sep 2, 2025 · Sep 5, 2025
diff --git a/packages/postgresql-pgml/release.sh b/packages/postgresql-pgml/release.sh
@@ -72,14 +72,14 @@ build_packages() {
       $(package_name ${pg} ${ubuntu_version} ${ARCH})
 
     # Upload to S3
-    deb-s3 upload \
-      --visibility=public \
-      --bucket apt.postgresml.org \
-      $(package_name ${pg} ${ubuntu_version} ${ARCH}) \
-      --codename ${codename}
+    #deb-s3 upload \
+    #  --visibility=public \
+    #  --bucket apt.postgresml.org \
+    #  $(package_name ${pg} ${ubuntu_version} ${ARCH}) \
+    #  --codename ${codename}
 
     # Clean up the package file
-    rm $(package_name ${pg} ${ubuntu_version} ${ARCH})
+    #rm $(package_name ${pg} ${ubuntu_version} ${ARCH})
   done
 }
 
@@ -97,4 +97,4 @@ else
   for ubuntu_version in "${!ubuntu_versions[@]}"; do
     build_packages "$ubuntu_version" "${ubuntu_versions[$ubuntu_version]}"
   done
-fi
+fi
diff --git a/pgml-cms/blog/serverless-llms-are-dead-long-live-serverless-llms.md b/pgml-cms/blog/serverless-llms-are-dead-long-live-serverless-llms.md
@@ -64,9 +64,6 @@ Because we’ve curated the best in class models, they will always be instantly
 
 Your application can instantly burst usage to massive scale without a second thought, other than the aforementioned cost of GPU usage. Financial costs are now the limiting factor, but we have an additional new lever to optimize costs even further.
 
-### Multi-tenant continuous batching
-It’s not just loading the model weights into GPU RAM the first time that’s expensive. Streaming those weights from GPU RAM to the CUDA cores for each request is actually the bottleneck for most LLM applications. Continuous batching allows us to reuse a single layer of weights for multiple different queries at the same time, further reducing costs, without significantly impacting overall latency. Thanks to vLLM team for [this impressive breakthrough](https://arxiv.org/abs/2309.06180) in performance.
-
 ### Simplified pricing
 Compared to using a host of services to provide comparable functionality, our pricing is significantly simpler. We charge for: