3 changes: 3 additions & 0 deletions .jules/bolt.md
@@ -0,0 +1,3 @@
## 2024-05-24 - [Optimize Preferential Attachment in Scale-Free Graph Generation]
**Learning:** The previous implementation approximated preferential attachment by randomly sampling a handful of candidate nodes at each step and sorting them by degree to pick the highest, an $O(N)$ method per node. Exact preferential attachment can instead be achieved in $O(m)$ time per node by maintaining a flat running list of edge-endpoint node IDs (each node ID appears once for every connection it makes, so its frequency in the list is proportional to its degree) and sampling uniformly from that list, "roulette wheel" style.
**Action:** When selecting items with probability proportional to a frequency (such as node degree in a graph), prefer a "roulette wheel" draw from a flat repeated-ID list over repeatedly calculating and sorting probabilities.
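A minimal standalone sketch of the roulette-wheel technique described above (function and parameter names here are illustrative, not this module's API):

```python
import random


def scale_free_edges(num_nodes: int, m: int = 2, seed=None) -> list:
    """Grow a Barabasi-Albert-style edge list.

    Each node ID is appended to `repeated_nodes` once per edge endpoint,
    so random.choice() over that list samples a node with probability
    proportional to its degree -- exact preferential attachment in O(m)
    per new node, with no sorting.
    """
    rng = random.Random(seed)
    edges = []
    repeated_nodes = []

    # Fully connect a small initial core of m + 1 nodes.
    initial = m + 1
    for i in range(initial):
        for j in range(i + 1, initial):
            edges.append((i, j))
            repeated_nodes.extend([i, j])

    # Attach each new node to m distinct existing nodes, degree-weighted.
    for src in range(initial, num_nodes):
        targets = set()
        while len(targets) < m:
            targets.add(rng.choice(repeated_nodes))
        for tgt in targets:
            edges.append((src, tgt))
            repeated_nodes.extend([src, tgt])
    return edges
```

Because `src` is never in `repeated_nodes` before its own edges are added, the draw cannot produce self-loops, and the `set` guarantees the m targets are distinct.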
21 changes: 9 additions & 12 deletions src/codomyrmex/meme/rhizome/network.py
@@ -35,33 +35,30 @@ def build_graph(num_nodes: int, topology: NetworkTopology) -> Graph:
     m = 2  # New edges per node
     # Initial core
     initial_count = max(m + 1, 5)
+    repeated_nodes = []
     for i in range(initial_count):
         for j in range(i + 1, initial_count):
             src, tgt = node_ids[i], node_ids[j]
             edge = Edge(source=src, target=tgt)
             g.edges.append(edge)
             g.nodes[src].connections.add(tgt)
             g.nodes[tgt].connections.add(src)
+            repeated_nodes.extend([src, tgt])
 
     # Add remaining nodes
     for i in range(initial_count, num_nodes):
         targets = set()
-        # Probability proportional to degree
-        # Simplified: just pick from existing list weighted by degree
-        existing = node_ids[:i]
-        # Since strict PA is expensive O(N^2), use random sample approximation
-        # or just pick m nodes if small
-        candidates = random.sample(existing, min(len(existing), m * 2))
-        # Sort by degree
-        candidates.sort(key=lambda nid: len(g.nodes[nid].connections), reverse=True)
-        targets = set(candidates[:m])
-
-        for t in targets:
-            src, tgt = node_ids[i], t
+        src = node_ids[i]
+
+        while len(targets) < m:
+            targets.add(random.choice(repeated_nodes))
+
+        for tgt in targets:
             edge = Edge(source=src, target=tgt)
             g.edges.append(edge)
             g.nodes[src].connections.add(tgt)
             g.nodes[tgt].connections.add(src)
+            repeated_nodes.extend([src, tgt])
 
     return g
