Skip to content

Commit 9a6e584

Browse files
author
smarunich
committed
expanding the functionality
1 parent e964be4 commit 9a6e584

File tree

10 files changed

+1692
-396
lines changed

10 files changed

+1692
-396
lines changed

.kiro/specs/model-publishing-workflow/tasks.md

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,28 @@
11
# Implementation Plan
22

3-
- [ ] 1. Set up core publishing infrastructure and types
3+
- [x] 1. Set up core publishing infrastructure and types
44
- Create PublishingService struct and basic methods in `management/publishing.go`
55
- Add publishing-related types to `management/types.go` (PublishConfig, PublishedModel, RateLimitConfig, etc.)
66
- Add publishing service initialization to `management/main.go`
77
- _Requirements: 1.1, 1.2, 1.3_
88

9-
- [ ] 2. Extend K8s client with Gateway API operations
10-
- [ ] 2.1 Add HTTPRoute CRUD operations to `management/k8s.go`
9+
- [x] 2. Extend K8s client with Gateway API operations
10+
- [x] 2.1 Add HTTPRoute CRUD operations to `management/k8s.go`
1111
- Implement CreateHTTPRoute, UpdateHTTPRoute, DeleteHTTPRoute methods
1212
- Add HTTPRoute resource validation and error handling
1313
- _Requirements: 1.4, 1.6_
1414

15-
- [ ] 2.2 Add AIGatewayRoute CRUD operations to `management/k8s.go`
15+
- [x] 2.2 Add AIGatewayRoute CRUD operations to `management/k8s.go`
1616
- Implement CreateAIGatewayRoute, UpdateAIGatewayRoute, DeleteAIGatewayRoute methods
1717
- Add AIGatewayRoute resource validation for OpenAI compatibility
1818
- _Requirements: 1.5, 1.6_
1919

20-
- [ ] 2.3 Add BackendTrafficPolicy operations for rate limiting
20+
- [x] 2.3 Add BackendTrafficPolicy operations for rate limiting
2121
- Implement CreateBackendTrafficPolicy, UpdateBackendTrafficPolicy, DeleteBackendTrafficPolicy methods
2222
- Add rate limiting policy validation and configuration
2323
- _Requirements: 3.1, 3.2, 3.3_
2424

25-
- [ ] 2.4 Add API key secret management operations
25+
- [x] 2.4 Add API key secret management operations
2626
- Implement CreateAPIKeySecret, UpdateAPIKeySecret, DeleteAPIKeySecret methods
2727
- Add secure API key generation and storage functionality
2828
- _Requirements: 1.3, 4.3_
@@ -91,7 +91,7 @@
9191
- Maintain published endpoints during updates
9292
- _Requirements: 4.2, 4.4_
9393

94-
- [ ] 6. Add monitoring and usage tracking
94+
- [-] 6. Add monitoring and usage tracking
9595
- [ ] 6.1 Implement usage statistics collection
9696
- Track API requests, tokens used, and access patterns per published model
9797
- Add metrics for rate limit violations and authentication failures
@@ -104,26 +104,26 @@
104104
- Add security event logging for unauthorized access attempts
105105
- _Requirements: 6.1, 6.3_
106106

107-
- [ ] 7. Create React UI components for publishing
108-
- [ ] 7.1 Create PublishingForm component
107+
- [x] 7. Create React UI components for publishing
108+
- [x] 7.1 Create PublishingForm component
109109
- Build form for configuring model publishing settings
110110
- Add rate limiting configuration options
111111
- Include tenant selection and access control settings
112112
- _Requirements: 1.1, 3.1, 3.4_
113113

114-
- [ ] 7.2 Create PublishedModelsList component
114+
- [x] 7.2 Create PublishedModelsList component
115115
- Display list of published models with status and usage metrics
116116
- Add management actions (unpublish, update, rotate keys)
117117
- Show API endpoint information and documentation
118118
- _Requirements: 4.1, 4.2, 5.1_
119119

120-
- [ ] 7.3 Create APIKeyManager component
120+
- [x] 7.3 Create APIKeyManager component
121121
- Display API keys with copy-to-clipboard functionality
122122
- Add key rotation interface with confirmation dialogs
123123
- Show key usage statistics and last access times
124124
- _Requirements: 4.3, 5.1_
125125

126-
- [ ] 7.4 Extend ModelList component with publish actions
126+
- [x] 7.4 Extend ModelList component with publish actions
127127
- Add "Publish" button to model list items
128128
- Show publishing status indicators
129129
- Add quick access to published model endpoints
@@ -142,7 +142,7 @@
142142
- Create language-specific SDK examples (Python, JavaScript, curl)
143143
- _Requirements: 5.2, 5.4, 5.5_
144144

145-
- [ ] 9. Implement comprehensive error handling
145+
- [-] 9. Implement comprehensive error handling
146146
- [ ] 9.1 Add publishing validation errors
147147
- Handle model not found, not ready, and invalid tenant scenarios
148148
- Add gateway configuration failure handling

management/errors.go

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"log"
6+
"strings"
7+
"time"
8+
)
9+
10+
// PublishingError is an error raised by the publishing workflow. Besides a
// code and message it carries the namespace, model name and step label that
// the caller supplied, plus the underlying cause (if any).
type PublishingError struct {
	Code      string // short identifier; first component of the Error() text
	Message   string // human-readable summary
	Details   string // optional extra text; NewPublishingError fills it from the cause
	Cause     error  // underlying error exposed through Unwrap; may be nil
	Namespace string // namespace passed by the caller
	ModelName string // model name passed by the caller
	Step      string // step label passed by the caller
}

// Error renders the error as "Code: Message", followed by " - <details>" when
// detail text is available.
//
// If Details is empty but Cause is set (for example when the struct was built
// as a literal instead of through NewPublishingError), the cause's message is
// used as the detail text so that it does not vanish from the output.
func (e *PublishingError) Error() string {
	details := e.Details
	if details == "" && e.Cause != nil {
		details = e.Cause.Error()
	}
	if details == "" {
		return fmt.Sprintf("%s: %s", e.Code, e.Message)
	}
	return fmt.Sprintf("%s: %s - %s", e.Code, e.Message, details)
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect the chain. It returns nil when no cause was recorded.
func (e *PublishingError) Unwrap() error {
	return e.Cause
}

// NewPublishingError builds a PublishingError from the given code, message and
// context values. When cause is non-nil its message is copied into Details and
// the cause itself is stored for Unwrap.
func NewPublishingError(code, message, namespace, modelName, step string, cause error) *PublishingError {
	details := ""
	if cause != nil {
		details = cause.Error()
	}

	return &PublishingError{
		Code:      code,
		Message:   message,
		Details:   details,
		Cause:     cause,
		Namespace: namespace,
		ModelName: modelName,
		Step:      step,
	}
}
49+
50+
// PublishingRollback handles rollback operations when publishing fails
51+
type PublishingRollback struct {
52+
service *PublishingService
53+
namespace string
54+
modelName string
55+
steps []string
56+
}
57+
58+
// NewPublishingRollback creates a new rollback handler
59+
func NewPublishingRollback(service *PublishingService, namespace, modelName string) *PublishingRollback {
60+
return &PublishingRollback{
61+
service: service,
62+
namespace: namespace,
63+
modelName: modelName,
64+
steps: make([]string, 0),
65+
}
66+
}
67+
68+
// AddStep adds a step to the rollback list
69+
func (r *PublishingRollback) AddStep(step string) {
70+
r.steps = append(r.steps, step)
71+
}
72+
73+
// Execute performs the rollback operations
74+
func (r *PublishingRollback) Execute() {
75+
log.Printf("Starting rollback for model %s/%s", r.namespace, r.modelName)
76+
77+
// Rollback in reverse order
78+
for i := len(r.steps) - 1; i >= 0; i-- {
79+
step := r.steps[i]
80+
log.Printf("Rolling back step: %s", step)
81+
82+
switch step {
83+
case "api_key":
84+
r.service.cleanupAPIKey(r.namespace, r.modelName)
85+
case "gateway_config":
86+
r.service.cleanupGatewayConfiguration(r.namespace, r.modelName)
87+
case "rate_limiting":
88+
r.service.cleanupRateLimitingPolicy(r.namespace, r.modelName)
89+
case "metadata":
90+
r.service.cleanupPublishedModelMetadata(r.namespace, r.modelName)
91+
default:
92+
log.Printf("Unknown rollback step: %s", step)
93+
}
94+
}
95+
96+
log.Printf("Rollback completed for model %s/%s", r.namespace, r.modelName)
97+
}
98+
99+
// ValidationError describes one rejected field of a publishing request.
type ValidationError struct {
	Field   string      // name of the offending field
	Value   interface{} // value that was rejected; not included in Error()
	Message string      // explanation of why the value was rejected
}

// Error reports the field name and message; the rejected value is omitted.
func (v *ValidationError) Error() string {
	// Every operand is a string, so fmt.Sprint inserts no separators.
	return fmt.Sprint("validation error for field '", v.Field, "': ", v.Message)
}
109+
110+
// PublishingValidator handles validation of publishing requests
111+
type PublishingValidator struct {
112+
service *PublishingService
113+
}
114+
115+
// NewPublishingValidator creates a new validator
116+
func NewPublishingValidator(service *PublishingService) *PublishingValidator {
117+
return &PublishingValidator{
118+
service: service,
119+
}
120+
}
121+
122+
// ValidatePublishRequest validates a publish request
123+
func (v *PublishingValidator) ValidatePublishRequest(namespace, modelName string, config PublishConfig) []ValidationError {
124+
var errors []ValidationError
125+
126+
// Validate model exists and is ready
127+
if err := v.service.validateModelExists(namespace, modelName); err != nil {
128+
errors = append(errors, ValidationError{
129+
Field: "model",
130+
Value: modelName,
131+
Message: fmt.Sprintf("Model validation failed: %v", err),
132+
})
133+
}
134+
135+
// Validate tenant ID
136+
if config.TenantID == "" {
137+
errors = append(errors, ValidationError{
138+
Field: "tenantId",
139+
Value: config.TenantID,
140+
Message: "Tenant ID is required",
141+
})
142+
}
143+
144+
// Validate rate limiting configuration
145+
if config.RateLimiting.RequestsPerMinute <= 0 {
146+
errors = append(errors, ValidationError{
147+
Field: "rateLimiting.requestsPerMinute",
148+
Value: config.RateLimiting.RequestsPerMinute,
149+
Message: "Requests per minute must be greater than 0",
150+
})
151+
}
152+
153+
if config.RateLimiting.RequestsPerHour <= 0 {
154+
errors = append(errors, ValidationError{
155+
Field: "rateLimiting.requestsPerHour",
156+
Value: config.RateLimiting.RequestsPerHour,
157+
Message: "Requests per hour must be greater than 0",
158+
})
159+
}
160+
161+
if config.RateLimiting.RequestsPerMinute > config.RateLimiting.RequestsPerHour {
162+
errors = append(errors, ValidationError{
163+
Field: "rateLimiting",
164+
Value: nil,
165+
Message: "Requests per minute cannot exceed requests per hour",
166+
})
167+
}
168+
169+
// Validate model type
170+
if config.ModelType != "" && config.ModelType != "traditional" && config.ModelType != "openai" {
171+
errors = append(errors, ValidationError{
172+
Field: "modelType",
173+
Value: config.ModelType,
174+
Message: "Model type must be 'traditional' or 'openai'",
175+
})
176+
}
177+
178+
// Validate external path
179+
if config.ExternalPath != "" {
180+
if !strings.HasPrefix(config.ExternalPath, "/") {
181+
errors = append(errors, ValidationError{
182+
Field: "externalPath",
183+
Value: config.ExternalPath,
184+
Message: "External path must start with '/'",
185+
})
186+
}
187+
}
188+
189+
// Validate authentication configuration
190+
if config.Authentication.Type != "apikey" {
191+
errors = append(errors, ValidationError{
192+
Field: "authentication.type",
193+
Value: config.Authentication.Type,
194+
Message: "Only 'apikey' authentication is currently supported",
195+
})
196+
}
197+
198+
return errors
199+
}
200+
201+
// RecoveryHandler handles recovery from publishing failures
202+
type RecoveryHandler struct {
203+
service *PublishingService
204+
}
205+
206+
// NewRecoveryHandler creates a new recovery handler
207+
func NewRecoveryHandler(service *PublishingService) *RecoveryHandler {
208+
return &RecoveryHandler{
209+
service: service,
210+
}
211+
}
212+
213+
// RecoverFromFailure attempts to recover from a publishing failure
214+
func (r *RecoveryHandler) RecoverFromFailure(namespace, modelName string, err error) error {
215+
log.Printf("Attempting recovery for model %s/%s after error: %v", namespace, modelName, err)
216+
217+
// Check if model is partially published
218+
isPublished := r.service.isModelPublished(namespace, modelName)
219+
220+
if isPublished {
221+
log.Printf("Model %s/%s appears to be partially published, attempting cleanup", namespace, modelName)
222+
223+
// Perform cleanup
224+
r.service.cleanupAPIKey(namespace, modelName)
225+
r.service.cleanupGatewayConfiguration(namespace, modelName)
226+
r.service.cleanupRateLimitingPolicy(namespace, modelName)
227+
r.service.cleanupPublishedModelMetadata(namespace, modelName)
228+
229+
log.Printf("Cleanup completed for model %s/%s", namespace, modelName)
230+
}
231+
232+
return nil
233+
}
234+
235+
// ErrorReporter handles error reporting and logging
236+
type ErrorReporter struct {
237+
service *PublishingService
238+
}
239+
240+
// NewErrorReporter creates a new error reporter
241+
func NewErrorReporter(service *PublishingService) *ErrorReporter {
242+
return &ErrorReporter{
243+
service: service,
244+
}
245+
}
246+
247+
// ReportError reports an error with context
248+
func (r *ErrorReporter) ReportError(user *User, namespace, modelName, operation string, err error) {
249+
// Log the error
250+
log.Printf("Publishing error - User: %s, Model: %s/%s, Operation: %s, Error: %v",
251+
user.Name, namespace, modelName, operation, err)
252+
253+
// Create error log entry
254+
errorEntry := map[string]interface{}{
255+
"timestamp": time.Now().Format(time.RFC3339),
256+
"user": user.Name,
257+
"tenant": user.Tenant,
258+
"operation": operation,
259+
"model": modelName,
260+
"namespace": namespace,
261+
"error": err.Error(),
262+
"level": "error",
263+
}
264+
265+
// Store error in audit log
266+
errorLogName := fmt.Sprintf("publishing-errors-%s", time.Now().Format("2006-01-02"))
267+
268+
// Try to get existing error log for today
269+
existingLog, logErr := r.service.k8sClient.GetConfigMap(namespace, errorLogName)
270+
if logErr != nil {
271+
// Create new error log
272+
errorData := map[string]interface{}{
273+
"entries": []interface{}{errorEntry},
274+
}
275+
r.service.k8sClient.CreateConfigMap(namespace, errorLogName, errorData)
276+
} else {
277+
// Append to existing error log
278+
if entries, ok := existingLog["entries"].([]interface{}); ok {
279+
entries = append(entries, errorEntry)
280+
existingLog["entries"] = entries
281+
r.service.k8sClient.UpdateConfigMap(namespace, errorLogName, existingLog)
282+
}
283+
}
284+
}

0 commit comments

Comments
 (0)