Skip to content
This repository was archived by the owner on Jun 26, 2023. It is now read-only.

Commit c157478

Browse files
authored
Merge pull request #348 from charleszheng44/crt-timeout-bug
fix creation timeout bug.
2 parents b465723 + f6c9a77 commit c157478

File tree

2 files changed

+107
-99
lines changed

2 files changed

+107
-99
lines changed

incubator/virtualcluster/pkg/controller/util/strings/util.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ limitations under the License.
1616

1717
package strings
1818

19-
import "encoding/json"
19+
import (
20+
"strings"
21+
)
2022

2123
// ContainString checks if string slice sli contains string s
2224
func ContainString(sli []string, s string) bool {
@@ -39,8 +41,16 @@ func RemoveString(sli []string, s string) (newSli []string) {
3941
return
4042
}
4143

42-
// IsJSON check whether given s is in json format
43-
func IsJSON(s string) bool {
44-
var js map[string]interface{}
45-
return json.Unmarshal([]byte(s), &js) == nil
44+
// SplitFields splits string s around each run of the delimiter runes given in
// rs and returns a slice of the substrings between them.
//
// It delegates to strings.FieldsFunc, so consecutive delimiters never produce
// empty fields, and a string that is empty or consists only of delimiters
// yields an empty slice. When no delimiters are given, a non-empty s is
// returned as the single field.
func SplitFields(s string, rs ...rune) []string {
	// isDelimiter reports whether ru is one of the caller-supplied delimiters.
	isDelimiter := func(ru rune) bool {
		for _, r := range rs {
			if ru == r {
				return true
			}
		}
		return false
	}
	return strings.FieldsFunc(s, isDelimiter)
}

incubator/virtualcluster/pkg/controller/virtualcluster/master_provisioner_aliyun.go

Lines changed: 92 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -56,37 +56,27 @@ const (
5656
AliyunASKCfgMpRegionID = "askRegionID"
5757
AliyunASKCfgMpZoneID = "askZoneID"
5858
AliyunASKCfgMpVPCID = "askVpcID"
59+
AliyunASKCfgMpVSID = "askVswitchID"
5960

6061
AnnotationClusterIDKey = "clusterID"
6162
)
6263

6364
// ASKConfig holds the settings used when requesting the creation of an ASK
// cluster.
type ASKConfig struct {
	// vpcID is the ID of the VPC to create the cluster in; it is optional and
	// is sent as "vpc_id" in the creation request when non-empty.
	vpcID string
	// vswitchID is the ID of the vswitch to use; it is sent as "vswitch_id"
	// together with vpcID.
	vswitchID string
	// regionID is the region of the cluster, sent as "region_id".
	regionID string
	// zoneID is the zone of the cluster, sent as "zoneid".
	zoneID string
}
6870

69-
type AliyunSDKErrCode string
70-
7171
// Error codes carried in the "code" field of an Aliyun SDK error message.
const (
	// full list of potential API errors can be found at
	// https://error-center.alibabacloud.com/status/product/Cos?spm=a2c69.11428812.home.7.2247bb9adTOFxm

	// OprationNotSupported is the "ErrorCheckAcl" error code. The identifier's
	// spelling is kept as-is because other code may reference it.
	OprationNotSupported = "ErrorCheckAcl"
	// ClusterNotFound is the "ErrorClusterNotFound" error code.
	ClusterNotFound = "ErrorClusterNotFound"
	// ClusterNameAlreadyExist is the error code returned when the requested
	// cluster name is already in use.
	ClusterNameAlreadyExist = "ClusterNameAlreadyExist"
	// QueryClusterError is the "ErrorQueryCluster" error code.
	QueryClusterError = "ErrorQueryCluster"
)
7879

79-
// AliyunSDKErr holds the information of the error response returned by aliyun
80-
type AliyunSDKErr struct {
81-
errorName string
82-
errorCode AliyunSDKErrCode
83-
errorMessage string
84-
}
85-
86-
func (ase *AliyunSDKErr) Error() string {
87-
return fmt.Sprintf("Aliyun SDK Error: errorName(%s), errorCode(%s), errorMessage(%s)", ase.errorName, ase.errorCode, ase.errorMessage)
88-
}
89-
9080
type MasterProvisionerAliyun struct {
9181
client.Client
9282
scheme *runtime.Scheme
@@ -156,38 +146,35 @@ func sendCreationRequest(cli *sdk.Client, clusterName string, askCfg ASKConfig)
156146
request.QueryParams["RegionId"] = askCfg.regionID
157147

158148
// set vpc, if vpcID is specified
159-
var vpcIDEntry string
149+
var body string
160150
if askCfg.vpcID != "" {
161-
vpcIDEntry = fmt.Sprintf("\"\nvpc_id\": %s\n", askCfg.vpcID)
151+
body = fmt.Sprintf(`{
152+
"cluster_type": "Ask",
153+
"name": "%s",
154+
"region_id": "%s",
155+
"zoneid": "%s",
156+
"vpc_id": "%s",
157+
"vswitch_id": "%s",
158+
"nat_gateway": false,
159+
"private_zone": true
160+
}`, clusterName, askCfg.regionID, askCfg.zoneID, askCfg.vpcID, askCfg.vswitchID)
162161
} else {
163-
log.Info("vpcID is not specified, a new vpc will be created")
164-
}
165-
body := fmt.Sprintf(`{
162+
body = fmt.Sprintf(`{
166163
"cluster_type": "Ask",
167164
"name": "%s",
168165
"region_id": "%s",
169-
"zoneid": "%s", %s
166+
"zoneid": "%s",
170167
"nat_gateway": true,
171168
"private_zone": true
172-
}`, clusterName, askCfg.regionID, askCfg.zoneID, vpcIDEntry)
169+
}`, clusterName, askCfg.regionID, askCfg.zoneID)
170+
}
173171

174172
request.Content = []byte(body)
175173
response, err := cli.ProcessCommonRequest(request)
176174
if err != nil {
175+
177176
return "", err
178177
}
179-
if sdkErr := isErrResponse(response.GetHttpContentString()); sdkErr != nil {
180-
if sdkErr.errorCode == ClusterNameAlreadyExist {
181-
// clusterName already exists, query Aliyun to get the clusterID
182-
// corresponding to the clusterName
183-
clsID, getClsIDErr := getClusterIDByName(cli, clusterName, askCfg.regionID)
184-
if getClsIDErr != nil {
185-
return "", getClsIDErr
186-
}
187-
return clsID, nil
188-
}
189-
return "", sdkErr
190-
}
191178

192179
// cluster information of the newly created ASK in json format
193180
clsInfo := make(map[string]string)
@@ -201,42 +188,22 @@ func sendCreationRequest(cli *sdk.Client, clusterName string, askCfg ASKConfig)
201188
return clusterID, nil
202189
}
203190

204-
// isErrResponse checks if given responseBody belongs to an error response,
205-
// if yes, the responseBody is parsed and an AliyunSDKErr is returned
206-
func isErrResponse(responseBody string) *AliyunSDKErr {
207-
// on success, the response body is in JSON format
208-
if strutil.IsJSON(responseBody) {
209-
return nil
210-
}
211-
// an error response body:
191+
// isSDKErr reports whether err is an SDK server error, recognised by the
// "SDK.ServerError" prefix of its message. A nil err is never an SDK error.
func isSDKErr(err error) bool {
	// guard against a nil error so callers cannot trigger a nil dereference
	if err == nil {
		return false
	}
	return strings.HasPrefix(err.Error(), "SDK.ServerError")
}
194+
195+
func getSDKErrCode(err error) string {
196+
// an SDK error looks like:
212197
//
213-
// ERROR: SDK.ServerError
198+
// SDK.ServerError
214199
// ErrorCode:
215200
// Recommend:
216201
// RequestId:
217202
// Message: {"code":"ClusterNameAlreadyExist","message":"cluster name {XXX} already exist in your clusters","requestId":"C2D0F836-DD3D-4749-97AB-10AE8371BABE","status":400}
218-
sdkErr := &AliyunSDKErr{}
219-
errEntries := strings.Split(responseBody, "\n")
220-
sdkErr.errorName = (strings.Split(errEntries[0], ": "))[1]
221-
errorCode := (strings.Split(errEntries[4], ":"))[2]
222-
// remove quotes around the string
223-
sdkErr.errorCode = AliyunSDKErrCode(errorCode[1 : len(errorCode)-1])
224-
errorMessage := (strings.Split(errEntries[4], ":"))[4]
225-
sdkErr.errorMessage = errorMessage[1 : len(errorMessage)-1]
226-
return sdkErr
227-
}
228-
229-
// clusterNotFoundErr checks if given err is ASK ClusterNotFound Error
230-
func clusterNotFoundErr(err error) bool {
231-
ase, ok := err.(*AliyunSDKErr)
232-
if !ok {
233-
return false
234-
}
235-
if ase.errorCode == ClusterNotFound ||
236-
ase.errorCode == OprationNotSupported {
237-
return true
238-
}
239-
return false
203+
errMsg := strings.Split(err.Error(), "\n")[4]
204+
errCodeWithQuote := strutil.SplitFields(errMsg, ':', ',')[2]
205+
// remove surrounding quotes
206+
return errCodeWithQuote[1 : len(errCodeWithQuote)-1]
240207
}
241208

242209
// getASKState gets the latest state of the ASK with the given clusterID
@@ -253,10 +220,6 @@ func getASKState(cli *sdk.Client, clusterID, regionID string) (string, error) {
253220
if err != nil {
254221
return "", err
255222
}
256-
errRep := isErrResponse(response.GetHttpContentString())
257-
if errRep != nil {
258-
return "", err
259-
}
260223

261224
var clsInfo map[string]interface{}
262225
if err := json.Unmarshal(response.GetHttpContentBytes(), &clsInfo); err != nil {
@@ -377,24 +340,29 @@ func (mpa *MasterProvisionerAliyun) getASKConfigs() (cfg ASKConfig, err error) {
377340
err = getErr
378341
}
379342

380-
regionID, exist := ASKCfgMp.Data[AliyunASKCfgMpRegionID]
381-
if !exist {
343+
regionID, riExist := ASKCfgMp.Data[AliyunASKCfgMpRegionID]
344+
if !riExist {
382345
err = fmt.Errorf("%s not exist", AliyunASKCfgMpRegionID)
383346
return
384347
}
385348
cfg.regionID = regionID
386349

387-
zoneID, exist := ASKCfgMp.Data[AliyunASKCfgMpZoneID]
388-
if !exist {
350+
zoneID, ziExist := ASKCfgMp.Data[AliyunASKCfgMpZoneID]
351+
if !ziExist {
389352
err = fmt.Errorf("%s not exist", AliyunASKCfgMpZoneID)
390353
return
391354
}
392355
cfg.zoneID = zoneID
393356

394-
vpcID, exist := ASKCfgMp.Data[AliyunASKCfgMpVPCID]
395-
if exist {
357+
vpcID, viExist := ASKCfgMp.Data[AliyunASKCfgMpVPCID]
358+
vsID, vsiExist := ASKCfgMp.Data[AliyunASKCfgMpVSID]
359+
if viExist != vsiExist {
360+
err = errors.New("vswitchID and vpcID need to be used together")
361+
}
362+
363+
if viExist && vsiExist {
396364
cfg.vpcID = vpcID
397-
return
365+
cfg.vswitchID = vsID
398366
}
399367

400368
return
@@ -421,29 +389,55 @@ func (mpa *MasterProvisionerAliyun) CreateVirtualCluster(vc *tenancyv1alpha1.Vir
421389
return err
422390
}
423391

424-
clsID, err := sendCreationRequest(cli, vc.Name, askCfg)
392+
var (
393+
clsID string
394+
clsState string
395+
)
396+
creationTimeout := time.After(100 * time.Second)
397+
clsID, err = sendCreationRequest(cli, vc.Name, askCfg)
425398
if err != nil {
426-
return err
399+
if !isSDKErr(err) {
400+
return err
401+
}
402+
// check SDK error code
403+
if getSDKErrCode(err) == ClusterNameAlreadyExist {
404+
// clusterName already exists, query Aliyun to get the clusterID
405+
// corresponding to the clusterName
406+
var getClsIDErr error
407+
clsID, getClsIDErr = getClusterIDByName(cli, vc.Name, askCfg.regionID)
408+
if getClsIDErr != nil {
409+
return getClsIDErr
410+
}
411+
var getStErr error
412+
clsState, getStErr = getASKState(cli, clsID, askCfg.regionID)
413+
if getStErr != nil {
414+
return getStErr
415+
}
416+
417+
if clsState != "running" && clsState != "initial" {
418+
return fmt.Errorf("unknown ASK(%s) state: %s", vc.Name, clsState)
419+
}
420+
}
427421
}
428422

429-
log.Info("ASK is creating", "ASK-ID", clsID)
423+
log.Info("creating the ASK", "ASK-ID", clsID)
430424

431425
// 3. block until the newly created ASK is up and running
432-
creationTimeout := time.After(120 * time.Second)
433-
434-
OuterLoop:
426+
PollASK:
435427
for {
436428
select {
437429
case <-time.After(10 * time.Second):
438-
clsState, err := getASKState(cli, clsID, askCfg.regionID)
439-
if err != nil {
440-
return err
441-
}
442430
if clsState == "running" {
443431
// ASK is up and running, stop polling
444432
log.Info("ASK is up and running", "ASK-ID", clsID)
445-
break OuterLoop
433+
break PollASK
434+
}
435+
var getStErr error
436+
clsState, getStErr = getASKState(cli, clsID, askCfg.regionID)
437+
if getStErr != nil {
438+
return getStErr
446439
}
440+
447441
case <-creationTimeout:
448442
return fmt.Errorf("creating cluster(%s) timeout", clsID)
449443
}
@@ -518,13 +512,17 @@ OuterLoop:
518512
case <-time.After(2 * time.Second):
519513
state, err := getASKState(cli, clusterID, askCfg.regionID)
520514
if err != nil {
521-
if clusterNotFoundErr(err) {
522-
log.Info("corresponding ASK cluster is not found", "vc-name", vc.Name)
523-
break OuterLoop
515+
if isSDKErr(err) {
516+
if getSDKErrCode(err) == ClusterNotFound {
517+
log.Info("corresponding ASK cluster is not found", "vc-name", vc.Name)
518+
break OuterLoop
519+
}
524520
}
525521
return err
526522
}
527523
if state == "deleting" {
524+
// once the ASK cluster enters the 'deleting' state, the cloud
525+
// provider will delete the cluster
528526
log.Info("ASK cluster is being deleted")
529527
break OuterLoop
530528
}

0 commit comments

Comments
 (0)