Skip to content

Commit 5e28e31

Browse files
authored
fix aio exception handling (#2084)
1 parent 58cf5a6 commit 5e28e31

File tree

3 files changed

+64
-64
lines changed

3 files changed

+64
-64
lines changed

nvflare/fuel/f3/drivers/aio_context.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,19 @@ def get_event_loop(self):
4242

4343
return self.loop
4444

45+
def _handle_exception(self, loop, context):
46+
try:
47+
msg = context.get("exception", context["message"])
48+
self.logger.debug(f"AIO Exception: {msg}")
49+
except Exception as ex:
50+
# never let the exception handler itself raise; log the failure at debug level and move on
51+
self.logger.debug(f"exception in aio exception handler: {ex}")
52+
4553
def run_aio_loop(self):
4654
self.logger.debug(f"{self.name}: started AioContext in thread {threading.current_thread().name}")
4755
# self.loop = asyncio.get_event_loop()
4856
self.loop = asyncio.new_event_loop()
57+
self.loop.set_exception_handler(self._handle_exception)
4958
asyncio.set_event_loop(self.loop)
5059
self.logger.debug(f"{self.name}: got loop: {id(self.loop)}")
5160
self.ready.set()

nvflare/fuel/f3/drivers/aio_grpc_driver.py

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ def __init__(self, aio_ctx: AioContext, connector: ConnectorInfo, conn_props: di
6969

7070
conf = CommConfigurator()
7171
if conf.get_bool_var("simulate_unstable_network", default=False):
72-
self.disconn = threading.Thread(target=self._disconnect, daemon=True)
73-
self.disconn.start()
72+
if context:
73+
# a context is only present on the server side, so only the server simulates disconnects
74+
self.disconn = threading.Thread(target=self._disconnect, daemon=True)
75+
self.disconn.start()
7476

7577
def _disconnect(self):
7678
t = random.randint(10, 60)
@@ -88,9 +90,11 @@ def close(self):
8890
if self.context:
8991
self.aio_ctx.run_coro(self.context.abort(grpc.StatusCode.CANCELLED, "service closed"))
9092
self.context = None
93+
self.logger.info("Closed GRPC context")
9194
if self.channel:
9295
self.aio_ctx.run_coro(self.channel.close())
9396
self.channel = None
97+
self.logger.info("Closed GRPC Channel")
9498

9599
def send_frame(self, frame: BytesAlike):
96100
try:
@@ -298,57 +302,42 @@ async def _start_connect(self, connector: ConnectorInfo, aio_ctx: AioContext, co
298302
address = get_address(params)
299303

300304
self.logger.debug(f"CLIENT: trying to connect {address}")
305+
connection = None
301306
try:
302307
secure = ssl_required(params)
303308
if secure:
304-
grpc_channel = grpc.aio.secure_channel(
309+
channel = grpc.aio.secure_channel(
305310
address, options=self.options, credentials=get_grpc_client_credentials(params)
306311
)
307312
self.logger.info(f"created secure channel at {address}")
308313
else:
309-
grpc_channel = grpc.aio.insecure_channel(address, options=self.options)
314+
channel = grpc.aio.insecure_channel(address, options=self.options)
310315
self.logger.info(f"created insecure channel at {address}")
316+
stub = StreamerStub(channel)
311317

312-
async with grpc_channel as channel:
313-
self.logger.debug(f"CLIENT: connected to {address}")
314-
stub = StreamerStub(channel)
315-
conn_props = {DriverParams.PEER_ADDR.value: address}
316-
317-
if secure:
318-
conn_props[DriverParams.PEER_CN.value] = "N/A"
318+
self.logger.debug(f"CLIENT: connected to {address}")
319+
conn_props = {DriverParams.PEER_ADDR.value: address}
319320

320-
connection = AioStreamSession(
321-
aio_ctx=aio_ctx, connector=connector, conn_props=conn_props, channel=channel
322-
)
321+
if secure:
322+
conn_props[DriverParams.PEER_CN.value] = "N/A"
323323

324-
try:
325-
self.logger.debug(f"CLIENT: start streaming on connection {connection}")
326-
msg_iter = stub.Stream(connection.generate_output())
327-
conn_ctx.conn = connection
328-
await connection.read_loop(msg_iter)
329-
except asyncio.CancelledError as error:
330-
self.logger.debug(f"CLIENT: RPC cancelled: {error}")
331-
except Exception as ex:
332-
if self.closing:
333-
self.logger.debug(
334-
f"Connection {connection} closed by {type(ex)}: {secure_format_exception(ex)}"
335-
)
336-
else:
337-
self.logger.debug(
338-
f"Connection {connection} client read exception {type(ex)}: {secure_format_exception(ex)}"
339-
)
340-
self.logger.debug(secure_format_traceback())
324+
connection = AioStreamSession(aio_ctx=aio_ctx, connector=connector, conn_props=conn_props, channel=channel)
341325

342-
with connection.lock:
343-
connection.channel = None
344-
connection.close()
326+
self.logger.debug(f"CLIENT: start streaming on connection {connection}")
327+
msg_iter = stub.Stream(connection.generate_output())
328+
conn_ctx.conn = connection
329+
await connection.read_loop(msg_iter)
345330
except asyncio.CancelledError:
346331
self.logger.debug("CLIENT: RPC cancelled")
332+
except grpc.FutureCancelledError:
333+
self.logger.info("CLIENT: Future cancelled")
347334
except Exception as ex:
348335
conn_ctx.error = f"connection {connection} error: {type(ex)}: {secure_format_exception(ex)}"
349336
self.logger.debug(conn_ctx.error)
350337
self.logger.debug(secure_format_traceback())
351-
338+
finally:
339+
if connection:
340+
connection.close()
352341
conn_ctx.waiter.set()
353342

354343
def connect(self, connector: ConnectorInfo):

nvflare/fuel/f3/drivers/grpc_driver.py

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,18 @@ def close(self):
6868
if self.context:
6969
try:
7070
self.context.abort(grpc.StatusCode.CANCELLED, "service closed")
71-
except:
71+
except Exception as ex:
7272
# abort is best-effort: log any exception at debug level and continue closing
73-
pass
73+
self.logger.debug(f"exception aborting GRPC context: {secure_format_exception(ex)}")
7474
self.context = None
75+
self.logger.info("Closed GRPC context")
7576
if self.channel:
76-
self.channel.close()
77+
try:
78+
self.channel.close()
79+
except Exception as ex:
80+
self.logger.debug(f"exception closing GRPC channel: {secure_format_exception(ex)}")
7781
self.channel = None
82+
self.logger.info("Closed GRPC Channel")
7883

7984
def send_frame(self, frame: Union[bytes, bytearray, memoryview]):
8085
try:
@@ -233,39 +238,36 @@ def connect(self, connector: ConnectorInfo):
233238
params = connector.params
234239
address = get_address(params)
235240
conn_props = {DriverParams.PEER_ADDR.value: address}
241+
connection = None
242+
try:
243+
secure = ssl_required(params)
244+
if secure:
245+
self.logger.debug("CLIENT: creating secure channel")
246+
channel = grpc.secure_channel(
247+
address, options=self.options, credentials=get_grpc_client_credentials(params)
248+
)
249+
self.logger.info(f"created secure channel at {address}")
250+
else:
251+
self.logger.info("CLIENT: creating insecure channel")
252+
channel = grpc.insecure_channel(address, options=self.options)
253+
self.logger.info(f"created insecure channel at {address}")
236254

237-
secure = ssl_required(params)
238-
if secure:
239-
self.logger.debug("CLIENT: creating secure channel")
240-
channel = grpc.secure_channel(
241-
address, options=self.options, credentials=get_grpc_client_credentials(params)
242-
)
243-
self.logger.info(f"created secure channel at {address}")
244-
else:
245-
self.logger.info("CLIENT: creating insecure channel")
246-
channel = grpc.insecure_channel(address, options=self.options)
247-
self.logger.info(f"created insecure channel at {address}")
248-
249-
with channel:
250255
stub = StreamerStub(channel)
251256
self.logger.debug("CLIENT: got stub")
252257
oq = QQ()
253258
connection = StreamConnection(oq, connector, conn_props, "CLIENT", channel=channel)
254259
self.add_connection(connection)
255260
self.logger.debug("CLIENT: added connection")
256-
try:
257-
received = stub.Stream(connection.generate_output())
258-
connection.read_loop(received)
259-
260-
except BaseException as ex:
261-
self.logger.info(f"CLIENT: connection done: {type(ex)}")
262-
263-
with connection.lock:
264-
# when we get here the channel is already closed
265-
# set connection.channel to None to prevent closing channel again in connection.close().
266-
connection.channel = None
267-
connection.close()
268-
self.close_connection(connection)
261+
received = stub.Stream(connection.generate_output())
262+
connection.read_loop(received)
263+
except grpc.FutureCancelledError:
264+
self.logger.debug("RPC Cancelled")
265+
except Exception as ex:
266+
self.logger.error(f"connection {connection} error: {type(ex)}: {secure_format_exception(ex)}")
267+
finally:
268+
if connection:
269+
connection.close()
270+
self.close_connection(connection)
269271
self.logger.info(f"CLIENT: finished connection {connection}")
270272

271273
@staticmethod

0 commit comments

Comments
 (0)