Merge pull request #30 from Gurummang/develop

West-wise · web-flow · commit db7208f0260a · 2024-10-19T02:50:31.000+09:00
Develop
diff --git a/app/rabbitmq_consumer.py b/app/rabbitmq_consumer.py
@@ -51,33 +51,34 @@ async def connect_to_rabbitmq() -> Optional[Connection]:
 
 
 async def on_message(message: IncomingMessage, yara_rules):
-    async with message.process():
+    try:
+        body = message.body
+        logging.info(f"Received message: {body}")
+
+        if not body:
+            logging.error("Received empty message")
+            await message.nack(requeue=False)  # 재처리 없이 nack
+            return
+
+        try:
+            message_str = body.decode("utf-8")
+            logging.info(f"Decoded message: {message_str}")
+        except UnicodeDecodeError:
+            logging.error(f"Failed to decode message: {body}")
+            await message.nack(requeue=False)  # 재처리 없이 nack
+            return
+
         try:
-            body = message.body
-            logging.info(f"Received message: {body}")
-
-            if not body:
-                logging.error("Received empty message")
-                return
-
-            try:
-                message_str = body.decode("utf-8")
-                logging.info(f"Decoded message: {message_str}")
-            except UnicodeDecodeError:
-                logging.error(f"Failed to decode message: {body}")
-                await message.nack(requeue=False)  # 잘못된 메시지 재처리 안 함
-                return
-
-            try:
-                file_id = int(message_str)
-                logging.info(f"Processing file with ID: {file_id}")
-                await scan_file(file_id, yara_rules)
-            except ValueError:
-                logging.error(f"Invalid file ID format: {message_str}")
-                await message.nack(requeue=False)  # 잘못된 파일 ID 재처리 안 함
-        except Exception as e:
-            logging.exception(f"Error processing message: {e}")
-            await message.nack(requeue=True)  # 예외 발생 시 메시지 재처리 가능
+            file_id = int(message_str)
+            logging.info(f"Processing file with ID: {file_id}")
+            await scan_file(file_id, yara_rules)
+            await message.ack()  # 성공적으로 처리되면 ack
+        except ValueError:
+            logging.error(f"Invalid file ID format: {message_str}")
+            await message.nack(requeue=False)  # 잘못된 파일 ID는 재처리 안 함
+    except Exception as e:
+        logging.exception(f"Error processing message: {e}")
+        await message.nack(requeue=False)  # unexpected error: nack without requeue, so the message is not redelivered
 
 
 async def start_consuming(queue_name: str, yara_rules, routing_key: str):
@@ -96,11 +97,20 @@ async def start_consuming(queue_name: str, yara_rules, routing_key: str):
         await queue.bind(exchange, routing_key)
 
         logging.info(f"Waiting for messages in {queue_name}. To exit press CTRL+C")
-        await queue.consume(lambda message: on_message(message, yara_rules))
 
+        async def shutdown():
+            logging.info("Shutting down consumer.")
+            await connection.close()
+
+        # 메시지를 처리하는 부분에 shutdown 처리 로직을 추가
+        loop = asyncio.get_event_loop()
         try:
+            await queue.consume(lambda message: on_message(message, yara_rules))
             await asyncio.Future()  # 무한 대기
+        except asyncio.CancelledError:
+            await shutdown()
         finally:
-            await connection.close()
+            await shutdown()  # always close the RabbitMQ connection on exit; NOTE(review): on CancelledError this is the second shutdown() call
     except Exception as e:
         logging.exception(f"Error in start_consuming: {e}")
+
diff --git a/app/rabbitmq_sender.py b/app/rabbitmq_sender.py
@@ -1,8 +1,9 @@
 import logging
 import ssl
-import time
 import struct
 import pika
+import aio_pika
+import asyncio
 
 from app import (
     ALERT_EXCHANGE_NAME,
@@ -15,6 +16,7 @@
     RABBITMQ_USER,
     RETRY_INTERVAL,
 )
+MAX_RETRIES = 10
 
 # SSL 설정
 ssl_options = None
@@ -23,48 +25,60 @@
     ssl_options = pika.SSLOptions(context=ssl_context)
 
 
-def connect_to_rabbitmq():
-    while True:
+async def connect_to_rabbitmq():
+    retry_count = 0
+    while retry_count < MAX_RETRIES:
         try:
-            credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASSWORD)
-            parameters = pika.ConnectionParameters(
+            ssl_context = None
+            if RABBITMQ_SSL_ENABLED:
+                ssl_context = ssl.create_default_context()
+                ssl_context.check_hostname = False
+                ssl_context.verify_mode = ssl.CERT_NONE
+
+            connection = await aio_pika.connect_robust(
                 host=RABBITMQ_HOST,
                 port=int(RABBITMQ_PORT),
-                credentials=credentials,
-                ssl_options=ssl_options,
-                connection_attempts=3,
-                retry_delay=5,
-                socket_timeout=10.0,  # 타임아웃 설정 (초)
+                login=RABBITMQ_USER,
+                password=RABBITMQ_PASSWORD,
+                ssl=ssl_context,
+                loop=asyncio.get_event_loop()  # asyncio 이벤트 루프 사용
             )
-            connection = pika.BlockingConnection(parameters)
             return connection
-        except pika.exceptions.AMQPConnectionError as e:
+        except aio_pika.AMQPConnectionError as e:
+            retry_count += 1
             logging.error(
-                f"Connection failed, retrying in {RETRY_INTERVAL} seconds... Error: {e}"
+                f"Connection failed, retrying ({retry_count}/{MAX_RETRIES}) in {RETRY_INTERVAL} seconds... Error: {e}"
             )
-            time.sleep(RETRY_INTERVAL)
+            await asyncio.sleep(RETRY_INTERVAL)
+
+    logging.error(f"Failed to connect to RabbitMQ after {MAX_RETRIES} attempts.")
+    return None
 
 
-def send_message(message: int):
-    connection = connect_to_rabbitmq()
-    channel = connection.channel()
+async def send_message(message: int):
+    connection = await connect_to_rabbitmq()
+    if not connection:
+        logging.error("Failed to establish connection to RabbitMQ.")
+        return
 
-    # Exchange 선언
-    channel.exchange_declare(
-        exchange=ALERT_EXCHANGE_NAME, exchange_type=EXCHANGE_TYPE, durable=True
-    )
+    async with connection:
+        channel = await connection.channel()
+
+        # Exchange 선언
+        exchange = await channel.declare_exchange(
+            ALERT_EXCHANGE_NAME, aio_pika.ExchangeType(EXCHANGE_TYPE), durable=True
+        )
 
-    # int 메시지를 바이트로 변환
-    message_bytes = struct.pack('!Q', message)  # '!Q'는 unsigned long long 형식입니다.
+        # int 메시지를 바이트로 변환
+        message_bytes = struct.pack('!Q', message)  # '!Q'는 unsigned long long 형식입니다.
 
-    channel.basic_publish(
-        exchange=ALERT_EXCHANGE_NAME,
-        routing_key=ALERT_ROUTING_KEY,  # 적절한 라우팅 키로 변경
-        body=message_bytes,
-        properties=pika.BasicProperties(
-            delivery_mode=2  # 메시지 영속화
+        await exchange.publish(
+            aio_pika.Message(
+                body=message_bytes,
+                delivery_mode=aio_pika.DeliveryMode.PERSISTENT
+            ),
+            routing_key=ALERT_ROUTING_KEY
         )
-    )
 
-    print(f"Sent message: {message}")
-    connection.close()
+        logging.info(f"Sent message: {message}")
+    # No explicit connection.close() is needed: the `async with connection` block closes it on exit.
diff --git a/app/utils.py b/app/utils.py
@@ -1,6 +1,7 @@
 import asyncio
 import logging
 import os
+import functools
 from collections import defaultdict
 from datetime import datetime
 import pytz
@@ -86,13 +87,18 @@ async def stream_file_from_s3(s3_key):
 
     try:
         loop = asyncio.get_event_loop()
-        response = await loop.run_in_executor(None, s3_client.get_object, {"Bucket": bucket_name, "Key": key})
+        # functools.partial로 키워드 인자를 전달할 수 있도록 함
+        response = await loop.run_in_executor(
+            None, functools.partial(s3_client.get_object, Bucket=bucket_name, Key=key)
+        )
         return response["Body"]
     except Exception as e:
         logging.error(f"Failed to stream file from S3: {e}")
         raise
 
 
+
+
 async def save_scan_result(uploadId: int, stored_file_id, detect, detail):
     try:
         conn = await aiomysql.connect(
@@ -101,7 +107,8 @@ async def save_scan_result(uploadId: int, stored_file_id, detect, detail):
         async with conn.cursor() as cursor:
             try:
                 await cursor.execute(
-                    "INSERT INTO scan_table (file_id, detect, step2_detail) VALUES (%s, %s, %s)",
+                    "INSERT INTO scan_table (file_id, detect, step2_detail) VALUES (%s, %s, %s) "
+                    "ON DUPLICATE KEY UPDATE detect=VALUES(detect), step2_detail=VALUES(step2_detail)",
                     (stored_file_id, detect, detail),
                 )
                 await conn.commit()
@@ -125,7 +132,7 @@ async def save_scan_result(uploadId: int, stored_file_id, detect, detail):
         raise
 
 
-async def select_keyword(matches):
+def select_keyword(matches):
     keyword_count = defaultdict(int)
 
     for match in matches:
@@ -134,12 +141,14 @@ async def select_keyword(matches):
             keyword_count[atk_type] += 1
 
     if keyword_count:
-        most_common_keyword = max(keyword_count, key=keyword_count.get)
-        logging.info(f"Most common atk_type: {most_common_keyword}")
-        return most_common_keyword
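+        # NOTE(review): this stringifies the dict_keys view of all matched atk_type values; it does not pick the single most common one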
+        keywords = str(keyword_count.keys())
+        logging.info(f"Most common atk_type: {keywords}")
+        return keywords
     else:
         logging.info("No atk_type found in matches")
-        return None
+        return "unmatched"  # None 대신 기본값 반환
+
 
 
 async def yara_test_match(file_path, yara_rules):
@@ -161,21 +170,31 @@ async def yara_test_match(file_path, yara_rules):
 
 async def scan_file(upload_id: int, yara_rules):
     try:
+        # 파일 업로드 정보 가져오기
         file_record = await get_file_upload(upload_id)
         salted_hash = file_record["salted_hash"]
 
+        # 저장된 파일 정보 가져오기
         stored_file_record = await get_stored_file(salted_hash)
         stored_file_id = stored_file_record["id"]
         s3_key = stored_file_record["save_path"]
 
         file_stream = await stream_file_from_s3(s3_key)
-        file_data = await file_stream.read()
 
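+        # stream_file_from_s3 returns the "Body" of the S3 get_object response, which is a stream object, not bytes.
+        file_data = file_stream.read()  # read() is required to load the file contents; this call is synchronous (no await)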
+
+        # YARA 룰 매칭
         matches = yara_rules.match(data=file_data)
+
         detect = 1 if matches else 0
 
-        most_common_keyword = await select_keyword(matches)
-        detail = "\n".join([str(match) for match in matches]) if matches else "unmatched"
+        most_common_keyword = select_keyword(matches)
+        if most_common_keyword is None:
+            most_common_keyword = "unmatched"
+        detail = (
+            "\n".join([str(match) for match in matches]) if matches else "unmatched"
+        )
 
         logging.info(f"result: {matches}")
         logging.info(f"detect: {detect}")
@@ -188,6 +207,7 @@ async def scan_file(upload_id: int, yara_rules):
         raise HTTPException(status_code=500, detail="Error scanning file")
 
 
+
 async def get_stored_file(hash: str):
     try:
         conn = await aiomysql.connect(