diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 000000000..195504389
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,79 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+# This workflow will build a Java project with Gradle and cache/restore any dependencies to improve the workflow execution time
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-java-with-gradle
+
+name: Java CI with Gradle
+
+on:
+  push:
+    branches: [ "master" ]
+    paths-ignore:
+      - 'src/python/**'
+      - '.github/workflows/lint.yml'
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+#    strategy:
+#      matrix:
+#        os: [windows-latest,ubuntu-latest,macos-latest]
+    permissions:
+      contents: read
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up JDK 1.8
+      uses: actions/setup-java@v1
+      with:
+        java-version: 1.8
+    - name: Grant execute permission for gradlew
+      run: chmod +x gradlew
+
+    - name: Build with Gradle
+      run: ./gradlew build
+
+
+  docker:
+    needs: build
+    runs-on: ubuntu-latest
+
+    steps:
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v3
+        with:
+          context: .
+          push: true
+          tags: anoshrz/java_project:latest
+
+      - name: Log out from Docker Hub
+        run: docker logout
+    # NOTE: The Gradle Wrapper is the default and recommended way to run Gradle (https://docs.gradle.org/current/userguide/gradle_wrapper.html).
+    # If your project does not have the Gradle Wrapper configured, you can use the following configuration to run Gradle with a specified version.
+    #
+    # - name: Setup Gradle
+    #   uses: gradle/actions/setup-gradle@417ae3ccd767c252f5661f1ace9f835f9654f2b5 # v3.1.0
+    #   with:
+    #     gradle-version: '8.5'
+    #
+    # - name: Build with Gradle 8.5
+    #   run: gradle build
+
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index a429a2a98..000000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-# This workflow will build a Java project with Gradle
-# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-gradle
-
-name: Java CI with Gradle
-
-on:
-  push:
-    branches: [ master ]
-  pull_request:
-    branches: [ master ]
-
-jobs:
-  build-java:
-
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v2
-
-    - name: Set up JDK 1.8
-      uses: actions/setup-java@v1
-      with:
-        java-version: 1.8
-
-    - name: Grant execute permission for gradlew
-      run: chmod +x gradlew
-
-    - name: Build with Gradle
-      run: ./gradlew build
-
-    - name: Build and Push Docker Image
-      uses: mr-smithers-excellent/docker-build-push@v4
-      with:
-        image: nanajanashia/demo-app
-        registry: docker.io
-        username: ${{ secrets.DOCKER_USERNAME }}
-        password: ${{ secrets.DOCKER_PASSWORD }}
-
diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml
new file mode 100644
index 000000000..3f892a703
--- /dev/null
+++ b/.github/workflows/greetings.yml
@@ -0,0 +1,13 @@
+name: Greetings
+
+on: [pull_request, issues]
+
+jobs:
+  greeting:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/first-interaction@v1
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        issue-message: 'Welcome, and thanks for opening your first issue!'
+        pr-message: 'Welcome, and thanks for opening your first pull request!'
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 000000000..048a66bb5
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,12 @@
+name: Lint and Format Code
+on:
+  push:
+    paths:
+      - 'src/python/**'
+      - '.github/workflows/lint.yml'
+jobs:
+  lint-and-format:
+    uses: anosh-ar/FAPS_Github_Action/.github/workflows/lint_reusable.yml@master
+    secrets:
+      SLACK_WEBHOOK_URL: ${{secrets.SLACK_WEBHOOK_URL}}
+
diff --git a/.github/workflows/lint_reusable.yml b/.github/workflows/lint_reusable.yml
new file mode 100644
index 000000000..47b4e1bb2
--- /dev/null
+++ b/.github/workflows/lint_reusable.yml
@@ -0,0 +1,41 @@
+name: Reusable Linting to Slack
+on:
+  workflow_call:
+    secrets:
+      SLACK_WEBHOOK_URL:
+        required: true
+
+jobs:
+  lint-and-format:
+    runs-on: arc-runner-set
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.8'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black pylint
+      - name: Format code with Black
+        run: black .
+      - name: Run pylint
+        id: lint
+        continue-on-error: true
+        run: pylint ./**/**.py > pylint_report.txt
+
+      - name: Send code rating to Slack
+        run: |
+          RATING=$(grep -oP "Your code has been rated at \K[0-9\.]+/[0-9\.]+" pylint_report.txt)
+          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"The pylint code rating for your latest push is: ${RATING}\"}" ${{ secrets.SLACK_WEBHOOK_URL }}
+
+      - name: Check for critical pylint errors
+        id: pylint_check
+        run: |
+          if grep -E "E[0-9]+" pylint_report.txt; then
+            echo "Critical pylint errors found in the code. Please check pylint_report.txt for details."
+            ERROR_MSG=$(grep -E "E[0-9]+" pylint_report.txt | tr '\n' ' ')  # keep the message on one line so the JSON payload stays valid
+            curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Critical linting errors found in the code. Please check the logs for details:\n\`\`\`$ERROR_MSG\`\`\`\"}" ${{ secrets.SLACK_WEBHOOK_URL }}
+          fi
diff --git a/Dockerfile b/Dockerfile
index d2b1dc574..a382a2311 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@ FROM openjdk:8-jre-alpine
 
 EXPOSE 8080
 
-COPY ./build/libs/my-app-1.0-SNAPSHOT.jar /usr/app/
+#COPY ./build/libs/my-app-1.0-SNAPSHOT.jar /usr/app/
 
 WORKDIR /usr/app
 ENTRYPOINT ["java", "-jar", "my-app-1.0-SNAPSHOT.jar"]
diff --git a/src/python/preprocessing.py b/src/python/preprocessing.py
new file mode 100644
index 000000000..bd16bb21b
--- /dev/null
+++ b/src/python/preprocessing.py
@@ -0,0 +1,34 @@
+import pandas as pd
+
+
+def merge_data():
+
+    # Paths to the CSV files
+    csv_file_1 = "sources/qa.csv"  # first Q&A source
+    csv_file_2 = "sources/cncf_stackoverflow_qas.csv"  # second Q&A source (StackOverflow extract)
+
+    # Read the CSV files
+    df1 = pd.read_csv(csv_file_1)
+    df2 = pd.read_csv(csv_file_2)
+
+    # Select the columns of interest from the first file
+    df1_selected = df1[["Question", "Answer", "Project"]]
+
+    # Select and rename the columns of interest from the second file
+    df2_selected = df2[["question", "answer", "tag"]].rename(
+        columns={"question": "Question", "answer": "Answer", "tag": "Project"}
+    )
+
+    # Concatenate the selected and renamed columns
+    merged_df = pd.concat([df1_selected, df2_selected])
+
+    # Save the merged DataFrame to a new CSV file
+    merged_df.to_csv("merged_qas.csv", index=False)
+
+    print("Columns merged and saved successfully!")
+
+
+if __name__ == "__main__":
+    merge_data()
+    print(test_wrong_value)  # NOTE: undefined name; pylint reports this as an E-level (undefined-variable) error
diff --git a/src/python/stackoverflow_extractor.py b/src/python/stackoverflow_extractor.py
new file mode 100644
index 000000000..66f48543e
--- /dev/null
+++ b/src/python/stackoverflow_extractor.py
@@ -0,0 +1,314 @@
+import yaml
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+import time
+import json
+import os
+import sys
+from datetime import datetime, timedelta
+
+API_KEY = ''  # Replace with your actual Stack Exchange API key
+REQUEST_DELAY = 0  # Number of seconds to wait between requests
+PROGRESS_FILE = 'sources/stackoverflow_progress.json'
+CSV_FILE = 'sources/cncf_stackoverflow_qas.csv'
+PROCESSED_IDS_FILE = 'sources/processed_question_ids.json'
+TAGS_FILE = 'sources/tags.json'
+TAGS_UPDATE_INTERVAL = 7  # Number of days between tag updates
+DAILY_REQUEST_LIMIT = 9000
+
+
+def fetch_with_backoff(api_url, params):
+    """Fetch data from the API, backing off and retrying when rate limited.
+
+    Args:
+        api_url (str): The API endpoint URL.
+        params (dict): Dictionary of query parameters for the API request.
+
+    Returns:
+        dict: The JSON response data from the API if successful.
+        None: If the API request fails.
+    """
+    while True:
+        # print(f"Fetching data with params: {params}")
+        response = requests.get(api_url, params=params)
+        if response.status_code == 200:
+            return response.json()
+        elif response.status_code == 429:
+            # Honour the Retry-After header (falling back to REQUEST_DELAY), then retry
+            retry_after = int(response.headers.get('retry-after', REQUEST_DELAY))
+            print(f"Rate limit exceeded. Waiting {retry_after} seconds before retrying...")
+            time.sleep(retry_after)
+        else:
+            print(f"Failed to fetch data: {response.status_code} - {response.text}")
+            sys.exit()
+    return None
+
+def qa_extractor(request_count, tag, start_page, page_size=100):
+    """Fetch questions from StackOverflow for a given tag.
+
+    Args:
+        request_count (int): Current count of API requests made.
+        tag (str): The tag to search for on StackOverflow.
+        start_page (int): The starting page number for the API request.
+        page_size (int, optional): Number of results per page. Defaults to 100.
+
+    Returns:
+        int: Updated request count after fetching questions.
+    """
+
+    api_url = "https://api.stackexchange.com/2.3/search/advanced"
+    questions = []
+
+    # Load processed question IDs
+    processed_question_ids = load_processed_question_ids()
+
+    while True:
+        if request_count >= DAILY_REQUEST_LIMIT:
+            break
+
+        params = {
+            'page': start_page,
+            'pagesize': page_size,
+            'order': 'desc',
+            'sort': 'activity',
+            'answers': 1,
+            'tagged': tag,
+            'site': 'stackoverflow',
+            'filter': 'withbody',  # Ensuring the 'body' field is included
+            'key': API_KEY
+        }
+
+        response_data = fetch_with_backoff(api_url, params)
+        request_count += 1
+        if not response_data or not response_data['items']:
+            save_progress(tag, "null")
+            break
+        QA_list = []
+        if response_data:
+            questions.extend(response_data['items'])
+
+            for question in response_data['items']:
+                question_id = question['question_id']
+                if question_id in processed_question_ids:
+                    continue
+                if question['answer_count'] > 0:
+                    question_text = remove_html_tags(question['body'])
+                    request_count += 1
+
+                    answers = fetch_answers(question_id)
+                    # formatted_answers = []
+
+                    for count, answer in enumerate(answers, start=1):
+                        if count > 3:
+                            break
+                        if answer['score'] < 0:
+                            continue
+                        answer_text = remove_html_tags(answer['body'])
+                        # formatted_answers.append(f"{count}. {answer_text}")
+
+                        QA_list.append({
+                            "question": question_text,
+                            # "answer": "\n".join(formatted_answers),
+                            "answer": answer_text,
+                            "tag": tag,
+                        })
+
+                    # Add question ID to the set of processed IDs
+                    processed_question_ids.add(question_id)
+
+            has_more = response_data.get('has_more', False)
+            if not has_more:
+                save_progress(tag, "finished")
+                break
+
+            print(f"Fetched {len(response_data['items'])} questions from page {start_page} for tag '{tag}'. Total so far: {len(questions)}")
+            save_to_csv(QA_list, CSV_FILE)
+            save_processed_question_ids(processed_question_ids)
+            start_page += 1
+            save_progress(tag, start_page)
+            time.sleep(REQUEST_DELAY)  # Add delay between requests to avoid rate limiting
+        else:
+            break
+        if request_count >= DAILY_REQUEST_LIMIT:
+            # print(f"Request count is: {request_count}")
+            break
+
+    print(f"Request count for question is: {request_count}")
+    return request_count
+
+def fetch_answers(question_id):
+    """Fetch answers for a specific question from StackOverflow.
+
+    Args:
+        question_id (int): The ID of the question to fetch answers for.
+
+    Returns:
+        list: List of answer items if successful, otherwise an empty list.
+    """
+    api_url = f"https://api.stackexchange.com/2.3/questions/{question_id}/answers"
+    params = {
+        'order': 'desc',
+        'sort': 'votes',
+        'site': 'stackoverflow',
+        'filter': 'withbody',  # Ensuring the 'body' field is included
+        'key': API_KEY
+    }
+
+    response_data = fetch_with_backoff(api_url, params)
+    return response_data['items'] if response_data else []
+
+def remove_html_tags(text):
+    """Remove HTML tags from a given text.
+
+    Args:
+        text (str): The HTML text to be processed.
+
+    Returns:
+        str: The text with HTML tags removed.
+    """
+    soup = BeautifulSoup(text, "html.parser")
+    return soup.get_text()
+
+def extract_all_projects(tags, request_count):
+    """Extract QA pairs for multiple tags.
+
+    Args:
+        tags (list): List of tags to process.
+        request_count (int): Initial count of API requests made.
+    """
+    progress = load_progress()
+    all_tags_done = True  # Flag to check if all tags are done
+    for tag in tags:
+        if progress.get(tag) == "null" or progress.get(tag) == "finished":
+            continue
+        else:
+            all_tags_done = False  # Found a tag that needs processing
+            start_page = progress.get(tag, 1)
+
+            request_count = qa_extractor(request_count, tag, start_page=start_page)
+            if request_count >= DAILY_REQUEST_LIMIT:
+                break
+    if all_tags_done:
+        print("All question-answer data has been fetched from StackOverflow.")
+
+def save_to_csv(data, filename):
+    """Save extracted data to a CSV file.
+
+    Args:
+        data (list): List of dictionaries containing QA data.
+        filename (str): The filename for the CSV file.
+    """
+    if os.path.exists(filename) and os.path.getsize(filename) > 0:
+        try:
+            df = pd.read_csv(filename)
+            df = pd.concat([df, pd.DataFrame(data)], ignore_index=True)
+        except pd.errors.EmptyDataError:
+            df = pd.DataFrame(data)
+    else:
+        df = pd.DataFrame(data)
+    df.to_csv(filename, index=False)
+    # print(f"Data saved to {filename}")
+
+def load_progress():
+    """Load progress data from file.
+
+    Returns:
+        dict: Dictionary containing progress data.
+    """
+    try:
+        with open(PROGRESS_FILE, 'r') as f:
+            data = json.load(f)
+            return data
+    except FileNotFoundError:
+        print(f"File {PROGRESS_FILE} not found.")
+        return {}
+    except json.JSONDecodeError:
+        print(f"Error decoding JSON data in {PROGRESS_FILE}.")
+        return {}
+
+def save_progress(tag, page):
+    """Save progress data to file.
+
+    Args:
+        tag (str): The tag being processed.
+        page (str or int): The current page number or status.
+    """
+    progress = load_progress()
+    progress[tag] = page
+    with open(PROGRESS_FILE, 'w') as f:
+        json.dump(progress, f)
+
+def load_processed_question_ids():
+    """Load processed question IDs from file.
+
+    Returns:
+        set: Set of processed question IDs.
+    """
+    try:
+        if os.path.getsize(PROCESSED_IDS_FILE) == 0:
+            return set()
+        with open(PROCESSED_IDS_FILE, 'r') as f:
+            return set(json.load(f))
+    except FileNotFoundError:
+        return set()
+    except json.JSONDecodeError:
+        return set()
+
+def save_processed_question_ids(processed_ids):
+    """Save processed question IDs to file.
+
+    Args:
+        processed_ids (set): Set of processed question IDs.
+    """
+    with open(PROCESSED_IDS_FILE, 'w') as f:
+        json.dump(list(processed_ids), f)
+
+def load_tags():
+    """Load tags from the JSON file if it's not older than the update interval, otherwise from the YAML file.
+
+    Returns:
+        list: List of tags.
+    """
+    if os.path.exists(TAGS_FILE):
+        with open(TAGS_FILE, 'r') as f:
+            tags_data = json.load(f)
+        last_update = datetime.strptime(tags_data['last_update'], "%Y-%m-%d")
+        if datetime.now() - last_update < timedelta(days=TAGS_UPDATE_INTERVAL):
+            return tags_data['tags']
+
+    # If the JSON file doesn't exist or is older than the update interval, load from YAML
+    with open("sources/landscape_augmented.yml", 'r') as f:
+        data = yaml.safe_load(f)
+
+    tags = []
+    # Initialize a dictionary to track the current category and project name
+    tags_dict = {'Project_name': ""}
+    # Process the loaded data
+    for category in data['landscape']:
+        category_list = ["App Definition and Development", "Orchestration & Management", "Runtime", \
+            "Provisioning", "Observability and Analysis", "Test_Provisioning"]
+        if category['name'] not in category_list:
+            continue
+        tags_dict['Category'] = category['name']
+        for subcategory in category.get('subcategories', []):
+            for item in subcategory.get('items', []):
+                project_name = item['name'].split('(')[0].strip()
+                tags_dict['Project_name'] = project_name
+                tags.append(tags_dict['Project_name'])
+
+    # Save the tags to the JSON file with the current date
+    tags_data = {
+        'tags': tags,
+        'last_update': datetime.now().strftime("%Y-%m-%d")
+    }
+    with open(TAGS_FILE, 'w') as f:
+        json.dump(tags_data, f)
+
+    return tags
+
+if __name__ == "__main__":
+    tags = load_tags()
+    request_count = 0
+    # Extract and save QA pairs incrementally
+    extract_all_projects(tags, request_count)