intel
diff --git a/backend/generate_pptx.py
Lines changed: 41 additions & 2475 deletions b/backend/generate_pptx.py
Lines changed: 41 additions & 2475 deletions
diff --git a/backend/main.py
Lines changed: 68 additions & 40 deletions b/backend/main.py
Lines changed: 68 additions & 40 deletions
diff --git a/backend/pptx_builder/__init__.py
Lines changed: 1 addition & 0 deletions b/backend/pptx_builder/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/backend/pptx_builder/builder.py
Lines changed: 64 additions & 0 deletions b/backend/pptx_builder/builder.py
Lines changed: 64 additions & 0 deletions
@@ -1,7 +1,6 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-
-from fastapi import FastAPI, HTTPException, File, UploadFile
+from fastapi import BackgroundTasks, FastAPI, HTTPException, File, UploadFile
 from pydantic import BaseModel
 import fitz  # PyMuPDF
 from pathlib import Path
@@ -12,11 +11,15 @@
 from generate_image_embedding import generate_image_embedding
 from fastapi.responses import FileResponse, JSONResponse
 from generate_pptx import create_pptx
+from generate_pptx import create_pptx
 from starlette.background import BackgroundTask
 import tempfile
 import imagehash
 from PIL import Image
 import io
+import uuid
+from typing import Dict
+import json
 
 app = FastAPI()
 
@@ -26,22 +29,10 @@
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 
 
-@app.post("/parse")
-async def parse_pdf(file: UploadFile = File(...)):
-    """
-    Endpoint to parse a PDF file uploaded via multipart/form-data.
-    Extracts images, generates captions and embeddings, and returns the data.
-    """
-    temp_file_path = None
+def process_pdf_to_file(job_id: str, pdf_path: str, filename: str):
     try:
-        # Create temp file with delete=False to avoid Windows file locking issues
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
-            temp_file.write(await file.read())
-            temp_file_path = temp_file.name
-
-        print(f"DEBUG : Temporary PDF file created at: {temp_file_path}")
-        # Open the PDF file using PyMuPDF (now works on Windows since file is closed)
-        pdf_file = fitz.open(str(temp_file_path))
+        print(f"Processing job {job_id}")
+        pdf_file = fitz.open(str(pdf_path))
         image_data = []
         image_order = 1
         seen_hashes = set()
@@ -88,33 +79,67 @@ async def parse_pdf(file: UploadFile = File(...)):
 
         # Prepare the response data
         response_data = {
-            "name": file.filename,
+            "name": filename,
             "details": f"Extracted {len(image_data)} images from the PDF.",
             "images": image_data,
             "text": extracted_text,
         }
 
-        return JSONResponse(content=response_data)
+        temp_dir = tempfile.gettempdir()
+        result_path = os.path.join(temp_dir, f"{job_id}.json")
+        with open(result_path, "w") as f:
+            json.dump(response_data, f)
 
     except Exception as e:
-        print(f"Error processing PDF: {e}")
-        raise HTTPException(
-            status_code=500, detail=f"An error occurred while processing the PDF: {e}"
-        )
+        print(f"Error in processing pdf job_id: {job_id}: {e}")
+
     finally:
-        # Clean up temporary file on Windows
-        if temp_file_path and os.path.exists(temp_file_path):
-            try:
-                os.unlink(temp_file_path)
-                print(f"DEBUG: Cleaned up temporary file: {temp_file_path}")
-            except Exception as cleanup_error:
-                print(
-                    f"Warning: Failed to clean up temporary file {temp_file_path}: {cleanup_error}"
-                )
+        try:
+            if os.path.exists(pdf_path):
+                os.remove(pdf_path)
+        except Exception as cleanup_err:
+            print(f"Warning: Failed to remove temporary PDF {pdf_path}: {cleanup_err}")
+
+
+@app.post("/upload")
+async def upload_file(
+    file: UploadFile = File(...), background_tasks: BackgroundTasks = None
+):
+    """
+    Save an uploaded file to the temp directory and queue it for processing.
+
+    The upload is written to ``<tempdir>/<job_id>_<name>`` and
+    ``process_pdf_to_file`` is scheduled as a background task with the job ID,
+    the saved path and the filename supplied by the client.
+
+    Returns:
+        A dict of the form ``{"jobID": <uuid4 string>}``.
+
+    Raises:
+        HTTPException: 500 if saving the upload or scheduling the task fails.
+    """
+    try:
+        # Generate job ID
+        job_id = str(uuid.uuid4())
+        tmp_dir = tempfile.gettempdir()
+
+        # The filename is client-controlled. Keep only its last component
+        # (treating "\" as a separator too) so directory parts or ".."
+        # segments cannot move the write outside the temp directory.
+        raw_name = file.filename or ""
+        safe_name = os.path.basename(raw_name.replace("\\", "/"))
+        tmp_path = os.path.join(tmp_dir, f"{job_id}_{safe_name}")
+
+        # Save uploaded file to the temp directory
+        with open(tmp_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+
+        # Schedule background PDF processing.
+        # NOTE(review): FastAPI is expected to inject BackgroundTasks from the
+        # type annotation; the ``None`` default would only be seen when this
+        # coroutine is called directly -- confirm no caller does that.
+        background_tasks.add_task(process_pdf_to_file, job_id, tmp_path, file.filename)
+
+        return {"jobID": job_id}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error uploading file: {e}")
+
+
+@app.get("/result/{job_id}")
+def get_result(job_id: str):
+    """
+    Return the stored processing result for a job, if it is available.
+
+    Looks for ``<tempdir>/<job_id>.json`` and returns its parsed contents.
+
+    Returns:
+        The decoded JSON result, or a 202 ``JSONResponse`` while no readable
+        result file exists for the job.
+
+    Raises:
+        HTTPException: 400 if ``job_id`` is not a valid UUID.
+    """
+    # Job IDs are issued by /upload as UUID strings. Rejecting anything else
+    # keeps path separators and ".." out of the filesystem path built below.
+    try:
+        job_key = str(uuid.UUID(job_id))
+    except ValueError:
+        raise HTTPException(status_code=400, detail="Invalid job ID.") from None
+
+    result_path = os.path.join(tempfile.gettempdir(), f"{job_key}.json")
+    try:
+        # Open directly instead of checking existence first, so there is no
+        # window between the check and the read.
+        with open(result_path, "r") as f:
+            return json.load(f)
+    except (FileNotFoundError, json.JSONDecodeError):
+        # Missing file: the job has not produced a result yet. Undecodable
+        # file: presumably the writer is still mid-write, so report
+        # "not ready" instead of failing the request.
+        return JSONResponse(
+            status_code=202, content={"message": "PDF processing not complete yet."}
+        )
 
 
 class PPTXRequest(BaseModel):
+    """Request body accepted by the PPTX generation endpoint."""
+
+    # Presentation content as a free-form dict.
+    # NOTE(review): presumably normalised by validate_and_transform_content
+    # before use -- confirm against the endpoint body.
     content: dict
+    # Language code for the generated deck; defaults to "en". The endpoint
+    # lower-cases it and falls back to "en" for anything other than "en"/"id".
+    language: str | None = "en"
 
 
 def validate_and_transform_content(content: dict) -> dict:
@@ -130,7 +155,7 @@ def validate_and_transform_content(content: dict) -> dict:
     """
     # Ensure required keys exist with default values if missing
     transformed_content = {
-        "title": content.get("title", "Untitled Presentation"),
+        "title": content.get("title", ""),
         "contentType": content.get("contentType", "lecture"),
         "difficultyLevel": content.get("difficultyLevel", "intermediate"),
         "slides": content.get("slides", []),
@@ -143,13 +168,13 @@ def validate_and_transform_content(content: dict) -> dict:
 
     # Validate slides structure
     for slide in transformed_content["slides"]:
-        slide.setdefault("title", "Untitled Slide")
+        slide.setdefault("title", "")
         slide.setdefault("content", [])
         slide.setdefault("notes", "")
 
     # Validate activities structure
     for activity in transformed_content["activities"]:
-        activity.setdefault("title", "Untitled Activity")
+        activity.setdefault("title", "")
         activity.setdefault("description", "")
         activity.setdefault("type", "Exercise")
         activity.setdefault("duration", "20 minutes")
@@ -168,13 +193,13 @@ def validate_and_transform_content(content: dict) -> dict:
 
     # Validate key terms structure
     for term in transformed_content["keyTerms"]:
-        term.setdefault("term", "Untitled Term")
-        term.setdefault("definition", "No definition provided.")
+        term.setdefault("term", "")
+        term.setdefault("definition", "")
 
     # Validate further readings structure
     for reading in transformed_content["furtherReadings"]:
-        reading.setdefault("title", "Untitled Reading")
-        reading.setdefault("author", "Unknown Author")
+        reading.setdefault("title", "")
+        reading.setdefault("author", "")
         reading.setdefault("readingDescription", "")
 
     return transformed_content
@@ -198,7 +223,10 @@ async def generate_pptx(request: PPTXRequest):
         print(temp_pptx_path)
 
         # Generate the PPTX file
-        create_pptx(transformed_content, temp_pptx_path)
+        lang = (request.language or "en").lower()
+        if lang not in ["en", "id"]:
+            lang = "en"
+        create_pptx(transformed_content, temp_pptx_path, lang)
         print(f"Temporary PPTX file created at: {temp_pptx_path}")
 
         if not os.path.exists(temp_pptx_path):
 
@@ -0,0 +1 @@
+# pptx_builder package initialization
@@ -0,0 +1,64 @@
+import os
+import json
+import tempfile
+from pptx import Presentation
+from pptx.util import Inches
+from .constants import SLIDE_WIDTH, SLIDE_HEIGHT
+from .localization import set_language, t
+from .slide_counter import calculate_total_slides
+from .sections import (
+    create_title_slide,
+    create_agenda_slide,
+    create_learning_outcomes_slide,
+    create_key_terms_slide,
+    create_content_slides,
+    create_activity_slides,
+    create_quiz_slides,
+    create_discussion_slides,
+    create_further_readings_slides,
+    create_facilitation_notes_slide,
+    create_closing_slide,
+)
+
+
+def build_full_presentation(content, language="en"):
+    """
+    Assemble the complete presentation for ``content`` and return it.
+
+    Selects the language, computes the total slide count, creates a
+    ``Presentation`` sized from ``SLIDE_WIDTH`` x ``SLIDE_HEIGHT`` (inches)
+    and invokes each section builder in a fixed order.
+    """
+    set_language(language)
+    slide_total = calculate_total_slides(content)
+
+    deck = Presentation()
+    deck.slide_width = Inches(SLIDE_WIDTH)
+    deck.slide_height = Inches(SLIDE_HEIGHT)
+
+    # The title slide is the only builder that does not take the slide total.
+    create_title_slide(deck, content)
+
+    numbered_sections = (
+        create_agenda_slide,
+        create_learning_outcomes_slide,
+        create_key_terms_slide,
+        create_content_slides,
+        create_activity_slides,
+        create_quiz_slides,
+        create_discussion_slides,
+        create_further_readings_slides,
+    )
+    for add_section in numbered_sections:
+        add_section(deck, content, slide_total)
+
+    # A truthy result from the facilitation-notes builder bumps the total
+    # that is handed to the closing slide.
+    if create_facilitation_notes_slide(deck, content, slide_total):
+        slide_total += 1
+
+    create_closing_slide(deck, content, slide_total, slide_total)
+    return deck
+
+
+def _is_within_dir(path, directory):
+    """Return True if ``path`` is ``directory`` itself or lies beneath it."""
+    path = os.path.normcase(path)
+    directory = os.path.normcase(directory)
+    try:
+        # Compare whole path components; a plain string prefix test would
+        # also accept siblings such as "/tmpfoo" for "/tmp".
+        return os.path.commonpath([path, directory]) == directory
+    except ValueError:
+        # commonpath raises when the paths share no common root
+        # (for example, different drives on Windows).
+        return False
+
+
+def create_pptx(content: dict, output_path: str, language: str = "en"):
+    """
+    Build a presentation from ``content`` and save it to ``output_path``.
+
+    Args:
+        content: Presentation data passed to ``build_full_presentation``.
+        output_path: Destination file. Its parent directory must be inside
+            the system temp directory, or the path itself must be inside the
+            ``output`` directory that sits next to this package.
+        language: Language code passed to ``build_full_presentation``.
+
+    Raises:
+        ValueError: If ``output_path`` is outside the allowed directories.
+    """
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    normalized_output_path = os.path.abspath(output_path)
+    allowed_output = os.path.abspath(os.path.join(base_dir, "..", "output"))
+    temp_root = os.path.abspath(tempfile.gettempdir())
+    parent = os.path.dirname(normalized_output_path)
+    is_temp = _is_within_dir(parent, temp_root)
+    is_out = _is_within_dir(normalized_output_path, allowed_output)
+    if not (is_temp or is_out):
+        raise ValueError(
+            "Security violation: Output path must be in allowed directories"
+        )
+    prs = build_full_presentation(content, language)
+    prs.save(normalized_output_path)
+
+
+def cli_build(content_path, output_path, language="en"):
+    """
+    Load presentation content from a JSON file and build the PPTX.
+
+    Args:
+        content_path: Path of the JSON file holding the presentation content.
+        output_path: Destination path, passed through to ``create_pptx``.
+        language: Language code, passed through to ``create_pptx``.
+    """
+    # Read as UTF-8 explicitly so non-ASCII content does not depend on the
+    # platform's default encoding.
+    with open(content_path, "r", encoding="utf-8") as f:
+        content = json.load(f)
+    create_pptx(content, output_path, language)