Skip to content

Commit 46cb05f

Browse files
committed
fix: dependabot vulnerability for jsondiffpatch by upgrading to AI SDK v5 (#360)
1 parent 703a49c commit 46cb05f

File tree

136 files changed

+13706
-9444
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

136 files changed

+13706
-9444
lines changed

backend/generate_pptx.py

Lines changed: 41 additions & 2475 deletions
Large diffs are not rendered by default.

backend/main.py

Lines changed: 68 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Copyright (C) 2025 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
3-
4-
from fastapi import FastAPI, HTTPException, File, UploadFile
3+
from fastapi import BackgroundTasks, FastAPI, HTTPException, File, UploadFile
54
from pydantic import BaseModel
65
import fitz # PyMuPDF
76
from pathlib import Path
@@ -12,11 +11,15 @@
1211
from generate_image_embedding import generate_image_embedding
1312
from fastapi.responses import FileResponse, JSONResponse
1413
from generate_pptx import create_pptx
14+
from generate_pptx import create_pptx
1515
from starlette.background import BackgroundTask
1616
import tempfile
1717
import imagehash
1818
from PIL import Image
1919
import io
20+
import uuid
21+
from typing import Dict
22+
import json
2023

2124
app = FastAPI()
2225

@@ -26,22 +29,10 @@
2629
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
2730

2831

29-
@app.post("/parse")
30-
async def parse_pdf(file: UploadFile = File(...)):
31-
"""
32-
Endpoint to parse a PDF file uploaded via multipart/form-data.
33-
Extracts images, generates captions and embeddings, and returns the data.
34-
"""
35-
temp_file_path = None
32+
def process_pdf_to_file(job_id: str, pdf_path: str, filename: str):
3633
try:
37-
# Create temp file with delete=False to avoid Windows file locking issues
38-
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
39-
temp_file.write(await file.read())
40-
temp_file_path = temp_file.name
41-
42-
print(f"DEBUG : Temporary PDF file created at: {temp_file_path}")
43-
# Open the PDF file using PyMuPDF (now works on Windows since file is closed)
44-
pdf_file = fitz.open(str(temp_file_path))
34+
print(f"Processing job {job_id}")
35+
pdf_file = fitz.open(str(pdf_path))
4536
image_data = []
4637
image_order = 1
4738
seen_hashes = set()
@@ -88,33 +79,67 @@ async def parse_pdf(file: UploadFile = File(...)):
8879

8980
# Prepare the response data
9081
response_data = {
91-
"name": file.filename,
82+
"name": filename,
9283
"details": f"Extracted {len(image_data)} images from the PDF.",
9384
"images": image_data,
9485
"text": extracted_text,
9586
}
9687

97-
return JSONResponse(content=response_data)
88+
temp_dir = tempfile.gettempdir()
89+
result_path = os.path.join(temp_dir, f"{job_id}.json")
90+
with open(result_path, "w") as f:
91+
json.dump(response_data, f)
9892

9993
except Exception as e:
100-
print(f"Error processing PDF: {e}")
101-
raise HTTPException(
102-
status_code=500, detail=f"An error occurred while processing the PDF: {e}"
103-
)
94+
print(f"Error in processing pdf job_id: {job_id}: {e}")
95+
10496
finally:
105-
# Clean up temporary file on Windows
106-
if temp_file_path and os.path.exists(temp_file_path):
107-
try:
108-
os.unlink(temp_file_path)
109-
print(f"DEBUG: Cleaned up temporary file: {temp_file_path}")
110-
except Exception as cleanup_error:
111-
print(
112-
f"Warning: Failed to clean up temporary file {temp_file_path}: {cleanup_error}"
113-
)
97+
try:
98+
if os.path.exists(pdf_path):
99+
os.remove(pdf_path)
100+
except Exception as cleanup_err:
101+
print(f"Warning: Failed to remove temporary PDF {pdf_path}: {cleanup_err}")
102+
103+
104+
@app.post("/upload")
async def upload_file(
    file: UploadFile = File(...), background_tasks: BackgroundTasks = None
):
    """Store an uploaded PDF in the temp directory and queue it for processing.

    The upload is copied to a temporary file and ``process_pdf_to_file`` is
    scheduled as a background task. The caller receives a job ID that can be
    polled through ``GET /result/{job_id}``.

    Args:
        file: The uploaded file (multipart/form-data).
        background_tasks: Task queue for the current request.

    Returns:
        ``{"jobID": <uuid4 string>}``.

    Raises:
        HTTPException: 500 if the upload cannot be saved or scheduled.
    """
    # Generate job ID
    job_id = str(uuid.uuid4())

    # The client-supplied filename is deliberately kept out of the on-disk
    # path: a name such as "../../x" would otherwise let the upload be written
    # outside the temp directory. The original name is still forwarded to the
    # background task, which only uses it as display metadata.
    tmp_path = os.path.join(tempfile.gettempdir(), f"{job_id}.pdf")

    try:
        # Save uploaded file to the temp directory
        with open(tmp_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Schedule background PDF processing
        background_tasks.add_task(process_pdf_to_file, job_id, tmp_path, file.filename)
    except Exception as e:
        # Nothing will ever process or delete this file, so do not leave a
        # partial upload behind.
        try:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        except OSError as cleanup_err:
            print(f"Warning: Failed to remove temporary PDF {tmp_path}: {cleanup_err}")
        raise HTTPException(
            status_code=500, detail=f"Error uploading file: {e}"
        ) from e

    return {"jobID": job_id}
124+
125+
126+
@app.get("/result/{job_id}")
def get_result(job_id: str):
    """Return the stored processing result for ``job_id`` if it is ready.

    Args:
        job_id: The job identifier returned by ``POST /upload``.

    Returns:
        The parsed JSON result, or a 202 ``JSONResponse`` while the result
        file does not exist yet or is still being written.

    Raises:
        HTTPException: 400 if ``job_id`` is not a valid UUID.
    """
    # Job IDs are minted with uuid4(); rejecting anything else stops callers
    # from reading arbitrary "<name>.json" files that live in the temp dir.
    try:
        canonical_id = str(uuid.UUID(job_id))
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.") from None

    result_path = os.path.join(tempfile.gettempdir(), f"{canonical_id}.json")
    not_ready = JSONResponse(
        status_code=202, content={"message": "PDF processing not complete yet."}
    )

    # Open directly instead of checking os.path.exists() first: the check and
    # the open are not atomic.
    try:
        with open(result_path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        return not_ready
    except json.JSONDecodeError:
        # The background task writes the file in place, so a reader can catch
        # it half-written; report "not ready" rather than failing with a 500.
        return not_ready
114138

115139

116140
class PPTXRequest(BaseModel):
    """Request body for the PPTX generation endpoint."""

    # Presentation content as a free-form dictionary.
    content: dict

    # Requested language code for the generated deck; defaults to "en".
    language: str | None = "en"
118143

119144

120145
def validate_and_transform_content(content: dict) -> dict:
@@ -130,7 +155,7 @@ def validate_and_transform_content(content: dict) -> dict:
130155
"""
131156
# Ensure required keys exist with default values if missing
132157
transformed_content = {
133-
"title": content.get("title", "Untitled Presentation"),
158+
"title": content.get("title", ""),
134159
"contentType": content.get("contentType", "lecture"),
135160
"difficultyLevel": content.get("difficultyLevel", "intermediate"),
136161
"slides": content.get("slides", []),
@@ -143,13 +168,13 @@ def validate_and_transform_content(content: dict) -> dict:
143168

144169
# Validate slides structure
145170
for slide in transformed_content["slides"]:
146-
slide.setdefault("title", "Untitled Slide")
171+
slide.setdefault("title", "")
147172
slide.setdefault("content", [])
148173
slide.setdefault("notes", "")
149174

150175
# Validate activities structure
151176
for activity in transformed_content["activities"]:
152-
activity.setdefault("title", "Untitled Activity")
177+
activity.setdefault("title", "")
153178
activity.setdefault("description", "")
154179
activity.setdefault("type", "Exercise")
155180
activity.setdefault("duration", "20 minutes")
@@ -168,13 +193,13 @@ def validate_and_transform_content(content: dict) -> dict:
168193

169194
# Validate key terms structure
170195
for term in transformed_content["keyTerms"]:
171-
term.setdefault("term", "Untitled Term")
172-
term.setdefault("definition", "No definition provided.")
196+
term.setdefault("term", "")
197+
term.setdefault("definition", "")
173198

174199
# Validate further readings structure
175200
for reading in transformed_content["furtherReadings"]:
176-
reading.setdefault("title", "Untitled Reading")
177-
reading.setdefault("author", "Unknown Author")
201+
reading.setdefault("title", "")
202+
reading.setdefault("author", "")
178203
reading.setdefault("readingDescription", "")
179204

180205
return transformed_content
@@ -198,7 +223,10 @@ async def generate_pptx(request: PPTXRequest):
198223
print(temp_pptx_path)
199224

200225
# Generate the PPTX file
201-
create_pptx(transformed_content, temp_pptx_path)
226+
lang = (request.language or "en").lower()
227+
if lang not in ["en", "id"]:
228+
lang = "en"
229+
create_pptx(transformed_content, temp_pptx_path, lang)
202230
print(f"Temporary PPTX file created at: {temp_pptx_path}")
203231

204232
if not os.path.exists(temp_pptx_path):

backend/pptx_builder/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# pptx_builder package initialization

backend/pptx_builder/builder.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import os
2+
import json
3+
import tempfile
4+
from pptx import Presentation
5+
from pptx.util import Inches
6+
from .constants import SLIDE_WIDTH, SLIDE_HEIGHT
7+
from .localization import set_language, t
8+
from .slide_counter import calculate_total_slides
9+
from .sections import (
10+
create_title_slide,
11+
create_agenda_slide,
12+
create_learning_outcomes_slide,
13+
create_key_terms_slide,
14+
create_content_slides,
15+
create_activity_slides,
16+
create_quiz_slides,
17+
create_discussion_slides,
18+
create_further_readings_slides,
19+
create_facilitation_notes_slide,
20+
create_closing_slide,
21+
)
22+
23+
24+
def build_full_presentation(content, language="en"):
    """Build the complete presentation for ``content`` and return it.

    Sets the active language, creates a ``Presentation`` sized from
    ``SLIDE_WIDTH`` / ``SLIDE_HEIGHT`` (inches), then adds every section in
    order: title, agenda, learning outcomes, key terms, content, activities,
    quiz, discussion, further readings, facilitation notes and closing.

    Args:
        content: Presentation content passed to every section builder.
        language: Language code handed to ``set_language``.

    Returns:
        The populated ``Presentation`` object (not yet saved).
    """
    set_language(language)
    slide_total = calculate_total_slides(content)

    deck = Presentation()
    deck.slide_width = Inches(SLIDE_WIDTH)
    deck.slide_height = Inches(SLIDE_HEIGHT)

    # The title slide is the only builder that takes no slide total.
    create_title_slide(deck, content)

    # These builders share one signature and run in deck order.
    numbered_sections = (
        create_agenda_slide,
        create_learning_outcomes_slide,
        create_key_terms_slide,
        create_content_slides,
        create_activity_slides,
        create_quiz_slides,
        create_discussion_slides,
        create_further_readings_slides,
    )
    for add_section in numbered_sections:
        add_section(deck, content, slide_total)

    if create_facilitation_notes_slide(deck, content, slide_total):
        slide_total += 1  # update for closing slide numbering if needed

    create_closing_slide(deck, content, slide_total, slide_total)
    return deck
44+
45+
46+
def create_pptx(content: dict, output_path: str, language: str = "en"):
    """Build a presentation from ``content`` and save it to ``output_path``.

    The destination is restricted: the file's parent directory must be inside
    the system temp directory, or the path must be inside the ``output``
    directory that sits next to this package.

    Args:
        content: Presentation content passed to ``build_full_presentation``.
        output_path: Where the ``.pptx`` file is written.
        language: Language code passed to ``build_full_presentation``.

    Raises:
        ValueError: If ``output_path`` is outside the allowed directories.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    normalized_output_path = os.path.abspath(output_path)
    allowed_output = os.path.abspath(os.path.join(base_dir, "..", "output"))
    temp_root = os.path.abspath(tempfile.gettempdir())
    parent = os.path.dirname(normalized_output_path)

    # Containment is checked per path component. A plain str.startswith()
    # prefix test would also accept siblings such as "/tmp_evil" for "/tmp"
    # or ".../output_evil" for ".../output".
    is_temp = _is_within(parent, temp_root)
    is_out = _is_within(normalized_output_path, allowed_output)
    if not (is_temp or is_out):
        raise ValueError(
            "Security violation: Output path must be in allowed directories"
        )

    prs = build_full_presentation(content, language)
    prs.save(normalized_output_path)


def _is_within(path: str, root: str) -> bool:
    """Return True if absolute ``path`` equals ``root`` or lies beneath it."""
    try:
        common = os.path.commonpath([root, path])
    except ValueError:
        # Raised when the paths cannot share a root, e.g. different drives
        # on Windows.
        return False
    # normcase keeps the comparison case-insensitive where the OS is.
    return os.path.normcase(common) == os.path.normcase(root)
59+
60+
61+
def cli_build(content_path, output_path, language="en"):
    """Load presentation content from a JSON file and write it as a PPTX.

    Args:
        content_path: Path to the JSON file holding the presentation content.
        output_path: Destination passed to ``create_pptx``.
        language: Language code passed to ``create_pptx``.
    """
    # JSON is read explicitly as UTF-8 so non-ASCII content does not depend
    # on the platform's locale encoding.
    with open(content_path, "r", encoding="utf-8") as f:
        content = json.load(f)
    create_pptx(content, output_path, language)

0 commit comments

Comments
 (0)