Skip to content

Commit cf7cd4e

Browse files
committed
feat: enable content generation without source for slide (#351)
1 parent 703a49c commit cf7cd4e

File tree

10 files changed

+334
-98
lines changed

10 files changed

+334
-98
lines changed

backend/main.py

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Copyright (C) 2025 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
3-
4-
from fastapi import FastAPI, HTTPException, File, UploadFile
3+
from fastapi import BackgroundTasks, FastAPI, HTTPException, File, UploadFile
54
from pydantic import BaseModel
65
import fitz # PyMuPDF
76
from pathlib import Path
@@ -12,11 +11,15 @@
1211
from generate_image_embedding import generate_image_embedding
1312
from fastapi.responses import FileResponse, JSONResponse
1413
from generate_pptx import create_pptx
14+
from generate_pptx import create_pptx
1515
from starlette.background import BackgroundTask
1616
import tempfile
1717
import imagehash
1818
from PIL import Image
1919
import io
20+
import uuid
21+
from typing import Dict
22+
import json
2023

2124
app = FastAPI()
2225

@@ -26,22 +29,10 @@
2629
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
2730

2831

29-
@app.post("/parse")
30-
async def parse_pdf(file: UploadFile = File(...)):
31-
"""
32-
Endpoint to parse a PDF file uploaded via multipart/form-data.
33-
Extracts images, generates captions and embeddings, and returns the data.
34-
"""
35-
temp_file_path = None
32+
def process_pdf_to_file(job_id: str, pdf_path: str, filename: str):
3633
try:
37-
# Create temp file with delete=False to avoid Windows file locking issues
38-
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
39-
temp_file.write(await file.read())
40-
temp_file_path = temp_file.name
41-
42-
print(f"DEBUG : Temporary PDF file created at: {temp_file_path}")
43-
# Open the PDF file using PyMuPDF (now works on Windows since file is closed)
44-
pdf_file = fitz.open(str(temp_file_path))
34+
print(f"Processing job {job_id}")
35+
pdf_file = fitz.open(str(pdf_path))
4536
image_data = []
4637
image_order = 1
4738
seen_hashes = set()
@@ -88,29 +79,62 @@ async def parse_pdf(file: UploadFile = File(...)):
8879

8980
# Prepare the response data
9081
response_data = {
91-
"name": file.filename,
82+
"name": filename,
9283
"details": f"Extracted {len(image_data)} images from the PDF.",
9384
"images": image_data,
9485
"text": extracted_text,
9586
}
9687

97-
return JSONResponse(content=response_data)
88+
temp_dir = tempfile.gettempdir()
89+
result_path = os.path.join(temp_dir, f"{job_id}.json")
90+
with open(result_path, "w") as f:
91+
json.dump(response_data, f)
9892

9993
except Exception as e:
100-
print(f"Error processing PDF: {e}")
101-
raise HTTPException(
102-
status_code=500, detail=f"An error occurred while processing the PDF: {e}"
103-
)
94+
print(f"Error in processing pdf job_id: {job_id}: {e}")
95+
10496
finally:
105-
# Clean up temporary file on Windows
106-
if temp_file_path and os.path.exists(temp_file_path):
107-
try:
108-
os.unlink(temp_file_path)
109-
print(f"DEBUG: Cleaned up temporary file: {temp_file_path}")
110-
except Exception as cleanup_error:
111-
print(
112-
f"Warning: Failed to clean up temporary file {temp_file_path}: {cleanup_error}"
113-
)
97+
try:
98+
if os.path.exists(pdf_path):
99+
os.remove(pdf_path)
100+
except Exception as cleanup_err:
101+
print(f"Warning: Failed to remove temporary PDF {pdf_path}: {cleanup_err}")
102+
103+
104+
@app.post("/upload")
async def upload_file(
    file: UploadFile = File(...), background_tasks: BackgroundTasks = None
):
    """
    Accept an uploaded file, store it in the system temp directory and
    schedule `process_pdf_to_file` to run on it in the background.

    Args:
        file: The multipart/form-data upload.
        background_tasks: Task queue used to schedule the processing step.
            The ``None`` default only exists so the parameter can follow
            ``file`` in the signature.
            NOTE(review): FastAPI is expected to inject this by annotation -
            confirm against the FastAPI background-tasks documentation.

    Returns:
        ``{"jobID": <uuid4 string>}``; the same ID is used to name the result
        file that ``GET /result/{job_id}`` looks for.

    Raises:
        HTTPException: 500 if the upload cannot be written to disk or no
            background task queue is available.
    """
    if background_tasks is None:
        # Fail before touching the disk so no orphaned upload is left behind.
        raise HTTPException(
            status_code=500,
            detail="Error uploading file: background task queue is not available",
        )

    # Generate job ID
    job_id = str(uuid.uuid4())

    # The filename is supplied by the client. Keep only its last path
    # component (treating both "/" and "\\" as separators) so that it cannot
    # introduce extra directories into the temp path, and fall back to a
    # fixed name when it is missing or empty.
    client_name = file.filename or ""
    safe_name = os.path.basename(client_name.replace("\\", "/")) or "upload.pdf"
    tmp_path = os.path.join(tempfile.gettempdir(), f"{job_id}_{safe_name}")

    # Save uploaded file to the temp directory
    try:
        with open(tmp_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except Exception as e:
        # Best effort: do not leave a partially written upload on disk.
        try:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        except OSError as cleanup_err:
            print(f"Warning: Failed to remove partial upload {tmp_path}: {cleanup_err}")
        raise HTTPException(status_code=500, detail=f"Error uploading file: {e}")

    # Schedule background PDF processing. The original client filename is
    # passed through unchanged because it is only used as a display name.
    background_tasks.add_task(process_pdf_to_file, job_id, tmp_path, file.filename)

    return {"jobID": job_id}
124+
125+
126+
@app.get("/result/{job_id}")
def get_result(job_id: str):
    """
    Return the stored result for a processing job, if it is ready.

    Args:
        job_id: The ID returned by ``POST /upload`` (a UUID string).

    Returns:
        The parsed JSON content of ``<tempdir>/<job_id>.json`` when it can be
        read, otherwise a 202 JSON response saying that processing is not
        complete yet.

    Raises:
        HTTPException: 400 if ``job_id`` is not a valid UUID.
    """
    # job_id comes straight from the URL and is used to build a filesystem
    # path, so only accept real UUIDs and use their canonical string form
    # (the same form str(uuid.uuid4()) produces).
    try:
        canonical_id = str(uuid.UUID(job_id))
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID.")

    result_path = os.path.join(tempfile.gettempdir(), f"{canonical_id}.json")

    try:
        # Open directly instead of checking os.path.exists() first, so there
        # is no window between the check and the read.
        with open(result_path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        pass
    except json.JSONDecodeError:
        # The result file is written in place, so a poll can observe it while
        # it is still incomplete; treat that the same as "not ready".
        pass

    return JSONResponse(
        status_code=202, content={"message": "PDF processing not complete yet."}
    )
114138

115139

116140
class PPTXRequest(BaseModel):

frontend/src/app/api/slide/content-generator.ts

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import { createOllama } from 'ollama-ai-provider'
55
import { type CoreMessage, generateText } from 'ai'
66
import type { ClientSource } from '@/lib/types/client-source'
7+
import type { CourseInfo } from '@/lib/types/course-info-types'
78
import type {
89
LectureContent,
910
AssessmentQuestion,
@@ -42,6 +43,7 @@ export async function generateCourseContent(
4243
sessionLength: number,
4344
difficultyLevel: string,
4445
topicName: string,
46+
courseInfo?: CourseInfo,
4547
): Promise<LectureContent> {
4648
try {
4749
// Check for required environment variables
@@ -55,8 +57,11 @@ export async function generateCourseContent(
5557

5658
// Prepare source content
5759
console.log('Preparing source content...')
58-
const { content: assistantContent, metadata: sourceMetadata } =
59-
await prepareSourceContent(selectedSources)
60+
const { content: assistantContent, metadata: sourceMetadata } = await prepareSourceContent(
61+
selectedSources,
62+
topicName,
63+
courseInfo,
64+
)
6065

6166
// Ensure assistant content fits within context window
6267
const assistantMessage: CoreMessage = {
@@ -112,7 +117,9 @@ export async function generateCourseContent(
112117

113118
const metadataUserMessage: CoreMessage = {
114119
role: 'user',
115-
content: `Generate the title, learning outcomes, and at least 5-10 key terms for a ${difficultyLevel} level ${contentType} on "${topicName}" based STRICTLY on the provided source materials above.`,
120+
content: sourceMetadata.usingCourseContext
121+
? `Generate the title, learning outcomes, and at least 5-10 key terms for a ${difficultyLevel} level ${contentType} on "${topicName}" based on standard academic knowledge and best practices for this subject area.`
122+
: `Generate the title, learning outcomes, and at least 5-10 key terms for a ${difficultyLevel} level ${contentType} on "${topicName}" based STRICTLY on the provided source materials above.`,
116123
}
117124

118125
const metadataMessages = [metadataSystemMessage, assistantMessage, metadataUserMessage]
@@ -137,10 +144,17 @@ export async function generateCourseContent(
137144
const introSystemPrompt = `You are an expert educational content developer. Continue creating a ${difficultyLevel} level ${contentType} on "${topicName}" designed for a ${sessionLength}-minute session.
138145
139146
IMPORTANT INSTRUCTIONS:
140-
1. You MUST base your content ENTIRELY on the source materials provided.
147+
${
148+
sourceMetadata.usingCourseContext
149+
? `1. Since no specific source materials were provided, base your content on standard academic knowledge for the topic.
150+
2. Draw from established educational practices and common curriculum content for this subject area.
151+
3. Create content appropriate for the specified difficulty level and session length.
152+
4. Ensure the introduction provides context and importance of the topic based on general knowledge.`
153+
: `1. You MUST base your content ENTIRELY on the source materials provided.
141154
2. Extract key concepts, terminology, examples, and explanations directly from the source materials.
142155
3. Do not introduce concepts or information that is not present in the source materials.
143-
4. Create an engaging introduction that provides context and importance of the topic.
156+
4. Create an engaging introduction that provides context and importance of the topic.`
157+
}
144158
145159
RESPONSE FORMAT:
146160
Your response MUST be a valid JSON object with EXACTLY these fields:
@@ -157,7 +171,9 @@ CRITICAL: Your response MUST be valid JSON only. Do not include any text, markdo
157171

158172
const introUserMessage: CoreMessage = {
159173
role: 'user',
160-
content: `Generate an engaging introduction for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
174+
content: sourceMetadata.usingCourseContext
175+
? `Generate an engaging introduction for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based on standard academic knowledge and best practices for this subject area.`
176+
: `Generate an engaging introduction for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
161177
}
162178

163179
const introMessages = [introSystemMessage, assistantMessage, introUserMessage]
@@ -179,9 +195,15 @@ CRITICAL: Your response MUST be valid JSON only. Do not include any text, markdo
179195
const specialSlidesSystemPrompt = `You are an expert educational content developer. Continue creating a ${difficultyLevel} level ${contentType} on "${topicName}" designed for a ${sessionLength}-minute session.
180196
181197
IMPORTANT INSTRUCTIONS:
182-
1. You MUST base your content ENTIRELY on the source materials provided.
198+
${
199+
sourceMetadata.usingCourseContext
200+
? `1. Since no specific source materials were provided, base your content on standard academic knowledge for the topic.
201+
2. Draw from established educational practices and common curriculum content for this subject area.
202+
3. Create content appropriate for the specified difficulty level and session length.`
203+
: `1. You MUST base your content ENTIRELY on the source materials provided.
183204
2. Extract key concepts, terminology, examples, and explanations directly from the source materials.
184-
3. Do not introduce concepts or information that is not present in the source materials.
205+
3. Do not introduce concepts or information that is not present in the source materials.`
206+
}
185207
4. Create ONLY the following special slides:
186208
- Introduction slide (first slide that introduces the topic)
187209
- Agenda/Overview slide (outlines what will be covered)
@@ -228,7 +250,9 @@ CRITICAL: Your response MUST be valid JSON only. Do not include any text, markdo
228250

229251
const specialSlidesUserMessage: CoreMessage = {
230252
role: 'user',
231-
content: `Generate the introduction, agenda, assessment, and conclusion slides for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
253+
content: sourceMetadata.usingCourseContext
254+
? `Generate the introduction, agenda, assessment, and conclusion slides for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based on standard academic knowledge and best practices for this subject area.`
255+
: `Generate the introduction, agenda, assessment, and conclusion slides for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
232256
}
233257

234258
const specialSlidesMessages = [
@@ -291,9 +315,15 @@ CRITICAL: Your response MUST be valid JSON only. Do not include any text, markdo
291315
const contentSlidesSystemPrompt = `You are generating content slides ${startSlideNum} through ${endSlideNum} of a total of ${totalContentSlidesNeeded} content slides. Ensure all slides are unique.
292316
293317
IMPORTANT INSTRUCTIONS:
294-
1. You MUST base your content ENTIRELY on the source materials provided.
318+
${
319+
sourceMetadata.usingCourseContext
320+
? `1. Since no specific source materials were provided, base your content on standard academic knowledge for the topic.
321+
2. Draw from established educational practices and common curriculum content for this subject area.
322+
3. Create content appropriate for the specified difficulty level and session length.`
323+
: `1. You MUST base your content ENTIRELY on the source materials provided.
295324
2. Extract key concepts, terminology, examples, and explanations directly from the source materials.
296-
3. Do not introduce concepts or information that is not present in the source materials.
325+
3. Do not introduce concepts or information that is not present in the source materials.`
326+
}
297327
4. Create detailed teaching slides with substantial content on each slide.
298328
5. Focus ONLY on core teaching content slides.
299329
6. Each slide should have comprehensive speaker notes with additional details and examples.
@@ -324,7 +354,11 @@ CRITICAL: Your response MUST be valid JSON only. Do not include any text, markdo
324354

325355
const contentSlidesUserMessage: CoreMessage = {
326356
role: 'user',
327-
content: `Generate content slides ${startSlideNum} through ${endSlideNum} for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.
357+
content: sourceMetadata.usingCourseContext
358+
? `Generate content slides ${startSlideNum} through ${endSlideNum} for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based on standard academic knowledge and best practices for this subject area.
359+
360+
DO NOT create introduction, agenda, assessment, or conclusion slides. Focus ONLY on core teaching content slides.`
361+
: `Generate content slides ${startSlideNum} through ${endSlideNum} for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.
328362
329363
DO NOT create introduction, agenda, assessment, or conclusion slides. Focus ONLY on core teaching content slides.`,
330364
}
@@ -381,7 +415,9 @@ DO NOT create introduction, agenda, assessment, or conclusion slides. Focus ONLY
381415

382416
const activitiesUserMessage: CoreMessage = {
383417
role: 'user',
384-
content: `Generate the activities for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
418+
content: sourceMetadata.usingCourseContext
419+
? `Generate the activities for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based on standard academic knowledge and best practices for this subject area.`
420+
: `Generate the activities for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
385421
}
386422

387423
const activitiesMessages = [activitiesSystemMessage, assistantMessage, activitiesUserMessage]
@@ -417,7 +453,9 @@ DO NOT create introduction, agenda, assessment, or conclusion slides. Focus ONLY
417453

418454
const assessmentUserMessage: CoreMessage = {
419455
role: 'user',
420-
content: `Generate assessment ideas (without example questions) for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
456+
content: sourceMetadata.usingCourseContext
457+
? `Generate assessment ideas (without example questions) for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based on standard academic knowledge and best practices for this subject area.`
458+
: `Generate assessment ideas (without example questions) for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
421459
}
422460

423461
const assessmentMessages = [assessmentSystemMessage, assistantMessage, assessmentUserMessage]
@@ -479,7 +517,9 @@ DO NOT create introduction, agenda, assessment, or conclusion slides. Focus ONLY
479517

480518
const readingsUserMessage: CoreMessage = {
481519
role: 'user',
482-
content: `Generate further reading suggestions for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
520+
content: sourceMetadata.usingCourseContext
521+
? `Generate further reading suggestions for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based on standard academic knowledge and best practices for this subject area.`
522+
: `Generate further reading suggestions for a ${difficultyLevel} level ${contentType} on "${topicName}" with title "${metadataResponse.title}" based STRICTLY on the provided source materials above.`,
483523
}
484524

485525
const readingsMessages = [readingsSystemMessage, assistantMessage, readingsUserMessage]

frontend/src/app/api/slide/route.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ export async function POST(req: Request) {
139139
sessionLength,
140140
difficultyLevel,
141141
topicName,
142+
courseInfo,
142143
} = requestData
143144

144145
console.log('Data from request:', {
@@ -149,6 +150,7 @@ export async function POST(req: Request) {
149150
sessionLength,
150151
difficultyLevel,
151152
topicName,
153+
courseInfo,
152154
})
153155

154156
// Generate course content
@@ -160,6 +162,7 @@ export async function POST(req: Request) {
160162
sessionLength,
161163
difficultyLevel,
162164
topicName,
165+
courseInfo,
163166
)
164167

165168
return NextResponse.json(generatedContent)

frontend/src/app/api/slide/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// Type definitions for course content generation
55

66
import { ClientSource } from '@/lib/types/client-source'
7+
import type { CourseInfo } from '@/lib/types/course-info-types'
78

89
export interface LectureSlide {
910
title: string
@@ -120,6 +121,7 @@ export interface CourseContentRequest {
120121
sessionLength: number
121122
difficultyLevel: string
122123
topicName: string
124+
courseInfo?: CourseInfo
123125
action?: string
124126
content?: LectureContent
125127
}

0 commit comments

Comments
 (0)