Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/gradescopeapi/classes/_helpers/_download_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pathlib import Path


def download_url(session, url: str, output_path: Path) -> Path:
    """Stream the file at *url* to *output_path* and return the written path.

    Args:
        session: requests.Session-like object exposing ``get``.
        url: URL of the file to download.
        output_path: Destination file path; parent directories are created
            as needed.

    Returns:
        The destination path as a ``pathlib.Path``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # A streamed response holds its connection open until fully consumed;
    # the context manager guarantees it is closed even if writing fails.
    with session.get(url, stream=True, allow_redirects=True) as r:
        r.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    return output_path
169 changes: 169 additions & 0 deletions src/gradescopeapi/classes/_helpers/_export_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import json
import time
from urllib.parse import urljoin

from bs4 import BeautifulSoup

# Seconds to wait between polls of the export-status endpoint.
DEFAULT_POLL_INTERVAL = 1


def get_submission_download_url(
    base_url: str,
    course_id: str,
    assignment_id: str,
    submission_id: str,
) -> str:
    """Build the direct download URL for a single submission's zip archive."""
    path = "/".join(
        [
            "courses",
            course_id,
            "assignments",
            assignment_id,
            "submissions",
            f"{submission_id}.zip",
        ]
    )
    return f"{base_url}/{path}"


def _review_grades_url(base_url: str, course_id: str, assignment_id: str) -> str:
    """Build the URL of the assignment's review_grades page."""
    parts = [base_url, "courses", course_id, "assignments", assignment_id, "review_grades"]
    return "/".join(parts)


def _find_submission_zip_link_on_assignment_page(
    soup: BeautifulSoup, base_url: str
) -> str | None:
    """Return the absolute URL of the first submission-zip anchor, or None."""
    for anchor in soup.find_all("a", href=True):
        href = anchor.get("href", "")
        if not href.endswith(".zip"):
            continue
        if "/submissions/" not in href:
            continue
        # Relative hrefs are resolved against the Gradescope base URL.
        return href if href.startswith("http") else urljoin(base_url, href)
    return None


def get_latest_submission_download_url(
    session,
    base_url: str,
    course_id: str,
    assignment_id: str,
) -> str:
    """Locate the submission-zip download link on an assignment page.

    Raises:
        ValueError: If no submission zip link is present on the page.
    """
    assignment_page = f"{base_url}/courses/{course_id}/assignments/{assignment_id}"
    response = session.get(assignment_page)
    response.raise_for_status()
    page = BeautifulSoup(response.text, "html.parser")
    link = _find_submission_zip_link_on_assignment_page(page, base_url)
    if not link:
        raise ValueError("No 'Download Submission' link found on the assignment page.")
    return link


def _find_generated_file_link(soup: BeautifulSoup, base_url: str) -> str | None:
    """Return the absolute URL of the first generated-files zip anchor, or None."""
    anchors = soup.find_all("a", href=True)
    for anchor in anchors:
        href = anchor.get("href", "")
        if "/generated_files/" in href and href.endswith(".zip"):
            # Resolve relative hrefs against the Gradescope base URL.
            return urljoin(base_url, href) if not href.startswith("http") else href
    return None


def _poll_generated_file_until_ready(
    session,
    base_url: str,
    course_id: str,
    file_id: int,
    poll_interval: float = DEFAULT_POLL_INTERVAL,
    poll_max: int | None = None,
) -> str:
    """Poll the generated-file status endpoint until the export completes.

    Args:
        session: requests.Session-like object used for the status requests.
        base_url: Gradescope base URL.
        course_id: Course the generated file belongs to.
        file_id: Id of the generated file being produced server-side.
        poll_interval: Seconds to sleep between status checks.
        poll_max: Maximum number of status checks; None polls indefinitely.

    Returns:
        URL of the finished zip once the server reports "completed".

    Raises:
        ValueError: On a non-JSON response or an unexpected status value.
        TimeoutError: If poll_max checks pass without completion.
    """
    status_url = f"{base_url}/courses/{course_id}/generated_files/{file_id}.json"
    headers = {"Accept": "application/json", "X-Requested-With": "XMLHttpRequest"}
    attempt = 0
    while poll_max is None or attempt < poll_max:
        attempt += 1
        r = session.get(status_url, headers=headers)
        r.raise_for_status()
        try:
            data = r.json()
        except json.JSONDecodeError:
            raise ValueError(f"Unexpected response from {status_url}") from None
        status = (data.get("status") or "").strip()
        if status == "completed":
            return f"{base_url}/courses/{course_id}/generated_files/{file_id}.zip"
        if status not in ("processing", "unprocessed"):
            raise ValueError(f"Export status: {status}")
        # Don't sleep after the final allowed attempt: fall straight through
        # to the timeout instead of wasting one extra poll_interval.
        if poll_max is not None and attempt >= poll_max:
            break
        time.sleep(poll_interval)
    raise TimeoutError("Export did not complete in time.")


def _get_export_csrf_and_headers(soup: BeautifulSoup) -> tuple[str | None, dict]:
    """Return (csrf_token or None, headers) for the export request."""
    meta_tag = soup.find("meta", {"name": "csrf-token"})
    token = meta_tag.get("content") if meta_tag else None
    if not token:
        # Fall back to the hidden Rails form field when the meta tag is absent.
        hidden_input = soup.find("input", {"name": "authenticity_token"}) or {}
        token = hidden_input.get("value")
    headers = {
        "Accept": "application/json, text/html",
        "X-Requested-With": "XMLHttpRequest",
    }
    if token:
        headers["X-CSRF-Token"] = token
    return (token, headers)


def _get_file_id_from_export_response(r) -> int | None:
"""Parse export response; return generated file id if present."""
content_type = (r.headers.get("content-type") or "").strip().lower()
if "application/json" not in content_type:
return None
try:
data = r.json()
file_id = data.get("id") or data.get("generated_file_id")
if file_id is not None:
return int(file_id)
except (json.JSONDecodeError, ValueError, AttributeError, TypeError):
pass
return None


def get_export_all_download_url(
    session,
    base_url: str,
    course_id: str,
    assignment_id: str,
    poll_interval: float = DEFAULT_POLL_INTERVAL,
    poll_max: int | None = None,
) -> str:
    """Return the URL of the assignment export submissions zip file. This is not an idempotent operation.

    If a zip is already available (e.g. from a prior export), returns its URL
    immediately. Otherwise triggers export, waits until the file exists on the
    server, then returns that URL. Later calls may be faster or slower depending
    on whether the file is still available.

    Args:
        session: HTTP session object providing ``get``/``post``
            (e.g. requests.Session; presumably already logged in — verify with caller).
        base_url: Gradescope base URL.
        course_id: Course containing the assignment.
        assignment_id: Assignment whose submissions are exported.
        poll_interval: Seconds between status checks while the export runs.
        poll_max: Maximum number of status checks; None polls indefinitely.

    Raises:
        ValueError: If the export request yields neither a file id nor a link.
        TimeoutError: If the export does not complete within poll_max checks.
    """
    # Check the review_grades page first: a previously generated zip may
    # already be linked there, in which case no new export is triggered.
    submissions_url = _review_grades_url(base_url, course_id, assignment_id)
    r = session.get(submissions_url)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    download_link = _find_generated_file_link(soup, base_url)
    if download_link:
        return download_link

    export_url = f"{base_url}/courses/{course_id}/assignments/{assignment_id}/export"
    token, headers = _get_export_csrf_and_headers(soup)
    if token:
        # POST with the CSRF token scraped from the review_grades page;
        # redirects are suppressed so the response body can be inspected.
        r = session.post(
            export_url,
            data={"authenticity_token": token},
            headers=headers,
            allow_redirects=False,
        )
    else:
        # No CSRF token found on the page: fall back to a plain GET of the
        # export endpoint.
        r = session.get(export_url, headers=headers, allow_redirects=False)
    r.raise_for_status()

    # TODO: Before release, confirm whether export ever returns zip in response body.
    # No logs or HAR evidence so far. Based on test results: uncomment zip-in-body block
    # (restore return (None, r), return type tuple, branching in download_all_submissions)
    # or remove it; and refine the ValueError message below if we see a specific failure shape.
    # if r.headers.get("content-type", "").strip().startswith("application/zip"):
    #     return (None, r)

    # A JSON body carrying a generated-file id means the export was queued;
    # poll the status endpoint until the server reports it completed.
    file_id = _get_file_id_from_export_response(r)
    if file_id is not None:
        return _poll_generated_file_until_ready(
            session,
            base_url,
            course_id,
            file_id,
            poll_interval=poll_interval,
            poll_max=poll_max,
        )

    raise ValueError("Export request did not return a usable response.")
191 changes: 191 additions & 0 deletions src/gradescopeapi/classes/_helpers/_submission_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
"""Parse the review_grades submissions table into structured data."""

from datetime import datetime
from urllib.parse import urljoin
from zoneinfo import ZoneInfo

from bs4 import BeautifulSoup

# Timezone that parsed submission timestamps are normalized to.
_UTC = ZoneInfo("UTC")

# <time datetime="2025-09-22 08:00:55 -0400"> - offset is in the string.
_DATETIME_FMT = "%Y-%m-%d %H:%M:%S %z"


def _parse_submitted_at(cell) -> datetime | None:
"""Parse submission time from <time datetime="..."> (offset in string). Return UTC or None."""
if cell is None:
return None
time_tag = getattr(cell, "find", lambda *a, **k: None)("time")
if time_tag is None:
return None
dt_attr = time_tag.get("datetime")
if not dt_attr:
return None
try:
dt = datetime.strptime(dt_attr.strip(), _DATETIME_FMT)
return dt.astimezone(_UTC)
except ValueError:
return None


def _parse_sections_cell(cell) -> list[str]:
    """
    Sections from DOM: .sectionsColumnCell--sectionSpan (one section per span).
    If no spans, return [].
    """
    if cell is None:
        return []
    selector = getattr(cell, "select", None)
    spans = selector(".sectionsColumnCell--sectionSpan") if selector is not None else []
    names: list[str] = []
    for span in spans:
        label = span.get_text(strip=True)
        if label:  # drop whitespace-only spans
            names.append(label)
    return names


def _has_icon(cell, class_substring: str) -> bool:
    """True if cell contains an <i> whose class list includes class_substring (e.g. 'fa-check')."""
    if cell is None:
        return False

    def matches(class_attr):
        # bs4 may hand the class attribute over as a list or as a string.
        if not class_attr:
            return False
        tokens = class_attr if isinstance(class_attr, list) else class_attr.split()
        return class_substring in tokens

    return cell.find("i", class_=matches) is not None


def _is_graded(cell) -> bool:
    """True if cell contains i.fa-check (Submission is graded).

    Our HTML samples show no fa-* icon at all in the ungraded case, so the
    absence of fa-check is treated as "not graded".
    """
    return _has_icon(cell, "fa-check")


def _is_viewed(cell) -> bool:
    """True if cell contains i.fa-eye (Submission has been viewed).

    False means no fa-eye icon; in our samples the unviewed cell instead
    carries statusIcon-inactive plus the text 'Submission has not been viewed.'.
    """
    return _has_icon(cell, "fa-eye")


def _is_canvas_linked(cell) -> bool:
    """True if cell contains i.fa-link (Linked to Canvas), False if i.fa-unlink (No Canvas link)."""
    if cell is None:
        return False
    # An explicit fa-unlink icon means "not linked", so check for it first.
    if _has_icon(cell, "fa-unlink"):
        return False
    return _has_icon(cell, "fa-link")


def _is_late(cell) -> bool:
    """True if the time cell contains the late badge."""
    return cell is not None and cell.select_one(".lateSubmissionBadge") is not None


def _score_from_cell(cell) -> float | None:
"""Parse score cell text as float; None if missing or not a number."""
if cell is None:
return None
text = getattr(cell, "get_text", lambda *a: "")(" ", strip=True)
if not text or "doesn't have a submission" in text.lower():
return None
try:
return float(text)
except ValueError:
return None


def _row_cells_to_dict(tr, base_url: str) -> dict | None:
    """
    Infer each field from the row's cells using DOM structure (tags, classes, attributes).
    Returns a parsed row dict, or None if the row has no cells.

    Args:
        tr: A <tr> Tag from the review_grades submissions table.
        base_url: Gradescope base URL used to absolutize relative hrefs.

    Returns:
        Dict with keys: submission_url, student_name, email, sections,
        score, graded, viewed, canvas_linked, submitted_at, late.
        All submission-derived fields are forced to their "no submission"
        defaults (None/False/[]) when no submission link is found.
    """
    cells = tr.find_all("td")
    if not cells:
        return None

    # Submission link and student_name: first <a> under the first td.table--primaryLink (First Last column; second is Last, First)
    primary_cells = tr.find_all("td", class_=lambda c: c and "table--primaryLink" in (c if isinstance(c, list) else c.split()))
    first_primary = primary_cells[0] if primary_cells else None
    link = first_primary.find("a", href=True) if first_primary else None
    submission_url: str | None = None
    if link and "/submissions/" in link.get("href", ""):
        href = link.get("href", "")
        submission_url = href if href.startswith("http") else urljoin(base_url, href)
    # Name: link text when we have a submission; else first td.table--primaryLink; when there's no submission that cell has no class, so fall back to first cell
    student_name = (
        link.get_text() if (link and submission_url)
        else (first_primary.get_text() if first_primary else (cells[0].get_text() if cells else None))
    )

    # Email: <td> containing <a href="mailto:...">
    mailto = tr.find("a", href=lambda h: h and h.startswith("mailto:"))
    email_cell = mailto.find_parent("td") if mailto else None

    # Sections: <td> containing .sectionsColumnCell
    sections_cell = next((td for td in cells if td.select_one(".sectionsColumnCell")), None)
    if sections_cell is None:
        # Fallback: cell with comma+slash (section-like content)
        for td in cells:
            text = td.get_text(" ", strip=True)
            if "," in text and "/" in text:
                sections_cell = td
                break

    # Time: <td> containing <time>
    time_cell = next((td for td in cells if td.find("time")), None)

    # Score, graded, viewed, canvas: find by DOM/content.
    # NOTE: the elif order matters — each cell is claimed by the first
    # predicate that matches, so a cell can fill at most one role.
    score_cell = None
    graded_cell = None
    viewed_cell = None
    canvas_cell = None
    for td in cells:
        text = td.get_text(" ", strip=True)
        if _is_graded(td):
            graded_cell = td
        elif _is_viewed(td) or "not been viewed" in text.lower():
            viewed_cell = td
        elif _is_canvas_linked(td):
            canvas_cell = td
        elif _score_from_cell(td) is not None or "doesn't have a submission" in text.lower():
            score_cell = td

    # Without a submission link, submission-derived fields default to
    # None/False regardless of what the cells contain.
    return {
        "submission_url": submission_url,
        "student_name": student_name,
        "email": email_cell.get_text() if email_cell else None,
        "sections": _parse_sections_cell(sections_cell),
        "score": _score_from_cell(score_cell) if submission_url and score_cell else None,
        "graded": _is_graded(graded_cell) if submission_url else False,
        "viewed": _is_viewed(viewed_cell) if submission_url else False,
        "canvas_linked": _is_canvas_linked(canvas_cell) if submission_url else False,
        "submitted_at": _parse_submitted_at(time_cell) if submission_url else None,
        "late": _is_late(time_cell) if submission_url else False,
    }


def parse_submissions_table(
    soup: BeautifulSoup,
    base_url: str,
    course_id: str,
    assignment_id: str,
) -> list[dict]:
    """
    Parse the review_grades submissions table into a list of parsed row dicts.

    Each row is interpreted by inspecting cell content/structure (no header lookup).
    Each dict: submission_url (str | None; null = no submission), student_name, email, sections,
    score (float | None), graded, viewed, canvas_linked (bool), submitted_at (datetime | None), late (bool).
    """
    table = soup.find("table", id="submissions-table") or soup.find("table")
    if not table:
        return []

    body = table.find("tbody")
    # Without a <tbody>, treat the first <tr> as the header row and skip it.
    rows = body.find_all("tr") if body else table.find_all("tr")[1:]

    parsed: list[dict] = []
    for row in rows:
        record = _row_cells_to_dict(row, base_url)
        # Keep only rows that carry both a name and an email.
        if record is not None and record["student_name"] and record["email"]:
            parsed.append(record)
    return parsed
Loading