diff --git a/CHANGELOG.md b/CHANGELOG.md index c82b78d75..a6dd4083b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +#### LlamaParse Document Parser Integration (Issue #692) +- **New LlamaParseParser** (`opencontractserver/pipeline/parsers/llamaparse_parser.py`): Full integration with LlamaParse API for document parsing with layout extraction + - Supports PDF and DOCX file types + - Extracts structural annotations (Title, Heading, Paragraph, Table, Figure, List, etc.) with bounding boxes + - Generates PAWLS page structures with element-level bounding boxes from LlamaParse layout data (no token-level/word-level positions; annotations render as bounding-box outlines) + - Supports multiple bounding box formats (fractional 0-1, absolute coordinates, array format) + - Configurable via environment variables or Django settings +- **Environment variable configuration**: + - `LLAMAPARSE_API_KEY` / `LLAMA_CLOUD_API_KEY`: API key for LlamaParse authentication + - `LLAMAPARSE_RESULT_TYPE`: Output type ("json", "markdown", "text") - default: "json" + - `LLAMAPARSE_EXTRACT_LAYOUT`: Enable layout extraction with bounding boxes - default: True + - `LLAMAPARSE_NUM_WORKERS`: Parallel processing workers - default: 4 + - `LLAMAPARSE_LANGUAGE`: Document language - default: "en" + - `LLAMAPARSE_VERBOSE`: Enable verbose logging - default: False +- **Parser selection via environment variable**: + - `PDF_PARSER`: Set to "llamaparse", "docling" (default), or "nlm" to select default PDF parser + - Location: `config/settings/base.py:740-765` +- **Comprehensive test suite** (`opencontractserver/tests/test_doc_parser_llamaparse.py`): + - Tests for successful parsing with layout extraction + - Tests for markdown mode without layout + - Tests for bounding box format conversion (fractional, absolute, array) + - Tests for annotation creation and token generation + - Tests for error handling (missing API key, API errors, empty results) + - Tests for configuration via settings and kwargs 
override + #### Thread/Message Triggered Corpus Actions for Automated Moderation - **Extended CorpusActionTrigger enum** with `NEW_THREAD` and `NEW_MESSAGE` triggers (`opencontractserver/corpuses/models.py:849-854`) to enable automated moderation of discussion threads - **New moderation tools** (`opencontractserver/llms/tools/moderation_tools.py`): 9 tools for thread moderation including: @@ -44,8 +69,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Automated responses (e.g., welcome messages for new threads) - Content classification (e.g., auto-pin important announcements) -### Added - #### Proactive Apollo Cache Management System (PR #725) - **New `CacheManager` service** (`frontend/src/services/cacheManager.ts`): Centralized Apollo cache management with debouncing, targeted invalidation, and auth-aware cache operations - `resetOnAuthChange()`: Full cache clear with optional refetch for login/logout transitions @@ -56,6 +79,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **New `useCacheManager` hook** (`frontend/src/hooks/useCacheManager.ts`): React hook with memoized CacheManager instance and stable callback references - **Comprehensive test suite** (`frontend/src/services/__tests__/cacheManager.test.ts`, `frontend/src/hooks/__tests__/useCacheManager.test.tsx`): 30+ tests covering debouncing, error handling, lifecycle, singleton management, and auth scenarios +### Technical Details + +#### LlamaParse Parser Architecture +- Uses `llama-parse` library for API communication +- JSON mode with `extract_layout=True` provides bounding boxes as fractions of page dimensions (0-1) +- Converts LlamaParse layout elements to OpenContracts structural annotations +- Generates PAWLS tokens by splitting text into words and distributing across bounding box +- Element type mapping converts LlamaParse labels (title, paragraph, table, etc.) 
to OpenContracts annotation labels +- Falls back to text extraction mode when layout extraction is disabled + ### Fixed #### Cache Management Race Condition Fix (PR #725) diff --git a/config/settings/base.py b/config/settings/base.py index 0a407a1b8..44f8061d4 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -657,6 +657,16 @@ ) use_cloud_run_iam_auth = True +# LlamaParse Settings - for LlamaParse document parser +# Supports both LLAMAPARSE_API_KEY and LLAMA_CLOUD_API_KEY (LlamaIndex's default env var) +_llamaparse_key = env.str("LLAMAPARSE_API_KEY", default="") +LLAMAPARSE_API_KEY = _llamaparse_key or env.str("LLAMA_CLOUD_API_KEY", default="") +LLAMAPARSE_RESULT_TYPE = env.str("LLAMAPARSE_RESULT_TYPE", default="json") +LLAMAPARSE_EXTRACT_LAYOUT = env.bool("LLAMAPARSE_EXTRACT_LAYOUT", default=True) +LLAMAPARSE_NUM_WORKERS = env.int("LLAMAPARSE_NUM_WORKERS", default=4) +LLAMAPARSE_LANGUAGE = env.str("LLAMAPARSE_LANGUAGE", default="en") +LLAMAPARSE_VERBOSE = env.bool("LLAMAPARSE_VERBOSE", default=False) + # LLM SETTING OPENAI_API_KEY = env.str("OPENAI_API_KEY", default="") OPENAI_MODEL = env.str("OPENAI_MODEL", default="gpt-4o") @@ -728,13 +738,29 @@ "SENTENCE_TRANSFORMER_MODELS_PATH", default="/models/sentence-transformers" ) +# Parser selection via environment variable +# Options: "docling" (default), "llamaparse", "nlm" +PDF_PARSER = env.str("PDF_PARSER", default="docling") + +# Map parser names to their full paths +_PDF_PARSER_MAP = { + "docling": "opencontractserver.pipeline.parsers.docling_parser_rest.DoclingParser", + "llamaparse": "opencontractserver.pipeline.parsers.llamaparse_parser.LlamaParseParser", + "nlm": "opencontractserver.pipeline.parsers.nlm_ingest_parser.NLMIngestParser", +} + +# Get the selected PDF parser (with fallback to docling) +_SELECTED_PDF_PARSER = _PDF_PARSER_MAP.get( + PDF_PARSER.lower(), _PDF_PARSER_MAP["docling"] +) + # Preferred parsers for each MIME type PREFERRED_PARSERS = { - "application/pdf": 
"opencontractserver.pipeline.parsers.docling_parser_rest.DoclingParser", + "application/pdf": _SELECTED_PDF_PARSER, "text/plain": "opencontractserver.pipeline.parsers.oc_text_parser.TxtParser", "application/txt": "opencontractserver.pipeline.parsers.oc_text_parser.TxtParser", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "opencontractserver.pipeline.parsers.docling_parser_rest.DoclingParser", # noqa - "application/vnd.openxmlformats-officedocument.presentationml.presentation": "opencontractserver.pipeline.parsers.docling_parser_rest.DoclingParser", # noqa + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": _SELECTED_PDF_PARSER, # noqa + "application/vnd.openxmlformats-officedocument.presentationml.presentation": _SELECTED_PDF_PARSER, # noqa "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "opencontractserver.pipeline.parsers.docling_parser_rest.DoclingParser", # noqa } @@ -802,6 +828,14 @@ "api_key": "", "use_ocr": True, }, + "opencontractserver.pipeline.parsers.llamaparse_parser.LlamaParseParser": { + "api_key": LLAMAPARSE_API_KEY, + "result_type": "json", + "extract_layout": True, + "num_workers": 4, + "language": "en", + "verbose": False, + }, } # Analyzers diff --git a/docs/pipelines/docling_parser.md b/docs/pipelines/docling_parser.md index aa24241d1..4d5446a48 100644 --- a/docs/pipelines/docling_parser.md +++ b/docs/pipelines/docling_parser.md @@ -316,6 +316,8 @@ Common issues and solutions: ## See Also - [Pipeline Overview](pipeline_overview.md) +- [LlamaParse Parser](llamaparse_parser.md) - Cloud-based alternative +- [NLM-Ingest Parser](nlm_ingest_parser.md) - Another local alternative - [PDF Data Layer Architecture](../architecture/PDF-data-layer.md) - [Document Processing Flow](../architecture/asynchronous-processing.md) - [Docling Library](https://github.com/DS4SD/docling) diff --git a/docs/pipelines/llamaparse_parser.md b/docs/pipelines/llamaparse_parser.md new file mode 100644 
index 000000000..913c57b43 --- /dev/null +++ b/docs/pipelines/llamaparse_parser.md @@ -0,0 +1,376 @@ +# LlamaParse Parser + +## Intro + +The LlamaParse Parser integrates with [LlamaParse](https://cloud.llamaindex.ai/) (from LlamaIndex) to parse PDF and DOCX documents with advanced layout extraction. It provides high-quality structural annotations with bounding boxes, making it ideal for complex document layouts. + +LlamaParse is a cloud-based API service that uses advanced ML models to extract document structure, including titles, headings, paragraphs, tables, figures, and more. Unlike the Docling parser which runs as a local microservice, LlamaParse requires an API key and sends documents to LlamaIndex's cloud infrastructure. + +## Architecture + +```mermaid +sequenceDiagram + participant U as User + participant LP as LlamaParseParser + participant API as LlamaParse Cloud API + participant DB as Database + + U->>LP: parse_document(user_id, doc_id) + LP->>DB: Load document + LP->>LP: Write to temp file + LP->>API: HTTP POST with document + API->>API: ML-based parsing + API->>API: Layout extraction + API-->>LP: JSON with layout data + LP->>LP: Parse bounding boxes + LP->>LP: Create annotations (bbox only) + LP->>DB: Store parsed data + LP-->>U: OpenContractDocExport +``` + +## Features + +- **Cloud-based API**: Uses LlamaIndex's managed parsing infrastructure +- **Layout Extraction**: Returns bounding boxes for all document elements +- **Multiple Output Formats**: Supports JSON (with layout), markdown, and plain text +- **Structural Annotations**: Automatically creates annotations for document structure +- **Multi-format Support**: Parses both PDF and DOCX files +- **Parallel Processing**: Configurable worker count for batch processing +- **Automatic OCR**: Handles scanned documents automatically + +## Configuration + +### Environment Variables + +Configure the parser using environment variables: + +```bash +# Required: API key (either variable works) 
+LLAMAPARSE_API_KEY=llx-your-api-key-here +# OR use LlamaIndex's default env var name: +LLAMA_CLOUD_API_KEY=llx-your-api-key-here + +# Optional: Output format ("json", "markdown", "text") +# Default: "json" - required for layout extraction +LLAMAPARSE_RESULT_TYPE=json + +# Optional: Enable layout extraction with bounding boxes +# Default: True +LLAMAPARSE_EXTRACT_LAYOUT=True + +# Optional: Number of parallel workers for batch processing +# Default: 4 +LLAMAPARSE_NUM_WORKERS=4 + +# Optional: Document language code +# Default: "en" +LLAMAPARSE_LANGUAGE=en + +# Optional: Enable verbose logging +# Default: False +LLAMAPARSE_VERBOSE=False + +# Select LlamaParse as the default PDF parser +PDF_PARSER=llamaparse +``` + +### Django Settings + +The parser is configured in `config/settings/base.py`: + +```python +# LlamaParse Settings +LLAMAPARSE_API_KEY = env.str("LLAMAPARSE_API_KEY", default="") +LLAMAPARSE_RESULT_TYPE = env.str("LLAMAPARSE_RESULT_TYPE", default="json") +LLAMAPARSE_EXTRACT_LAYOUT = env.bool("LLAMAPARSE_EXTRACT_LAYOUT", default=True) +LLAMAPARSE_NUM_WORKERS = env.int("LLAMAPARSE_NUM_WORKERS", default=4) +LLAMAPARSE_LANGUAGE = env.str("LLAMAPARSE_LANGUAGE", default="en") +LLAMAPARSE_VERBOSE = env.bool("LLAMAPARSE_VERBOSE", default=False) + +# Parser selection +PDF_PARSER = env.str("PDF_PARSER", default="docling") # Set to "llamaparse" +``` + +### Parser Registration + +The parser is automatically registered in `PREFERRED_PARSERS` when `PDF_PARSER=llamaparse`: + +```python +PREFERRED_PARSERS = { + "application/pdf": "opencontractserver.pipeline.parsers.llamaparse_parser.LlamaParseParser", + # ... 
other mime types +} +``` + +## Usage + +### Basic Usage + +```python +from opencontractserver.pipeline.parsers.llamaparse_parser import LlamaParseParser + +parser = LlamaParseParser() +result = parser.parse_document(user_id=1, doc_id=123) +``` + +### With Options Override + +```python +# Override default settings for a specific parse +result = parser.parse_document( + user_id=1, + doc_id=123, + result_type="json", + extract_layout=True, + language="en", + verbose=True, +) +``` + +### Text-Only Mode (No Layout) + +```python +# For faster parsing without bounding boxes +result = parser.parse_document( + user_id=1, + doc_id=123, + result_type="markdown", # or "text" + extract_layout=False, +) +``` + +## Output + +The parser returns an `OpenContractDocExport` dictionary containing: + +```python +{ + "title": str, # Document title + "description": str, # Document description + "content": str, # Full text content + "page_count": int, # Number of pages + "pawls_file_content": List[dict], # PAWLS token data per page + "labelled_text": List[dict], # Structural annotations + "relationships": List[dict], # (Empty - no relationships extracted) + "doc_labels": List[dict], # (Empty - no doc labels extracted) +} +``` + +## Element Type Mapping + +LlamaParse elements are mapped to OpenContracts annotation labels: + +| LlamaParse Type | OpenContracts Label | +|-----------------|---------------------| +| `title` | Title | +| `section_header` | Section Header | +| `heading` | Heading | +| `text` | Text Block | +| `paragraph` | Paragraph | +| `table` | Table | +| `figure` | Figure | +| `image` | Image | +| `list` | List | +| `list_item` | List Item | +| `caption` | Caption | +| `footnote` | Footnote | +| `header` | Page Header | +| `footer` | Page Footer | +| `page_number` | Page Number | +| `equation` | Equation | +| `code` | Code Block | + +## Processing Steps + +1. 
**Document Loading** + - Loads document from Django storage + - Writes to temporary file (LlamaParse requires file path) + +2. **API Call** + - Sends document to LlamaParse cloud API + - Uses `get_json_result()` for layout mode + - Uses `load_data()` for text/markdown mode + +3. **Bounding Box Conversion** + - LlamaParse returns coordinates in various formats (fractional 0-1 or absolute) + - Converts to absolute page coordinates + - Handles multiple bbox formats (`x/y/w/h`, `left/top/right/bottom`, `x1/y1/x2/y2`, arrays) + - Applies sanity checks and bounds clamping + +4. **Annotation Creation** + - Maps element types to OpenContracts labels + - Creates structural annotations with bounding boxes + - Annotations use empty `tokensJsons` (see [Limitations](#limitations)) + +5. **Cleanup** + - Removes temporary file + - Returns OpenContractDocExport + +## Comparison with Other Parsers + +| Feature | LlamaParse | Docling | NLM-Ingest | +|---------|------------|---------|------------| +| Deployment | Cloud API | Local microservice | Local microservice | +| API Key Required | Yes | No | No | +| Layout Extraction | Yes | Yes | Yes | +| Relationship Detection | No | Yes (groups) | Limited | +| OCR Support | Yes (automatic) | Yes (Tesseract) | Yes | +| DOCX Support | Yes | Yes | No | +| Cost | Per-page pricing | Free | Free | +| Privacy | Cloud processing | Local processing | Local processing | + +## Error Handling + +The parser handles errors gracefully: + +- **Missing API Key**: Returns None with error log +- **Document Not Found**: Returns None with error log +- **API Errors**: Returns None with detailed error message +- **Import Errors**: Returns None if `llama-parse` not installed +- **Empty Results**: Returns None with warning + +Example error handling: + +```python +result = parser.parse_document(user_id=1, doc_id=123) +if result is None: + # Check logs for error details + logger.error("Parsing failed") +``` + +## Troubleshooting + +### Common Issues + +1. 
**API Key Not Configured** + ``` + LlamaParse API key not configured. Set LLAMAPARSE_API_KEY or LLAMA_CLOUD_API_KEY environment variable. + ``` + - Set `LLAMAPARSE_API_KEY` in your environment + - Verify the key is valid at [cloud.llamaindex.ai](https://cloud.llamaindex.ai/) + +2. **Library Not Installed** + ``` + llama-parse library not installed. Install with: pip install llama-parse + ``` + - Install the library: `pip install llama-parse` + - Or add to requirements: `llama-parse>=0.4.0` + +3. **Empty Results** + ``` + LlamaParse returned empty results + ``` + - Verify document is readable (not corrupted) + - Check if document has extractable text + - Try with `verbose=True` for more details + +4. **No Bounding Boxes** + - Ensure `result_type="json"` (not "markdown" or "text") + - Ensure `extract_layout=True` + - Some document types may not support layout extraction + +5. **Rate Limiting** + - LlamaParse has API rate limits + - Reduce `num_workers` for batch processing + - Implement retry logic for production use + +### Debug Mode + +Enable verbose logging for troubleshooting: + +```bash +LLAMAPARSE_VERBOSE=True +``` + +Or in code: + +```python +result = parser.parse_document(user_id=1, doc_id=123, verbose=True) +``` + +## Performance Considerations + +- **Network Latency**: Cloud API adds network round-trip time +- **Per-page Pricing**: LlamaParse charges per page processed +- **Parallel Workers**: Increase `LLAMAPARSE_NUM_WORKERS` for batch jobs +- **Result Type**: "markdown" and "text" modes are faster but lack layout +- **File Size**: Large documents may take longer to upload and process + +## Security Considerations + +- **API Key Security**: Store API key in environment variables, not code +- **Data Privacy**: Documents are sent to LlamaIndex cloud for processing +- **Temporary Files**: Parser cleans up temp files after processing +- **Logging**: API key is redacted from log output + +## Dependencies + +- `llama-parse>=0.4.0`: LlamaParse Python client +- 
`llama-index-core`: Core LlamaIndex library (installed with llama-parse) + +Add to requirements: + +``` +llama-parse>=0.4.0 +``` + +## Limitations + +LlamaParse has several limitations compared to other parsers like Docling: + +### No Token-Level Data + +LlamaParse only provides **element-level bounding boxes**, not token-level (word-level) positions. This means: + +- Annotations display as bounding box outlines only, without individual word highlighting +- The `tokensJsons` field in annotations is empty +- Text selection and word-level interactions are not available for LlamaParse-generated annotations +- The frontend handles this gracefully by showing just the bounding box boundary + +**Workaround**: If you need token-level precision, use the Docling parser instead, which provides full PAWLS token data. + +### No Parent-Child Relationships + +LlamaParse returns **flat layout blocks** without hierarchical structure: + +- No parent/child relationships between elements (e.g., list items under a list) +- No nesting information for sections/subsections +- The `relationships` field in the export is always empty +- Document structure must be inferred from element types and spatial positioning + +**Workaround**: Use the Docling parser for relationship detection, which can group related elements. + +### Cloud Processing Required + +- Documents are sent to LlamaIndex's cloud infrastructure for processing +- Requires internet connectivity +- Subject to LlamaIndex's data handling policies +- Not suitable for highly sensitive documents that cannot leave your network + +**Workaround**: Use Docling or NLM-Ingest for fully local processing. 
+ +### Per-Page Pricing + +- LlamaParse charges per page processed (with layout extraction: 1 extra credit per page) +- Costs can add up for large document volumes +- Free tier has limited credits + +### Bounding Box Precision + +- Bounding boxes may be slightly larger or smaller than the actual content +- Complex layouts (multi-column, overlapping elements) may have less accurate boxes +- Tables and figures are detected as single blocks without internal structure + +### No Streaming Support + +- Entire document must be uploaded and processed before results are returned +- Large documents may have significant processing time +- No progress indicators during parsing + +## See Also + +- [Pipeline Overview](pipeline_overview.md) +- [Docling Parser](docling_parser.md) - Local ML-based alternative with token-level data and relationships +- [NLM-Ingest Parser](nlm_ingest_parser.md) - Another local alternative +- [LlamaParse Documentation](https://developers.llamaindex.ai/python/cloud/llamaparse/) +- [LlamaIndex Cloud](https://cloud.llamaindex.ai/) diff --git a/docs/pipelines/nlm_ingest_parser.md b/docs/pipelines/nlm_ingest_parser.md index 4014d2134..2d8e31f13 100644 --- a/docs/pipelines/nlm_ingest_parser.md +++ b/docs/pipelines/nlm_ingest_parser.md @@ -170,16 +170,19 @@ class NLMIngestParser(BaseParser): - **File Size**: Can handle large PDF files efficiently - **Concurrent Processing**: Thread-safe for parallel processing -## Comparison with Docling Parser - -| Feature | NLM Ingest | Docling | -|---------|------------|---------| -| Speed | Faster | Slower | -| Accuracy | Good | Excellent | -| OCR Support | Limited | Full | -| Table Extraction | Good | Excellent | -| Memory Usage | Lower | Higher | -| Dependencies | Simpler | Complex | +## Comparison with Other Parsers + +| Feature | NLM Ingest | Docling | LlamaParse | +|---------|------------|---------|------------| +| Deployment | Local | Local microservice | Cloud API | +| Speed | Faster | Slower | Network-dependent | 
+| Accuracy | Good | Excellent | Excellent | +| OCR Support | Limited | Full | Full (automatic) | +| Table Extraction | Good | Excellent | Good | +| Memory Usage | Lower | Higher | Minimal (cloud) | +| Dependencies | Simpler | Complex | Simple (API client) | +| Cost | Free | Free | Per-page pricing | +| Privacy | Local | Local | Cloud processing | ## Best Practices @@ -252,6 +255,7 @@ Required Python packages: ## See Also - [Pipeline Overview](pipeline_overview.md) -- [Docling Parser](docling_parser.md) +- [Docling Parser](docling_parser.md) - ML-based local parser with OCR +- [LlamaParse Parser](llamaparse_parser.md) - Cloud-based alternative - [PDF Data Layer Architecture](../architecture/PDF-data-layer.md) - [NLM Ingest Library](https://github.com/nlmatics/nlm-ingestor) diff --git a/docs/pipelines/pipeline_overview.md b/docs/pipelines/pipeline_overview.md index 3530e38b1..71c10c55b 100644 --- a/docs/pipelines/pipeline_overview.md +++ b/docs/pipelines/pipeline_overview.md @@ -23,6 +23,7 @@ graph TD B --> B1[DoclingParser REST] B --> B2[NLMIngestParser] B --> B3[TxtParser] + B --> B4[LlamaParseParser] C --> C1[PdfThumbnailGenerator] C --> C2[TextThumbnailGenerator] @@ -88,6 +89,7 @@ class BaseParser(ABC): Current implementations: - **DoclingParser**: Advanced PDF parser using machine learning (REST microservice) +- **LlamaParseParser**: Cloud-based parser using LlamaParse API with layout extraction - **NLMIngestParser**: Alternative PDF parser using NLM Ingest library - **TxtParser**: Simple text file parser diff --git a/frontend/src/graphql/mutations.ts b/frontend/src/graphql/mutations.ts index 9b6fe1bf3..9dd4a1e76 100644 --- a/frontend/src/graphql/mutations.ts +++ b/frontend/src/graphql/mutations.ts @@ -3318,7 +3318,7 @@ export interface RestoreDeletedDocumentOutput { } export const PERMANENTLY_DELETE_DOCUMENT = gql` - mutation PermanentlyDeleteDocument($documentId: ID!, $corpusId: ID!) 
{ + mutation PermanentlyDeleteDocument($documentId: String!, $corpusId: String!) { permanentlyDeleteDocument(documentId: $documentId, corpusId: $corpusId) { ok message @@ -3339,7 +3339,7 @@ export interface PermanentlyDeleteDocumentOutput { } export const EMPTY_TRASH = gql` - mutation EmptyTrash($corpusId: ID!) { + mutation EmptyTrash($corpusId: String!) { emptyTrash(corpusId: $corpusId) { ok message diff --git a/opencontractserver/pipeline/parsers/llamaparse_parser.py b/opencontractserver/pipeline/parsers/llamaparse_parser.py new file mode 100644 index 000000000..5475482ff --- /dev/null +++ b/opencontractserver/pipeline/parsers/llamaparse_parser.py @@ -0,0 +1,663 @@ +""" +LlamaParse Parser for OpenContracts. + +This parser uses the LlamaParse API (from LlamaIndex) to parse PDF documents +and extract structural annotations with bounding boxes. +""" + +import logging +import os +import tempfile +from typing import Any, Optional + +from django.conf import settings +from django.core.files.storage import default_storage + +from opencontractserver.annotations.models import TOKEN_LABEL +from opencontractserver.documents.models import Document +from opencontractserver.pipeline.base.file_types import FileTypeEnum +from opencontractserver.pipeline.base.parser import BaseParser +from opencontractserver.types.dicts import ( + BoundingBoxPythonType, + OpenContractDocExport, + OpenContractsAnnotationPythonType, + OpenContractsSinglePageAnnotationType, + PawlsPagePythonType, + PawlsTokenPythonType, +) + +logger = logging.getLogger(__name__) + + +class LlamaParseParser(BaseParser): + """ + A parser that uses the LlamaParse API to parse PDF documents. + + LlamaParse provides advanced document parsing with layout extraction, + returning bounding boxes for various document elements (titles, text, + tables, figures, lists). 
+ + Configuration via environment variables: + - LLAMAPARSE_API_KEY: API key for LlamaParse (required) + - LLAMAPARSE_RESULT_TYPE: Output type (default: "json") + - LLAMAPARSE_EXTRACT_LAYOUT: Whether to extract layout (default: True) + - LLAMAPARSE_NUM_WORKERS: Number of parallel workers (default: 4) + - LLAMAPARSE_LANGUAGE: Document language (default: "en") + - LLAMAPARSE_VERBOSE: Enable verbose logging (default: False) + """ + + title = "LlamaParse Parser" + description = ( + "Parses PDF documents using the LlamaParse API with layout extraction." + ) + author = "OpenContracts Team" + dependencies = ["llama-parse"] + supported_file_types = [FileTypeEnum.PDF, FileTypeEnum.DOCX] + + # Mapping from LlamaParse element types to OpenContracts annotation labels + ELEMENT_TYPE_MAPPING = { + "title": "Title", + "section_header": "Section Header", + "heading": "Heading", + "text": "Text Block", + "paragraph": "Paragraph", + "table": "Table", + "figure": "Figure", + "image": "Image", + "list": "List", + "list_item": "List Item", + "caption": "Caption", + "footnote": "Footnote", + "header": "Page Header", + "footer": "Page Footer", + "page_number": "Page Number", + "equation": "Equation", + "code": "Code Block", + } + + def __init__(self): + """Initialize the LlamaParse parser with configuration from settings.""" + super().__init__() + + # Get API key from settings (which reads from env vars, supporting both + # LLAMAPARSE_API_KEY and LLAMA_CLOUD_API_KEY) + self.api_key = getattr(settings, "LLAMAPARSE_API_KEY", "") + + # Get other configuration options + self.result_type = getattr(settings, "LLAMAPARSE_RESULT_TYPE", "json") + self.extract_layout = getattr(settings, "LLAMAPARSE_EXTRACT_LAYOUT", True) + self.num_workers = getattr(settings, "LLAMAPARSE_NUM_WORKERS", 4) + self.language = getattr(settings, "LLAMAPARSE_LANGUAGE", "en") + self.verbose = getattr(settings, "LLAMAPARSE_VERBOSE", False) + + logger.info( + f"LlamaParseParser initialized with 
extract_layout={self.extract_layout}, " + f"language={self.language}" + ) + + def _parse_document_impl( + self, user_id: int, doc_id: int, **all_kwargs + ) -> Optional[OpenContractDocExport]: + """ + Parse a document using the LlamaParse API. + + Args: + user_id: ID of the user requesting the parse. + doc_id: ID of the document to parse. + **all_kwargs: Additional configuration options that can override defaults: + - api_key: Override the API key + - result_type: Output type ("json", "markdown", "text") + - extract_layout: Whether to extract layout/bounding boxes + - num_workers: Number of parallel workers + - language: Document language + - verbose: Enable verbose logging + + Returns: + OpenContractDocExport with the parsed document data, or None if parsing failed. + """ + # Redact sensitive keys before logging + safe_kwargs = { + k: ("***" if k == "api_key" else v) for k, v in all_kwargs.items() + } + logger.info( + f"LlamaParseParser - Parsing doc {doc_id} for user {user_id} " + f"with effective kwargs: {safe_kwargs}" + ) + + # Override settings with kwargs if provided + api_key = all_kwargs.get("api_key", self.api_key) + result_type = all_kwargs.get("result_type", self.result_type) + extract_layout = all_kwargs.get("extract_layout", self.extract_layout) + num_workers = all_kwargs.get("num_workers", self.num_workers) + language = all_kwargs.get("language", self.language) + verbose = all_kwargs.get("verbose", self.verbose) + + if not api_key: + logger.error( + "LlamaParse API key not configured. Set LLAMAPARSE_API_KEY or " + "LLAMA_CLOUD_API_KEY environment variable." 
+ ) + return None + + # Get the document + try: + document = Document.objects.get(pk=doc_id) + except Document.DoesNotExist: + logger.error(f"Document {doc_id} not found") + return None + + # Determine which file to use + if document.pdf_file and document.pdf_file.name: + doc_path = document.pdf_file.name + else: + logger.error(f"No PDF file found for document {doc_id}") + return None + + try: + # Import llama-parse here to avoid import errors if not installed + from llama_parse import LlamaParse + + # Initialize the parser + parser = LlamaParse( + api_key=api_key, + result_type=result_type, + num_workers=num_workers, + verbose=verbose, + language=language, + ) + + # Read the file from storage and write to a temp file + # (LlamaParse needs a file path) + with default_storage.open(doc_path, "rb") as doc_file: + doc_bytes = doc_file.read() + + # Determine file extension from document type + file_type = document.file_type.lower() if document.file_type else "pdf" + suffix = f".{file_type}" if file_type in ("pdf", "docx") else ".pdf" + + # Create a temporary file - use a nested try-finally to ensure cleanup + # on all exit paths (success, error, or early return) + temp_file_path = None + try: + with tempfile.NamedTemporaryFile( + suffix=suffix, delete=False + ) as temp_file: + temp_file.write(doc_bytes) + temp_file_path = temp_file.name + + # Parse the document + logger.info("Sending document to LlamaParse API...") + + # Use get_json_result for JSON with layout data + if result_type == "json" and extract_layout: + # For JSON with layout, we need to use the async API or + # get_json_result method + json_results = parser.get_json_result(temp_file_path) + + if not json_results: + logger.error("LlamaParse returned empty results") + return None + + # Convert to OpenContracts format + return self._convert_json_to_opencontracts( + document, json_results, extract_layout + ) + else: + # For markdown/text output, use load_data + documents = parser.load_data(temp_file_path) + + if 
not documents: + logger.error("LlamaParse returned empty results") + return None + + # Convert simple text/markdown output + return self._convert_text_to_opencontracts(document, documents) + + finally: + # Clean up temp file - always runs on any exit path + if temp_file_path and os.path.exists(temp_file_path): + os.unlink(temp_file_path) + + except ImportError: + logger.error( + "llama-parse library not installed. " + "Install with: pip install llama-parse" + ) + return None + except Exception as e: + import traceback + + stacktrace = traceback.format_exc() + logger.error(f"LlamaParse parsing failed: {e}\n{stacktrace}") + return None + + def _convert_json_to_opencontracts( + self, + document: Document, + json_results: list[dict[str, Any]], + extract_layout: bool = True, + ) -> OpenContractDocExport: + """ + Convert LlamaParse JSON results to OpenContracts format. + + Args: + document: The Document model instance. + json_results: List of JSON results from LlamaParse. + extract_layout: Whether layout data with bounding boxes is included. + + Returns: + OpenContractDocExport with parsed data. 
+ """ + # The first result contains the parsed document + result = json_results[0] if json_results else {} + pages = result.get("pages", []) + + # Build the full text content + full_text_parts = [] + pawls_pages: list[PawlsPagePythonType] = [] + annotations: list[OpenContractsAnnotationPythonType] = [] + + # Track annotation IDs + annotation_id_counter = 0 + + for page_idx, page in enumerate(pages): + page_text = page.get("text", "") + full_text_parts.append(page_text) + + # Log full page structure on first page for debugging + if page_idx == 0: + page_keys = list(page.keys()) + logger.info(f"DEBUG: Page keys: {page_keys}") + + # Get page dimensions (default to standard US Letter size in points: 8.5" x 11") + # Note: A4 size would be 595 x 842 points + # LlamaParse may use different key names for dimensions + DEFAULT_WIDTH = 612 + DEFAULT_HEIGHT = 792 + page_width = page.get("width", page.get("w", page.get("pageWidth"))) + page_height = page.get("height", page.get("h", page.get("pageHeight"))) + + # Validate dimensions - must be positive numbers + if page_width is None or page_width <= 0: + page_width = DEFAULT_WIDTH + logger.warning( + f"Page {page_idx} has invalid width, using default: {page_width}" + ) + if page_height is None or page_height <= 0: + page_height = DEFAULT_HEIGHT + logger.warning( + f"Page {page_idx} has invalid height, using default: {page_height}" + ) + + # Create PAWLS page structure + pawls_page: PawlsPagePythonType = { + "page": { + "width": page_width, + "height": page_height, + "index": page_idx, + }, + "tokens": [], + } + + # Extract layout elements if available + layout_elements = page.get("layout", []) if extract_layout else [] + items = page.get("items", []) + + # Process items (elements with text and positions) + # Debug: Log first few items to understand bbox format + if page_idx == 0 and items: + logger.info(f"DEBUG: Page dimensions: {page_width}x{page_height}") + # Log full structure of first item for debugging + if items: + 
logger.info(f"DEBUG: Full first item structure: {items[0]}") + for i, debug_item in enumerate(items[:3]): + # Check all possible bbox key names + bbox_val = debug_item.get( + "bBox", + debug_item.get("bbox", debug_item.get("bounding_box", "NONE")), + ) + logger.info( + f"DEBUG: Item {i} keys: {debug_item.keys()}, " + f"bBox: {bbox_val}, " + f"text: {debug_item.get('text', debug_item.get('value', ''))[:50]}" + ) + + for item in items: + item_text = item.get("text", "") or item.get("value", "") + item_type = item.get("type", "text").lower() + # LlamaParse uses 'bBox' (camelCase), also check 'bbox' and 'bounding_box' + bbox = item.get("bBox", item.get("bbox", item.get("bounding_box", {}))) + + if not item_text.strip(): + continue + + # Parse bbox to get bounds (no tokens - LlamaParse doesn't provide them) + _, bounds = self._create_pawls_tokens_from_bbox( + item_text, + bbox, + page_width, + page_height, + annotation_id_counter, # Just used for debug logging + ) + + # Create annotation for this element + label = self.ELEMENT_TYPE_MAPPING.get(item_type, "Text Block") + annotation = self._create_annotation( + annotation_id=str(annotation_id_counter), + label=label, + raw_text=item_text, + page_idx=page_idx, + bounds=bounds, + ) + annotations.append(annotation) + annotation_id_counter += 1 + + # If no items but we have layout, process layout elements + if not items and layout_elements: + for element in layout_elements: + element_type = element.get("label", "text").lower() + # Check all possible bbox key names (bBox, bbox, bounding_box) + bbox = element.get( + "bBox", element.get("bbox", element.get("bounding_box", {})) + ) + element_text = element.get("text", "") + + if not element_text and element_type not in ["figure", "image"]: + continue + + # Parse bbox to get bounds (no tokens - LlamaParse doesn't provide them) + _, bounds = self._create_pawls_tokens_from_bbox( + element_text or f"[{element_type}]", + bbox, + page_width, + page_height, + annotation_id_counter, # 
Just used for debug logging + ) + + label = self.ELEMENT_TYPE_MAPPING.get(element_type, "Text Block") + annotation = self._create_annotation( + annotation_id=str(annotation_id_counter), + label=label, + raw_text=element_text or f"[{element_type}]", + page_idx=page_idx, + bounds=bounds, + ) + annotations.append(annotation) + annotation_id_counter += 1 + + pawls_pages.append(pawls_page) + + # Combine all text + full_text = "\n\n".join(full_text_parts) + + # Build the export + export: OpenContractDocExport = { + "title": document.title, + "content": full_text, + "description": document.description or "", + "pawls_file_content": pawls_pages, + "page_count": len(pages), + "doc_labels": [], + "labelled_text": annotations, + "relationships": [], + } + + logger.info( + f"Converted LlamaParse output: {len(pages)} pages, " + f"{len(annotations)} annotations" + ) + + return export + + def _convert_text_to_opencontracts( + self, + document: Document, + llama_documents: list[Any], + ) -> OpenContractDocExport: + """ + Convert simple text/markdown LlamaParse output to OpenContracts format. + + This is used when layout extraction is not enabled. + + Args: + document: The Document model instance. + llama_documents: List of LlamaIndex Document objects. + + Returns: + OpenContractDocExport with parsed data. 
+ """ + # Combine text from all documents + full_text = "\n\n".join(doc.text for doc in llama_documents if doc.text) + + # Without layout data, we create a minimal export + export: OpenContractDocExport = { + "title": document.title, + "content": full_text, + "description": document.description or "", + "pawls_file_content": [], + "page_count": len(llama_documents) or 1, + "doc_labels": [], + "labelled_text": [], + "relationships": [], + } + + logger.info( + f"Converted LlamaParse text output: {len(llama_documents)} documents, " + f"{len(full_text)} characters" + ) + + return export + + def _create_pawls_tokens_from_bbox( + self, + text: str, + bbox: dict[str, Any], + page_width: float, + page_height: float, + start_token_idx: int, + ) -> tuple[list[PawlsTokenPythonType], BoundingBoxPythonType]: + """ + Create PAWLS tokens from text and bounding box. + + LlamaParse returns bounding boxes as fractions (0-1) of page dimensions. + We need to convert these to absolute coordinates. + + Args: + text: The text content. + bbox: Bounding box dict with keys like 'x', 'y', 'width', 'height' or + 'left', 'top', 'right', 'bottom' (as fractions 0-1). + page_width: Page width in points. + page_height: Page height in points. + start_token_idx: Starting token index. + + Returns: + Tuple of (list of PAWLS tokens, overall bounding box). 
+ """ + tokens: list[PawlsTokenPythonType] = [] + + # Default margin constant (1 inch = 72 points) + DEFAULT_MARGIN = 72 + # Default bottom position for fallback bounding boxes (~1.4 inches from top) + # This provides reasonable vertical space for a single-line text element + DEFAULT_BOTTOM = 100 + + # Parse bounding box - handle different formats from LlamaParse + # LlamaParse may return fractional coordinates (0-1) or absolute coordinates + bbox_format = "none" + is_fractional = False + + if not bbox: + # No bbox, create a default one with standard margins + bbox_format = "default/empty" + left, top = DEFAULT_MARGIN, DEFAULT_MARGIN + right, bottom = page_width - DEFAULT_MARGIN, DEFAULT_BOTTOM + elif "x1" in bbox and "y1" in bbox: + # Format: {x1, y1, x2, y2} - corner coordinates + bbox_format = "x1/y1/x2/y2" + x1 = float(bbox.get("x1", 0)) + y1 = float(bbox.get("y1", 0)) + x2 = float(bbox.get("x2", 0)) + y2 = float(bbox.get("y2", 0)) + + # Check if fractional + is_fractional = all(0 <= v <= 1.0 for v in [x1, y1, x2, y2]) + if is_fractional: + left = x1 * page_width + top = y1 * page_height + right = x2 * page_width + bottom = y2 * page_height + else: + left, top, right, bottom = x1, y1, x2, y2 + elif "x" in bbox and "y" in bbox: + # Format: {x, y, width/w, height/h} + # LlamaParse uses 'w' and 'h' shorthand + bbox_format = "x/y/w/h" + x = float(bbox.get("x", 0)) + y = float(bbox.get("y", 0)) + w = float(bbox.get("w", bbox.get("width", 0.1))) + h = float(bbox.get("h", bbox.get("height", 0.02))) + + # Check if values are fractions (0-1) or absolute + # Heuristic: if both corners (x,y) and (x+w,y+h) are in [0,1], treat as fractional + is_fractional = ( + 0 <= x <= 1.0 + and 0 <= y <= 1.0 + and 0 <= (x + w) <= 1.0 + and 0 <= (y + h) <= 1.0 + ) + if is_fractional: + left = x * page_width + top = y * page_height + right = (x + w) * page_width + bottom = (y + h) * page_height + else: + left, top = x, y + right = x + w + bottom = y + h + elif "left" in bbox: + # Format: 
{left, top, right, bottom} + bbox_format = "left/top/right/bottom" + bbox_l = float(bbox.get("left", 0)) + bbox_t = float(bbox.get("top", 0)) + bbox_r = float(bbox.get("right", 1)) + bbox_b = float(bbox.get("bottom", 0.05)) + + # Check if ALL values are in [0,1] range - indicates fractional coordinates + is_fractional = all(0 <= v <= 1.0 for v in [bbox_l, bbox_t, bbox_r, bbox_b]) + if is_fractional: + left = bbox_l * page_width + top = bbox_t * page_height + right = bbox_r * page_width + bottom = bbox_b * page_height + else: + left, top, right, bottom = bbox_l, bbox_t, bbox_r, bbox_b + elif isinstance(bbox, (list, tuple)) and len(bbox) >= 4: + # Format: [x1, y1, x2, y2] or [left, top, right, bottom] + bbox_format = "array[4]" + vals = [float(v) for v in bbox[:4]] + # Check if ALL values are in [0,1] range - indicates fractional coordinates + is_fractional = all(0 <= v <= 1.0 for v in vals) + if is_fractional: + left = vals[0] * page_width + top = vals[1] * page_height + right = vals[2] * page_width + bottom = vals[3] * page_height + else: + left, top, right, bottom = vals + else: + # Unknown format, use defaults with standard margins + bbox_format = f"unknown:{type(bbox).__name__}" + left, top = DEFAULT_MARGIN, DEFAULT_MARGIN + right, bottom = page_width - DEFAULT_MARGIN, DEFAULT_BOTTOM + + # Sanity checks and bounds validation + # Ensure left < right and top < bottom (swap if needed) + if left > right: + left, right = right, left + if top > bottom: + top, bottom = bottom, top + + # Clamp to page bounds + left = max(0, min(left, page_width)) + right = max(0, min(right, page_width)) + top = max(0, min(top, page_height)) + bottom = max(0, min(bottom, page_height)) + + # Ensure minimum dimensions (at least 1 point) + if right - left < 1: + right = left + 1 + if bottom - top < 1: + bottom = top + 1 + + # NOTE: We do NOT create fake tokens here. LlamaParse only provides element-level + # bounding boxes, not token-level data. 
Creating fake tokens by evenly distributing + # words across the bbox produces incorrect highlights. The frontend handles + # annotations with empty tokensJsons gracefully - it just shows the bounding box + # without individual token highlights. + + # Create overall bounding box + bounds: BoundingBoxPythonType = { + "left": left, + "top": top, + "right": right, + "bottom": bottom, + } + + # Debug logging for first few conversions + if start_token_idx < 5: + logger.info( + f"DEBUG bbox: format={bbox_format}, fractional={is_fractional}, " + f"input={bbox}" + ) + logger.info( + f"DEBUG output: bounds=({left:.1f}, {top:.1f}, {right:.1f}, {bottom:.1f}), " + f"page={page_width:.0f}x{page_height:.0f}" + ) + + # Return empty tokens list - we don't have real token data from LlamaParse + return tokens, bounds + + def _create_annotation( + self, + annotation_id: str, + label: str, + raw_text: str, + page_idx: int, + bounds: BoundingBoxPythonType, + ) -> OpenContractsAnnotationPythonType: + """ + Create an OpenContracts annotation. + + Args: + annotation_id: Unique ID for the annotation. + label: The annotation label. + raw_text: The text content. + page_idx: Page index (0-based). + bounds: Bounding box. + + Returns: + OpenContractsAnnotationPythonType annotation. + """ + # NOTE: We use empty tokensJsons because LlamaParse only provides element-level + # bounding boxes, not token-level data. The frontend handles this gracefully + # by showing just the bounding box without individual token highlights. 
+ + # Create page annotation with empty token references + page_annotation: OpenContractsSinglePageAnnotationType = { + "bounds": bounds, + "tokensJsons": [], # Empty - no token data from LlamaParse + "rawText": raw_text, + } + + annotation: OpenContractsAnnotationPythonType = { + "id": annotation_id, + "annotationLabel": label, + "rawText": raw_text, + "page": page_idx, + "annotation_json": {str(page_idx): page_annotation}, + "parent_id": None, + "annotation_type": TOKEN_LABEL, + "structural": True, + } + + return annotation diff --git a/opencontractserver/tests/test_doc_parser_llamaparse.py b/opencontractserver/tests/test_doc_parser_llamaparse.py new file mode 100644 index 000000000..8938ea145 --- /dev/null +++ b/opencontractserver/tests/test_doc_parser_llamaparse.py @@ -0,0 +1,879 @@ +""" +Tests for the LlamaParseParser class. + +Tests cover: +- Successful document parsing with JSON/layout output +- Bounding box parsing and conversion +- Structural annotation creation (without token-level data) +- Error handling (missing API key, API errors, etc.) +- Configuration via environment variables + +Note: LlamaParse only provides element-level bounding boxes, not token-level data. +Annotations are created with empty tokensJsons - the frontend handles this gracefully +by showing just the bounding box outline without individual token highlights. 
import sys
from unittest.mock import MagicMock, patch

from django.contrib.auth import get_user_model
from django.core.files.base import ContentFile
from django.db import transaction
from django.test import TestCase, override_settings

from opencontractserver.documents.models import Document
from opencontractserver.pipeline.parsers.llamaparse_parser import LlamaParseParser

User = get_user_model()

# Create a mock llama_parse module for testing since it may not be installed.
# NOTE(review): this mutates sys.modules at import time, so the fake module
# leaks into every other test loaded in the same process - confirm intended.
mock_llama_parse = MagicMock()
mock_llama_parse.LlamaParse = MagicMock()
sys.modules["llama_parse"] = mock_llama_parse


class MockLlamaDocument:
    """Mock LlamaIndex Document object (only the .text attribute is read)."""

    def __init__(self, text: str):
        self.text = text


class TestLlamaParseParser(TestCase):
    """Tests for the LlamaParseParser class."""

    def setUp(self):
        """Set up test environment."""
        with transaction.atomic():
            self.user = User.objects.create_user(
                username="testuser", password="testpass123"
            )

        # Create a sample Document object with a mock PDF file
        self.doc = Document.objects.create(
            title="Test LlamaParse Document",
            description="Test Description",
            file_type="pdf",
            creator=self.user,
        )

        # Create a minimal valid PDF for testing
        pdf_content = b"%PDF-1.7\n1 0 obj\n<>\nendobj\n2 0 obj\n<>\nendobj\n3 0 obj\n<>>>\nendobj\nxref\n0 4\n0000000000 65535 f\n0000000010 00000 n\n0000000053 00000 n\n0000000102 00000 n\ntrailer\n<>\nstartxref\n178\n%%EOF\n"  # noqa: E501
        self.doc.pdf_file.save("test_llama.pdf", ContentFile(pdf_content))

        # Sample JSON response from LlamaParse with layout data
        # Note: LlamaParse uses 'bBox' (camelCase) with 'w'/'h' keys
        self.sample_json_response = [
            {
                "pages": [
                    {
                        "text": "This is the first page of the document.",
                        "width": 612,
                        "height": 792,
                        "items": [
                            {
                                "type": "title",
                                "text": "Document Title",
                                "bBox": {
                                    "x": 61.2,
                                    "y": 39.6,
                                    "w": 489.6,
                                    "h": 39.6,
                                },
                            },
                            {
                                "type": "paragraph",
                                "text": "This is a paragraph with some content.",
                                "bBox": {
                                    "x": 61.2,
                                    "y": 118.8,
                                    "w": 489.6,
                                    "h": 79.2,
                                },
                            },
                            {
                                "type": "table",
                                "text": "Column A | Column B\nValue 1 | Value 2",
                                "bBox": {
                                    "x": 61.2,
                                    "y": 237.6,
                                    "w": 489.6,
                                    "h": 158.4,
                                },
                            },
                        ],
                        "layout": [
                            {
                                "label": "title",
                                "bBox": {
                                    "x": 61.2,
                                    "y": 39.6,
                                    "w": 489.6,
                                    "h": 39.6,
                                },
                                "confidence": 0.95,
                                "isLikelyNoise": False,
                            },
                        ],
                    },
                    {
                        "text": "This is the second page.",
                        "width": 612,
                        "height": 792,
                        "items": [
                            {
                                "type": "text",
                                "text": "More content on page 2.",
                                "bBox": {
                                    "x": 61.2,
                                    "y": 79.2,
                                    "w": 489.6,
                                    "h": 79.2,
                                },
                            },
                        ],
                    },
                ]
            }
        ]

    # Decorators apply bottom-up: default_storage.open is innermost, so it is
    # the first mock argument, followed by the LlamaParse class mock.
    @override_settings(LLAMAPARSE_API_KEY="test-api-key-123")
    @patch("llama_parse.LlamaParse")
    @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open")
    def test_parse_document_success_with_layout(
        self, mock_open, mock_llama_parse_class
    ):
        """Test successful document parsing with layout extraction."""
        # Mock file reading
        mock_file = MagicMock()
        mock_file.read.return_value = b"mock pdf content"
        mock_open.return_value.__enter__.return_value = mock_file

        # Mock the LlamaParse instance
        mock_parser = MagicMock()
        mock_parser.get_json_result.return_value = self.sample_json_response
        mock_llama_parse_class.return_value = mock_parser

        # Create parser and parse document
        parser = LlamaParseParser()
        result = parser.parse_document(user_id=self.user.id, doc_id=self.doc.id)

        # Verify result structure
        self.assertIsNotNone(result)
        self.assertEqual(result["title"], "Test LlamaParse Document")
        self.assertEqual(result["page_count"], 2)

        # Verify PAWLS content was generated
        self.assertIn("pawls_file_content", result)
        self.assertEqual(len(result["pawls_file_content"]), 2)

        # Verify first page structure
        first_page = result["pawls_file_content"][0]
        self.assertEqual(first_page["page"]["index"], 0)
        self.assertEqual(first_page["page"]["width"], 612)
        self.assertEqual(first_page["page"]["height"], 792)
        # LlamaParse doesn't provide token-level data, so tokens list is empty
        self.assertEqual(len(first_page["tokens"]), 0)

        # Verify annotations were created
        self.assertIn("labelled_text", result)
        self.assertGreater(len(result["labelled_text"]), 0)

        # Verify annotation structure
        first_annotation = result["labelled_text"][0]
        self.assertEqual(first_annotation["annotationLabel"], "Title")
        self.assertEqual(first_annotation["structural"], True)
        self.assertEqual(first_annotation["annotation_type"], "TOKEN_LABEL")
        self.assertIn("annotation_json", first_annotation)

    @override_settings(LLAMAPARSE_API_KEY="test-api-key-123")
    @patch("llama_parse.LlamaParse")
    @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open")
    def test_parse_document_markdown_mode(self, mock_open, mock_llama_parse_class):
        """Test document parsing with markdown output (no layout)."""
        # Mock file reading
        mock_file = MagicMock()
        mock_file.read.return_value = b"mock pdf content"
        mock_open.return_value.__enter__.return_value = mock_file

        # Mock the LlamaParse instance for markdown mode
        mock_parser = MagicMock()
        mock_parser.load_data.return_value = [
            MockLlamaDocument("# Title\n\nThis is the document content."),
            MockLlamaDocument("## Section 2\n\nMore content here."),
        ]
        mock_llama_parse_class.return_value = mock_parser

        # Create parser and parse document with markdown mode
        parser = LlamaParseParser()
        result = parser.parse_document(
            user_id=self.user.id,
            doc_id=self.doc.id,
            result_type="markdown",
            extract_layout=False,
        )

        # Verify result structure
        self.assertIsNotNone(result)
        self.assertEqual(result["title"], "Test LlamaParse Document")
        self.assertIn("# Title", result["content"])
        self.assertEqual(result["page_count"], 2)

        # Verify no PAWLS content (markdown mode)
        self.assertEqual(result["pawls_file_content"], [])

        # Verify no annotations (markdown mode without layout)
        self.assertEqual(result["labelled_text"], [])

    def test_parse_document_no_api_key(self):
        """Test that parsing fails gracefully without API key."""
        with override_settings(LLAMAPARSE_API_KEY=""):
            parser = LlamaParseParser()
            parser.api_key = ""  # Ensure no API key

            result = parser.parse_document(user_id=self.user.id, doc_id=self.doc.id)

            self.assertIsNone(result)

    @override_settings(LLAMAPARSE_API_KEY="test-api-key-123")
    @patch("llama_parse.LlamaParse")
    @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open")
    def test_parse_document_api_error(self, mock_open, mock_llama_parse_class):
        """Test handling of API errors."""
        # Mock file reading
        mock_file = MagicMock()
        mock_file.read.return_value = b"mock pdf content"
        mock_open.return_value.__enter__.return_value = mock_file

        # Mock the LlamaParse instance to raise an error
        mock_parser = MagicMock()
        mock_parser.get_json_result.side_effect = Exception("API rate limit exceeded")
        mock_llama_parse_class.return_value = mock_parser

        # Create parser and attempt to parse
        parser = LlamaParseParser()
        result = parser.parse_document(user_id=self.user.id, doc_id=self.doc.id)

        # Should return None on error
        self.assertIsNone(result)

    @override_settings(LLAMAPARSE_API_KEY="test-api-key-123")
    @patch("llama_parse.LlamaParse")
    @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open")
    def test_parse_document_empty_result(self, mock_open, mock_llama_parse_class):
        """Test handling of empty results from API."""
        # Mock file reading
        mock_file = MagicMock()
        mock_file.read.return_value = b"mock pdf content"
        mock_open.return_value.__enter__.return_value = mock_file

        # Mock empty response
        mock_parser = MagicMock()
        mock_parser.get_json_result.return_value = []
        mock_llama_parse_class.return_value = mock_parser

        parser = LlamaParseParser()
        result = parser.parse_document(user_id=self.user.id, doc_id=self.doc.id)

        self.assertIsNone(result)

    def test_parse_document_nonexistent(self):
        """Test parsing a document that doesn't exist."""
        with override_settings(LLAMAPARSE_API_KEY="test-api-key-123"):
            parser = LlamaParseParser()
            result = parser.parse_document(user_id=self.user.id, doc_id=999999)

            self.assertIsNone(result)

    @override_settings(LLAMAPARSE_API_KEY="test-api-key-123")
    def test_parse_document_no_pdf_file(self):
        """Test parsing a document without a PDF file."""
        # Create a document without a PDF file
        doc_without_pdf = Document.objects.create(
            title="No PDF Document",
            description="Test",
            file_type="pdf",
            creator=self.user,
        )

        parser = LlamaParseParser()
        result = parser.parse_document(user_id=self.user.id, doc_id=doc_without_pdf.id)

        self.assertIsNone(result)
class TestLlamaParseParserBboxConversion(TestCase):
    """Tests for bounding box conversion methods.

    Note: LlamaParse only provides element-level bounding boxes, not token-level data.
    The _create_pawls_tokens_from_bbox method returns empty tokens list and just the bounds.
    """

    def setUp(self):
        """Set up test environment."""
        self.parser = LlamaParseParser()

    def test_bbox_fractional_xy_format(self):
        """Test conversion of fractional x,y,width,height bbox format."""
        bbox = {"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.1}
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="test word",
            bbox=bbox,
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        # Check bounds are converted to absolute coordinates
        self.assertAlmostEqual(bounds["left"], 61.2, places=1)
        self.assertAlmostEqual(bounds["top"], 158.4, places=1)
        self.assertAlmostEqual(bounds["right"], 244.8, places=1)
        self.assertAlmostEqual(bounds["bottom"], 237.6, places=1)

        # Tokens list is empty - we don't generate fake tokens
        self.assertEqual(len(tokens), 0)

    def test_bbox_llamaparse_format(self):
        """Test conversion of LlamaParse's actual format: bBox with x/y/w/h."""
        # This is the actual format LlamaParse uses (absolute coordinates)
        bbox = {"x": 72.1, "y": 35.4, "w": 467.35, "h": 151}
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="LlamaParse format test",
            bbox=bbox,
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        # Should be treated as absolute coordinates since values > 1
        self.assertAlmostEqual(bounds["left"], 72.1, places=1)
        self.assertAlmostEqual(bounds["top"], 35.4, places=1)
        self.assertAlmostEqual(bounds["right"], 539.45, places=1)  # x + w
        self.assertAlmostEqual(bounds["bottom"], 186.4, places=1)  # y + h

        # No tokens
        self.assertEqual(len(tokens), 0)

    def test_bbox_fractional_ltrb_format(self):
        """Test conversion of fractional left,top,right,bottom bbox format."""
        bbox = {"left": 0.1, "top": 0.2, "right": 0.9, "bottom": 0.3}
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="hello world",
            bbox=bbox,
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        self.assertAlmostEqual(bounds["left"], 61.2, places=1)
        self.assertAlmostEqual(bounds["top"], 158.4, places=1)
        self.assertAlmostEqual(bounds["right"], 550.8, places=1)
        self.assertAlmostEqual(bounds["bottom"], 237.6, places=1)

        # No tokens
        self.assertEqual(len(tokens), 0)

    def test_bbox_array_format(self):
        """Test conversion of array bbox format [x1, y1, x2, y2]."""
        bbox = [0.1, 0.2, 0.9, 0.3]
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="array format test",
            bbox=bbox,
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        self.assertAlmostEqual(bounds["left"], 61.2, places=1)
        self.assertAlmostEqual(bounds["top"], 158.4, places=1)
        self.assertAlmostEqual(bounds["right"], 550.8, places=1)
        self.assertAlmostEqual(bounds["bottom"], 237.6, places=1)

        # No tokens
        self.assertEqual(len(tokens), 0)

    def test_bbox_absolute_coordinates(self):
        """Test handling of absolute coordinate bbox (values > 1)."""
        bbox = {"x": 100, "y": 200, "width": 300, "height": 50}
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="absolute coords",
            bbox=bbox,
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        # When values > 1, they're treated as absolute
        self.assertEqual(bounds["left"], 100)
        self.assertEqual(bounds["top"], 200)
        self.assertEqual(bounds["right"], 400)  # x + width
        self.assertEqual(bounds["bottom"], 250)  # y + height

        # No tokens
        self.assertEqual(len(tokens), 0)

    def test_bbox_empty(self):
        """Test handling of empty/missing bbox."""
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="no bbox",
            bbox={},
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        # Should use default margins (1 inch = 72 points)
        self.assertEqual(bounds["left"], 72)
        self.assertEqual(bounds["top"], 72)

        # No tokens
        self.assertEqual(len(tokens), 0)

    def test_bbox_x1_y1_x2_y2_format(self):
        """Test conversion of x1/y1/x2/y2 corner coordinate format."""
        bbox = {"x1": 0.1, "y1": 0.2, "x2": 0.9, "y2": 0.3}
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="corner format test",
            bbox=bbox,
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        self.assertAlmostEqual(bounds["left"], 61.2, places=1)
        self.assertAlmostEqual(bounds["top"], 158.4, places=1)
        self.assertAlmostEqual(bounds["right"], 550.8, places=1)
        self.assertAlmostEqual(bounds["bottom"], 237.6, places=1)

        # No tokens
        self.assertEqual(len(tokens), 0)

    def test_bbox_sanity_checks(self):
        """Test that sanity checks are applied to bounding boxes."""
        # Test bounds are clamped to page
        bbox = {"x": -10, "y": -10, "w": 1000, "h": 1000}
        tokens, bounds = self.parser._create_pawls_tokens_from_bbox(
            text="out of bounds",
            bbox=bbox,
            page_width=612,
            page_height=792,
            start_token_idx=0,
        )

        # Should be clamped to page bounds
        self.assertGreaterEqual(bounds["left"], 0)
        self.assertGreaterEqual(bounds["top"], 0)
        self.assertLessEqual(bounds["right"], 612)
        self.assertLessEqual(bounds["bottom"], 792)
+ """ + + def setUp(self): + """Set up test environment.""" + self.parser = LlamaParseParser() + + def test_create_annotation_structure(self): + """Test annotation creation has correct structure.""" + bounds = {"left": 100, "top": 100, "right": 300, "bottom": 150} + + annotation = self.parser._create_annotation( + annotation_id="anno-1", + label="Title", + raw_text="Sample Title", + page_idx=0, + bounds=bounds, + ) + + # Check required fields + self.assertEqual(annotation["id"], "anno-1") + self.assertEqual(annotation["annotationLabel"], "Title") + self.assertEqual(annotation["rawText"], "Sample Title") + self.assertEqual(annotation["page"], 0) + self.assertEqual(annotation["structural"], True) + self.assertEqual(annotation["annotation_type"], "TOKEN_LABEL") + self.assertIsNone(annotation["parent_id"]) + + # Check annotation_json structure + self.assertIn("0", annotation["annotation_json"]) + page_anno = annotation["annotation_json"]["0"] + self.assertEqual(page_anno["bounds"], bounds) + self.assertEqual(page_anno["rawText"], "Sample Title") + # tokensJsons is empty - LlamaParse doesn't provide token-level data + self.assertEqual(len(page_anno["tokensJsons"]), 0) + + def test_element_type_mapping(self): + """Test that element types are properly mapped to labels.""" + type_mappings = { + "title": "Title", + "paragraph": "Paragraph", + "table": "Table", + "figure": "Figure", + "list": "List", + "heading": "Heading", + "unknown_type": "Text Block", # Default + } + + for element_type, expected_label in type_mappings.items(): + label = LlamaParseParser.ELEMENT_TYPE_MAPPING.get( + element_type, "Text Block" + ) + self.assertEqual( + label, + expected_label, + f"Element type '{element_type}' should map to '{expected_label}'", + ) + + +class TestLlamaParseParserConfiguration(TestCase): + """Tests for parser configuration.""" + + def test_default_configuration(self): + """Test default configuration values.""" + with override_settings( + LLAMAPARSE_API_KEY="test-key", + 
class TestLlamaParseParserConfiguration(TestCase):
    """Tests for parser configuration."""

    def test_default_configuration(self):
        """Test default configuration values."""
        with override_settings(
            LLAMAPARSE_API_KEY="test-key",
            LLAMAPARSE_RESULT_TYPE="json",
            LLAMAPARSE_EXTRACT_LAYOUT=True,
            LLAMAPARSE_NUM_WORKERS=4,
            LLAMAPARSE_LANGUAGE="en",
            LLAMAPARSE_VERBOSE=False,
        ):
            parser = LlamaParseParser()

            self.assertEqual(parser.result_type, "json")
            self.assertEqual(parser.extract_layout, True)
            self.assertEqual(parser.num_workers, 4)
            self.assertEqual(parser.language, "en")
            self.assertEqual(parser.verbose, False)

    def test_custom_configuration(self):
        """Test custom configuration via settings."""
        with override_settings(
            LLAMAPARSE_API_KEY="custom-key",
            LLAMAPARSE_RESULT_TYPE="markdown",
            LLAMAPARSE_EXTRACT_LAYOUT=False,
            LLAMAPARSE_NUM_WORKERS=8,
            LLAMAPARSE_LANGUAGE="de",
            LLAMAPARSE_VERBOSE=True,
        ):
            parser = LlamaParseParser()

            self.assertEqual(parser.result_type, "markdown")
            self.assertEqual(parser.extract_layout, False)
            self.assertEqual(parser.num_workers, 8)
            self.assertEqual(parser.language, "de")
            self.assertEqual(parser.verbose, True)

    @override_settings(LLAMAPARSE_API_KEY="test-key")
    @patch("llama_parse.LlamaParse")
    @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open")
    def test_kwargs_override_settings(self, mock_open, mock_llama_parse_class):
        """Test that kwargs override settings."""
        with transaction.atomic():
            user = User.objects.create_user(
                username="configtestuser", password="pass123"
            )

        doc = Document.objects.create(
            title="Config Test Doc",
            file_type="pdf",
            creator=user,
        )
        doc.pdf_file.save("config_test.pdf", ContentFile(b"%PDF-1.4 test"))

        mock_file = MagicMock()
        mock_file.read.return_value = b"mock pdf"
        mock_open.return_value.__enter__.return_value = mock_file

        mock_parser = MagicMock()
        mock_parser.get_json_result.return_value = [{"pages": []}]
        mock_llama_parse_class.return_value = mock_parser

        parser = LlamaParseParser()
        # Per-call kwargs should take precedence over the settings above.
        parser.parse_document(
            user_id=user.id,
            doc_id=doc.id,
            language="fr",
            num_workers=16,
        )

        # Verify LlamaParse was called with overridden values
        mock_llama_parse_class.assert_called_once()
        call_kwargs = mock_llama_parse_class.call_args.kwargs
        self.assertEqual(call_kwargs["language"], "fr")
        self.assertEqual(call_kwargs["num_workers"], 16)
@patch("llama_parse.LlamaParse") + @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open") + def test_parse_document_layout_only_processing( + self, mock_open, mock_llama_parse_class + ): + """Test document parsing when items are empty but layout exists. + + This tests lines 344-381 in llamaparse_parser.py. + """ + mock_file = MagicMock() + mock_file.read.return_value = b"mock pdf content" + mock_open.return_value.__enter__.return_value = mock_file + + mock_parser = MagicMock() + mock_parser.get_json_result.return_value = self.layout_only_response + mock_llama_parse_class.return_value = mock_parser + + parser = LlamaParseParser() + result = parser.parse_document(user_id=self.user.id, doc_id=self.doc.id) + + # Verify result structure + self.assertIsNotNone(result) + self.assertEqual(result["title"], "Layout Test Document") + self.assertEqual(result["page_count"], 1) + + # Verify PAWLS content structure (tokens are empty - no token-level data) + self.assertIn("pawls_file_content", result) + self.assertEqual(len(result["pawls_file_content"]), 1) + first_page = result["pawls_file_content"][0] + # LlamaParse doesn't provide token-level data + self.assertEqual(len(first_page["tokens"]), 0) + + # Verify annotations were created from layout elements + self.assertIn("labelled_text", result) + # Should have 3 annotations: title, paragraph, and figure + # The empty text "text" type should be skipped + self.assertEqual(len(result["labelled_text"]), 3) + + # Check the annotation labels + labels = [anno["annotationLabel"] for anno in result["labelled_text"]] + self.assertIn("Title", labels) + self.assertIn("Paragraph", labels) + self.assertIn("Figure", labels) + + @override_settings(LLAMAPARSE_API_KEY="test-api-key-123") + @patch("llama_parse.LlamaParse") + @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open") + def test_parse_document_layout_figure_without_text( + self, mock_open, mock_llama_parse_class + ): + """Test 
that figures/images with empty text are processed correctly. + + Figures and images should use [element_type] as placeholder text. + """ + layout_with_images = [ + { + "pages": [ + { + "text": "Page with figures", + "width": 612, + "height": 792, + "items": [], + "layout": [ + { + "label": "image", + "bBox": { + "x": 61.2, + "y": 79.2, + "w": 489.6, + "h": 316.8, + }, + "text": "", # Empty text - should use [image] + }, + { + "label": "figure", + "bBox": { + "x": 61.2, + "y": 475.2, + "w": 489.6, + "h": 237.6, + }, + "text": "", # Empty text - should use [figure] + }, + ], + } + ] + } + ] + + mock_file = MagicMock() + mock_file.read.return_value = b"mock pdf content" + mock_open.return_value.__enter__.return_value = mock_file + + mock_parser = MagicMock() + mock_parser.get_json_result.return_value = layout_with_images + mock_llama_parse_class.return_value = mock_parser + + parser = LlamaParseParser() + result = parser.parse_document(user_id=self.user.id, doc_id=self.doc.id) + + # Both figure and image should be processed + self.assertIsNotNone(result) + self.assertEqual(len(result["labelled_text"]), 2) + + # Check that placeholder text was used + for anno in result["labelled_text"]: + self.assertIn(anno["rawText"], ["[image]", "[figure]"]) + + @override_settings(LLAMAPARSE_API_KEY="test-api-key-123") + @patch("llama_parse.LlamaParse") + @patch("opencontractserver.pipeline.parsers.llamaparse_parser.default_storage.open") + def test_parse_document_layout_skips_empty_text_non_figures( + self, mock_open, mock_llama_parse_class + ): + """Test that non-figure elements with empty text are skipped.""" + layout_with_empty_text = [ + { + "pages": [ + { + "text": "Page content", + "width": 612, + "height": 792, + "items": [], + "layout": [ + { + "label": "title", + "bBox": { + "x": 61.2, + "y": 79.2, + "w": 489.6, + "h": 39.6, + }, + "text": "Valid Title", # Has text - should be included + }, + { + "label": "paragraph", + "bBox": { + "x": 61.2, + "y": 158.4, + "w": 489.6, + 
"h": 79.2, + }, + "text": "", # Empty text - should be skipped + }, + { + "label": "heading", + "bBox": { + "x": 61.2, + "y": 316.8, + "w": 489.6, + "h": 39.6, + }, + "text": "", # Empty text - should be skipped + }, + { + "label": "section_header", + "bBox": { + "x": 61.2, + "y": 396.0, + "w": 489.6, + "h": 39.6, + }, + "text": "Valid Section Header", # Has text - should be included + }, + ], + } + ] + } + ] + + mock_file = MagicMock() + mock_file.read.return_value = b"mock pdf content" + mock_open.return_value.__enter__.return_value = mock_file + + mock_parser = MagicMock() + mock_parser.get_json_result.return_value = layout_with_empty_text + mock_llama_parse_class.return_value = mock_parser + + parser = LlamaParseParser() + result = parser.parse_document(user_id=self.user.id, doc_id=self.doc.id) + + # Only 2 annotations should be created (title and section_header) + self.assertIsNotNone(result) + self.assertEqual(len(result["labelled_text"]), 2) + + labels = [anno["annotationLabel"] for anno in result["labelled_text"]] + self.assertIn("Title", labels) + self.assertIn("Section Header", labels) + self.assertNotIn("Paragraph", labels) + self.assertNotIn("Heading", labels) diff --git a/opencontractserver/tests/test_pipeline_component_queries.py b/opencontractserver/tests/test_pipeline_component_queries.py index 831e9b7fc..5ffe82197 100644 --- a/opencontractserver/tests/test_pipeline_component_queries.py +++ b/opencontractserver/tests/test_pipeline_component_queries.py @@ -344,7 +344,11 @@ def test_pipeline_components_query_with_mimetype(self): self.assertIn("Test PostProcessor", post_processor_titles) def test_pipeline_components_query_with_mimetype_no_components(self): - """Test querying pipeline components with a mimetype that has no components.""" + """Test querying pipeline components with a mimetype that has limited components. + + Note: DOCX now has LlamaParseParser support, but no thumbnailer support. 
+ This test verifies the filtering behavior for file types with partial support. + """ # Use the enum value, not the full MIME type query = """ @@ -366,13 +370,19 @@ def test_pipeline_components_query_with_mimetype_no_components(self): } """ - variables = {"mimetype": "DOCX"} # Our test components do not support DOCX + variables = {"mimetype": "DOCX"} result = self.client.execute(query, variables=variables) self.assertIsNone(result.get("errors")) data = result["data"]["pipelineComponents"] - self.assertEqual(len(data["parsers"]), 0) + + # LlamaParseParser supports DOCX, so we expect at least one parser + parsers = data["parsers"] + parser_titles = [parser["title"] for parser in parsers] + self.assertIn("LlamaParse Parser", parser_titles) + + # No thumbnailers support DOCX self.assertEqual(len(data["thumbnailers"]), 0) # Embedders are included regardless of mimetype in our utils diff --git a/requirements/ingestors/llama_parse.txt b/requirements/ingestors/llama_parse.txt new file mode 100644 index 000000000..12219944a --- /dev/null +++ b/requirements/ingestors/llama_parse.txt @@ -0,0 +1,2 @@ +# LlamaParse document parsing +llama-parse>=0.5.0