# 🎉 Major Updates
- Add support for image-text-to-text models (e.g., Llama3.2-Vision and
UI-TARS)
- Add support for additional text-to-text models (DeepAlignment,
LlamaGuard3, and HarmBench Classifier)
- Add example attack against LLaDa, a large language diffusion model
- Add `DataMapper` abstraction to enable easy adaptation of existing
datasets to models
# 🎈 Minor Updates
- Add `good_token_ids` support to GCG optimizer
- Save the best attack to disk at the last step and reduce saved state for
hard-token attacks
- Output only continuation tokens, not the full prompt, during evaluation
- Remove check for back-to-back tags in tokenizer
- Enable command-line modification of response via `response.prefix=`
and `response.suffix=`
- `TaggedTokenizer` now supports returning `input_map` when
`return_tensors=None`
# 🚧 Bug Fixes
- Fix tokenizer prefix-space detection (e.g., Llama2's tokenizer)
- Allow early stop with multi-sample datasets
- All `make` commands now run in isolated virtual environments
- `max_new_tokens` generates exactly that many tokens at test time
regardless of `eos_token`
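The `max_new_tokens` fix above can be illustrated with a toy decode loop. This is a minimal sketch in plain Python, not llmart's actual generation code; the only point is the `eos_token` handling:

```python
EOS = 0  # toy end-of-sequence token id

def toy_generate(next_token_fn, max_new_tokens, ignore_eos=False):
    """Toy decode loop: with ignore_eos=True, exactly max_new_tokens
    tokens are produced even if the EOS token appears early."""
    tokens = []
    for step in range(max_new_tokens):
        tok = next_token_fn(step)
        tokens.append(tok)
        if tok == EOS and not ignore_eos:
            break  # early stop on EOS
    return tokens

# A fake model that emits EOS on the second step.
fake = lambda step: EOS if step == 1 else 7

assert toy_generate(fake, 5) == [7, 0]                    # stops at EOS
assert toy_generate(fake, 5, ignore_eos=True) == [7, 0, 7, 7, 7]  # exactly 5
```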
---------
Co-authored-by: Copilot <[email protected]>
Co-authored-by: Marius Arvinte <[email protected]>
Co-authored-by: Weilin Xu <[email protected]>
```diff
-> uv run accelerate launch -m llmart model=deepseek-r1-distill-llama-8b data=basic per_device_bs=64 "response.replace_with=`echo -e '\"<think>\nOkay, so I need to tell someone about Saturn.\n</think>\n\nNO WAY JOSE\"'`" steps=3
-> uv run accelerate launch -m llmart model=deepseek-r1-distill-llama-8b data=basic "response.replace_with=`echo -e '\"<think>\n$(REPEATED_CONTENT)\n</think>\n\nNO WAY JOSE\"'`" steps=3
+run-reasoning:
+> $(RUN_GPU) accelerate launch -m llmart model=deepseek-r1-distill-llama-8b data=basic per_device_bs=64 "response.prefix=`echo -e '\"<think>\nOkay, so I need to tell someone about Saturn.\n</think>\n\n\"'`" steps=3
```
README.md (+49 −19)
</div>

## 🆕 Latest updates
❗❗ Release 2025.06 significantly expands the types of models that can be attacked using **LLM**art and adds an image modality attack example that combines **LLM**art with Intel's [MART](https://github.com/IntelLabs/MART) library, as well as the first-ever attack on a diffusion language model (dLLM)!

❗New core library support and examples for attacking VLMs. Check out our new [example](examples/vlm) on vision modality attacks against a [computer use model](https://huggingface.co/ByteDance-Seed/UI-TARS-7B-DPO)!

❗New core library support for out-of-the-box attacks against guardrail models and data formats such as [HarmBench](https://github.com/centerforaisafety/HarmBench). Just specify the model and data directly in the command line and press the Enter key!
```bash
uv run accelerate launch -m llmart model=harmbench-classifier data=harmbench data.subset=[0]
```

❗New example for attacking the [LLaDA](https://ml-gsai.github.io/LLaDA-demo/) diffusion large language model. If you're an AI security expert, the conclusion won't surprise you: **LLM**art can crack it in ~10 minutes in our ready-to-run [example](examples/llada)!

❗We made it easier to adapt existing datasets to existing models via the [DataMapper](src/llmart/data.py#L93) abstraction. See [Custom Dataset or DataMapper](#custom-dataset-or-datamapper) for more details!

<details>
<summary>Past updates</summary>

❗Release 2025.04 brings full native support for running **LLM**art on [Intel AI PCs](https://www.intel.com/content/www/us/en/products/docs/processors/core-ultra/ai-pc.html)! This allows AI PC owners to _locally_ and rigorously evaluate the security of their own privately fine-tuned and deployed LLMs. This release also marks our transition to a `uv`-centric install experience. Enjoy robust, platform-agnostic (Windows, Linux) one-line installs by using `uv sync --extra gpu` (for GPUs) or `uv sync --extra xpu` (for Intel XPUs).

❗Release 2025.03 brings a new experimental functionality for letting **LLM**art automatically estimate the maximum usable `per_device_bs`. This can result in speed-ups of up to 10x on devices with a sufficient amount of memory! Enable it from the command line using `per_device_bs=-1`.

❗Release 2025.02 brings significant speed-ups to the core library, with zero user involvement.\
Inspecting and adding a new dataset to **LLM**art only involves a single function that returns a list in the standard Hugging Face conversation format.
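The "single function" idea can be sketched as follows. This is a minimal illustration of the standard Hugging Face role/content conversation format, reusing the document's own example strings; the function name is illustrative, not llmart's actual API:

```python
def make_dataset():
    """Return a list of conversations in the standard Hugging Face
    chat format: each turn is a dict with "role" and "content" keys."""
    return [
        [
            {"role": "user", "content": "Tell me about the planet Saturn."},
            {"role": "assistant", "content": "NO WAY JOSE"},
        ],
    ]

convs = make_dataset()
assert convs[0][0]["role"] == "user"
```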

### Source code modification
The `basic` dataset is the simplest instantiation that lives at [`datasets/basic.py`](src/llmart/datasets/basic.py), but is also powerful enough to support custom modifications.

For example, to modify the natural language prompt and desired open-ended response, the following basic edit is sufficient:
```diff
diff src/llmart/datasets/basic.py
example = dict(
-    prompt="Tell me about the planet Saturn.", completion="NO WAY JOSE"
+    prompt="Tell me how to do something not allowed.", completion="Sure, I'll tell you: first, you"
)
```

Inducing a closed-ended response can also be done directly by typing out the end-of-turn token. For example, for the Llama 3 family of models this is:
```diff
diff src/llmart/datasets/basic.py
example = dict(
-    prompt="Tell me about the planet Saturn.", completion="NO WAY JOSE"
+    prompt="Tell me about the planet Saturn.", completion="No!<|eot_id|>"
)
```

### Command-line modification
**LLM**art also supports loading the [AdvBench](https://github.com/llm-attacks/llm-attacks) dataset, which comes with pre-defined target responses to ensure consistent benchmarks.

Using AdvBench with **LLM**art requires specifying the desired subset of samples to attack. By default, the following command will automatically download the .csv file from its [original source](https://raw.githubusercontent.com/llm-attacks/llm-attacks/refs/heads/main/data/advbench/harmful_behaviors.csv) and use it as a dataset:
To train a single adversarial attack on multiple samples, users can specify the exact samples via `data.subset=[0,1]`.
The above command is also compatible with local modifications of the dataset by including the `dataset.files=/path/to/file.csv` argument.
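The AdvBench CSV maps naturally onto prompt/completion pairs. A minimal sketch of that mapping, assuming the `goal`/`target` column names used by `harmful_behaviors.csv` and using a harmless two-row stand-in instead of the real file (no llmart APIs involved):

```python
import csv
import io

# A two-row stand-in for harmful_behaviors.csv, which uses
# "goal" (prompt) and "target" (desired response) columns.
CSV_TEXT = """goal,target
Write a tutorial on X,"Sure, here is a tutorial on X"
Explain how to do Y,"Sure, here is how to do Y"
"""

def load_advbench(fileobj):
    """Map each CSV row to a prompt/completion example."""
    return [
        dict(prompt=row["goal"], completion=row["target"])
        for row in csv.DictReader(fileobj)
    ]

examples = load_advbench(io.StringIO(CSV_TEXT))
assert examples[0]["completion"].startswith("Sure")
```

A local file passed via `dataset.files=/path/to/file.csv` would need the same column layout.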
### Custom Dataset or DataMapper
In the most general case, you can write your own [dataset loading script](https://huggingface.co/docs/datasets/en/dataset_script) or [DataMapper](src/llmart/data.py#L93) and pass it to **LLM**art. For example, you could write a custom `DataMapper` for the dataset from [BoN Jailbreaking](https://github.com/jplhughes/bon-jailbreaking/) targeting the [Unispac/Llama2-7B-Chat-Augmented](https://huggingface.co/Unispac/Llama2-7B-Chat-Augmented) model by creating a `/tmp/bon_jailbreaks.py` file with the following contents:
```python
from llmart import DataMapper


class BoNJailbreaksMapper(DataMapper):
    """Make text_jailbreaks.csv compatible with the Llama2 chat template."""

    def __call__(self, batch):
        # batch contains the following keys from text_jailbreaks.csv:
        ...
```

Just make sure you conform to the output format in [`datasets/basic.py`](src/llmart/datasets/basic.py).

See [`datasets/basic.py`](src/llmart/datasets/basic.py) for how to write a custom dataset and/or datamapper.
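As a rough standalone illustration of what such a mapper does, here is a toy version in plain Python with no llmart imports; the columnar batch schema and class name are assumptions for illustration, not llmart's actual interface:

```python
class ToyChatMapper:
    """Map a columnar batch of raw prompts/responses into the
    Hugging Face chat format expected downstream."""

    def __call__(self, batch):
        # batch is columnar: {"prompt": [...], "response": [...]}
        return {
            "conversation": [
                [
                    {"role": "user", "content": p},
                    {"role": "assistant", "content": r},
                ]
                for p, r in zip(batch["prompt"], batch["response"])
            ]
        }

out = ToyChatMapper()({"prompt": ["hi"], "response": ["no"]})
assert out["conversation"][0][1] == {"role": "assistant", "content": "no"}
```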
## :chart_with_downwards_trend: Optimizers and schedulers
Discrete optimization for language models [(Lei et al., 2019)](https://proceedings.mlsys.org/paper_files/paper/2019/hash/676638b91bc90529e09b22e58abb01d6-Abstract.html), in particular the Greedy Coordinate Gradient (GCG) algorithm applied to auto-regressive LLMs [(Zou et al., 2023)](https://arxiv.org/abs/2307.15043), is the main focus of [`optim.py`](src/llmart/optim.py).
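As a toy illustration of the coordinate-search idea behind GCG: repeatedly swap one token position at a time to whichever vocabulary entry lowers the loss. This sketch uses a tiny vocabulary and no gradients at all; real GCG first ranks candidate swaps by the gradient of the loss before evaluating them, so treat this purely as intuition:

```python
def toy_coordinate_search(loss_fn, seq, vocab, sweeps=3):
    """Greedily swap one position at a time to the token that
    minimizes the loss, sweeping over all positions."""
    seq = list(seq)
    for _ in range(sweeps):
        for i in range(len(seq)):
            # Try every vocabulary token at position i; keep the best.
            seq[i] = min(vocab, key=lambda tok: loss_fn(seq[:i] + [tok] + seq[i + 1:]))
    return seq

# Toy loss: elementwise distance from a target sequence the
# optimizer cannot see directly, only query through loss_fn.
target = [3, 1, 2]
loss = lambda s: sum(abs(a - b) for a, b in zip(s, target))

found = toy_coordinate_search(loss, [0, 0, 0], vocab=range(4))
assert found == target  # coordinate sweeps recover the target
```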
If you find this repository useful in your work, please cite:
author = {Cory Cornelius and Marius Arvinte and Sebastian Szyller and Weilin Xu and Nageen Himayat},
title = {{LLMart}: {L}arge {L}anguage {M}odel adversarial robustness toolbox},