Skip to content

Commit c7766e9

Browse files
committed
Remove the redundant self.indexed_dataset attribute from the FIM dataset; use self.dataset instead
Signed-off-by: dimapihtar <[email protected]>
1 parent: 092ecb9 — commit: c7766e9

File tree

1 file changed: 2 additions (+2) and 3 deletions (-3)

1 file changed

+2
-3
lines changed

megatron/training/datasets/fim_dataset.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ def __init__(
6262
) -> None:
6363
super().__init__(indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config)
6464

65-
self.indexed_dataset = indexed_dataset
6665
self.np_rng = np.random.RandomState(seed=self.config.random_seed)
6766
logger.info(f"Initialized FIM RNG with seed = {self.config.random_seed}")
6867
# get FIM params
@@ -111,7 +110,7 @@ def _query_document_sample_shuffle_indices(self, idx: int) -> Tuple[np.ndarray,
111110

112111
# Add the entire sample
113112
sample_parts.append(
114-
self.indexed_dataset.get(
113+
self.dataset.get(
115114
self.document_index[doc_index_beg],
116115
offset=doc_index_beg_offset,
117116
length=doc_index_end_offset - doc_index_beg_offset + 1,
@@ -127,7 +126,7 @@ def _query_document_sample_shuffle_indices(self, idx: int) -> Tuple[np.ndarray,
127126
# Add the sample part
128127
offset = 0 if i > doc_index_beg else doc_index_beg_offset
129128
length = None if i < doc_index_end else doc_index_end_offset + 1
130-
sample_parts.append(self.indexed_dataset.get(self.document_index[i], offset=offset, length=length))
129+
sample_parts.append(self.dataset.get(self.document_index[i], offset=offset, length=length))
131130

132131
sample = np.concatenate(sample_parts)
133132

0 commit comments

Comments
 (0)