Skip to content

DSE-45114: Add Data Augmentation #91

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions alembic/versions/2b4e8d9f6c3a_add_completed_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""add_completed_rows
Revision ID: 2b4e8d9f6c3a
Revises: 1a8fdc23eb6f
Create Date: 2025-01-18 10:30:00.000000
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration it
# applies on top of; both match the "Revision ID" / "Revises" lines in the
# module docstring above.
revision: str = '2b4e8d9f6c3a'
down_revision: Union[str, None] = '1a8fdc23eb6f'
# No branch labels or cross-revision dependencies are declared for this migration.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the nullable integer ``completed_rows`` column to ``generation_metadata``."""
    completed_rows = sa.Column('completed_rows', sa.Integer(), nullable=True)
    # Batch mode is kept so the ALTER goes through Alembic's batch operations API.
    with op.batch_alter_table('generation_metadata', schema=None) as table_op:
        table_op.add_column(completed_rows)


def downgrade() -> None:
    """Drop the ``completed_rows`` column from ``generation_metadata``."""
    table_name, column_name = 'generation_metadata', 'completed_rows'
    # Mirrors upgrade(): the column is removed through the same batch operations API.
    with op.batch_alter_table(table_name, schema=None) as table_op:
        table_op.drop_column(column_name)
13 changes: 13 additions & 0 deletions app/client/src/assets/ic-data-augmentation.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
42 changes: 22 additions & 20 deletions app/client/src/pages/DataGenerator/Configure.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ import { File, WorkflowType } from './types';
import { useFetchModels } from '../../api/api';
import { MODEL_PROVIDER_LABELS } from './constants';
import { ModelProviders, ModelProvidersDropdownOpts } from './types';
import { useWizardCtx } from './utils';
import { getWizardModel, getWizardModeType, useWizardCtx } from './utils';
import FileSelectorButton from './FileSelectorButton';
import UseCaseSelector from './UseCaseSelector';
import { useLocation } from 'react-router-dom';
import { WizardModeType } from '../../types';
import { get } from 'lodash';


const StepContainer = styled(Flex)`
Expand Down Expand Up @@ -38,7 +42,7 @@ export const USECASE_OPTIONS = [
export const WORKFLOW_OPTIONS = [
{ label: 'Supervised Fine-Tuning', value: 'supervised-fine-tuning' },
{ label: 'Custom Data Generation', value: 'custom' },
{ label: 'Freeform Data Generation', value: 'freeform' }
// { label: 'Freeform Data Generation', value: 'freeform' }
];

export const MODEL_TYPE_OPTIONS: ModelProvidersDropdownOpts = [
Expand All @@ -47,6 +51,18 @@ export const MODEL_TYPE_OPTIONS: ModelProvidersDropdownOpts = [
];

const Configure = () => {
const location = useLocation();
const [wizardModeType, setWizardModeType] = useState(getWizardModeType(location));

useEffect(() => {
if (wizardModeType === WizardModeType.DATA_AUGMENTATION) {
setWizardModeType(WizardModeType.DATA_AUGMENTATION);
form.setFieldValue('workflow_type', 'freeform');
} else {
setWizardModeType(WizardModeType.DATA_GENERATION);
}
}, [location, wizardModeType]);

const form = Form.useFormInstance();
const formData = Form.useWatch((values) => values, form);
const { setIsStepValid } = useWizardCtx();
Expand Down Expand Up @@ -140,8 +156,10 @@ const Configure = () => {
label='Model Provider'
rules={[{ required: true }]}
labelCol={labelCol}
shouldUpdate
>
<Select

onChange={() => form.setFieldValue('model_id', undefined)}
placeholder={'Select a model provider'}
>
Expand Down Expand Up @@ -209,6 +227,7 @@ const Configure = () => {
label='Workflow'
tooltip='A specialized workflow for your dataset'
labelCol={labelCol}
hidden={wizardModeType === WizardModeType.DATA_AUGMENTATION}
shouldUpdate
rules={[
{ required: true }
Expand All @@ -224,24 +243,7 @@ const Configure = () => {
</Form.Item>
{(formData?.workflow_type === WorkflowType.SUPERVISED_FINE_TUNING ||
formData?.workflow_type === WorkflowType.FREE_FORM_DATA_GENERATION) &&
<Form.Item
name='use_case'
label='Template'
rules={[
{ required: true }
]}
tooltip='A specialize template for generating your dataset'
labelCol={labelCol}
shouldUpdate
>
<Select placeholder={'Select a template'}>
{USECASE_OPTIONS.map(option =>
<Select.Option key={option.value} value={option.value}>
{option.label}
</Select.Option>
)}
</Select>
</Form.Item>}
<UseCaseSelector />}

{(
formData?.workflow_type === WorkflowType.SUPERVISED_FINE_TUNING ||
Expand Down
1 change: 0 additions & 1 deletion app/client/src/pages/DataGenerator/CustomPromptButton.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ const CustomPromptButton: React.FC<Props> = ({ model_id, inference_type, caii_en
const [showModal, setShowModal] = useState(false);
const [disabled, setDisabled] = useState(false);
const custom_prompt_instructions = Form.useWatch('custom_prompt_instructions', { form, preserve: true });
console.log('custom_prompt_instructions', custom_prompt_instructions);

const mutation = useMutation({
mutationFn: fetchCustomPrompt
Expand Down
11 changes: 9 additions & 2 deletions app/client/src/pages/DataGenerator/DataGenerator.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import isEmpty from 'lodash/isEmpty';
import isString from 'lodash/isString';
import { useEffect, useRef, useState } from 'react';
import { FunctionComponent, useEffect, useRef, useState } from 'react';
import { useLocation, useParams } from 'react-router-dom';

import { Button, Flex, Form, Layout, Steps } from 'antd';
Expand All @@ -20,10 +20,15 @@ import { DataGenWizardSteps, WizardStepConfig, WorkflowType } from './types';
import { WizardCtx } from './utils';
import { fetchDatasetDetails, useGetDatasetDetails } from '../DatasetDetails/hooks';
import { useMutation } from '@tanstack/react-query';
import { WizardModeType } from '../../types';

const { Content } = Layout;
// const { Title } = Typography;

interface Props {
mode?: WizardModeType;
}

const StyledTitle = styled.div`
margin-top: 10px;
font-family: Roboto, -apple-system, 'Segoe UI', sans-serif;
Expand Down Expand Up @@ -95,13 +100,15 @@ const steps: WizardStepConfig[] = [
/**
* Wizard component for Synthetic Data Generation workflow
*/
const DataGenerator = () => {
const DataGenerator: FunctionComponent<Props> = ({ mode }) => {
console.log('DataGenerator mode: ', mode);
const [current, setCurrent] = useState(0);
const [maxStep, setMaxStep] = useState(0);
const [isStepValid, setIsStepValid] = useState<boolean>(false);

// Data passed from listing table to prepopulate form
const location = useLocation();
console.log('DataGenerator location: ', location);
const { generate_file_name } = useParams();
const initialData = location?.state?.data;
const mutation = useMutation({
Expand Down
34 changes: 22 additions & 12 deletions app/client/src/pages/DataGenerator/Examples.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ import { useMutation } from "@tanstack/react-query";
import { useFetchExamples } from '../../api/api';
import TooltipIcon from '../../components/TooltipIcon';
import PCModalContent from './PCModalContent';
import { File, QuestionSolution, WorkflowType } from './types';
import { ExampleType, File, QuestionSolution, WorkflowType } from './types';
import FileSelectorButton from './FileSelectorButton';

import { fetchFileContent } from './hooks';
import { fetchFileContent, getExampleType, useGetExamplesByUseCase } from './hooks';
import { useState } from 'react';
import FreeFormExampleTable from './FreeFormExampleTable';

const { Title } = Typography;
const { Title, Text } = Typography;
const Container = styled.div`
padding-bottom: 10px
`
Expand Down Expand Up @@ -48,10 +48,7 @@ const StyledContainer = styled.div`

const MAX_EXAMPLES = 5;

enum ExampleType {
FREE_FORM = 'freeform',
PROMPT_COMPLETION = 'promptcompletion'
}


const Examples: React.FC = () => {
const form = Form.useFormInstance();
Expand Down Expand Up @@ -90,13 +87,13 @@ const Examples: React.FC = () => {
title: 'Prompts',
dataIndex: 'question',
ellipsis: true,
render: (_text: QuestionSolution, record: QuestionSolution) => <>{record.question}</>
render: (_text: QuestionSolution, record: QuestionSolution) => <Text>{record.question}</Text>
},
{
title: 'Completions',
dataIndex: 'solution',
ellipsis: true,
render: (_text: QuestionSolution, record: QuestionSolution) => <>{record.solution}</>
render: (_text: QuestionSolution, record: QuestionSolution) => <Text>{record.solution}</Text>
},
{
title: 'Actions',
Expand Down Expand Up @@ -178,13 +175,24 @@ const Examples: React.FC = () => {
/>
</Flex>
)
}},
}
},
];
const dataSource = Form.useWatch('examples', form);
const { data: examples, loading: examplesLoading } = useFetchExamples(form.getFieldValue('use_case'));
const { examples, exmpleFormat, isLoading: examplesLoading } =
useGetExamplesByUseCase(form.getFieldValue('use_case'));

// update examples
if (!dataSource && examples) {
form.setFieldValue('examples', examples.examples)
form.setFieldValue('examples', examples)
}
useEffect(() => {
if (!isEmpty(examples) && !isEmpty(exmpleFormat)) {
setExampleType(exmpleFormat as ExampleType);
form.setFieldValue('examples', examples || []);
}
}, [examples, exmpleFormat]);

const rowLimitReached = form.getFieldValue('examples')?.length === MAX_EXAMPLES;
const workflowType = form.getFieldValue('workflow_type');

Expand Down Expand Up @@ -299,6 +307,8 @@ const Examples: React.FC = () => {
</Header>
{exampleType === ExampleType.FREE_FORM && !isEmpty(mutation.data) &&
<FreeFormExampleTable data={mutation.data}/>}
{exampleType === ExampleType.FREE_FORM && form.getFieldValue('use_case') === 'lending_data' &&
<FreeFormExampleTable data={form.getFieldValue('examples')}/>}
{exampleType === ExampleType.FREE_FORM && isEmpty(mutation.data) && !isEmpty(values.examples) &&
<FreeFormExampleTable data={values.examples}/>}
{exampleType === ExampleType.FREE_FORM && isEmpty(mutation.data) && isEmpty(values.examples) &&
Expand Down
51 changes: 51 additions & 0 deletions app/client/src/pages/DataGenerator/UseCaseSelector.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { Form, Select } from "antd";
import { FunctionComponent, useEffect, useState } from "react";
import { useGetUseCases } from "./hooks";
import { UseCase } from "../../types";
import get from "lodash/get";

interface Props {}


/**
 * Form item for the `use_case` field ("Template").
 *
 * The dropdown options come from `useGetUseCases()`: each entry under
 * `data.usecases` is copied and given a `label` (its `name`) and a `value`
 * (its `id`) before being stored in local state and rendered as a
 * `Select.Option`.
 */
const UseCaseSelector: FunctionComponent<Props> = () => {
  const [useCases, setUseCases] = useState<UseCase[]>([]);
  const useCasesReq = useGetUseCases();

  useEffect(() => {
    // Nothing to map until the request has produced data.
    if (!useCasesReq.data) {
      return;
    }
    const fetched = get(useCasesReq, 'data.usecases', []);
    const withSelectFields = fetched.map((useCase: any) => ({
      ...useCase,
      label: useCase.name,
      value: useCase.id
    }));
    setUseCases(withSelectFields);
  }, [useCasesReq.data]);

  const optionElements = useCases.map(option => (
    <Select.Option key={option.value} value={option.value}>
      {option.label}
    </Select.Option>
  ));

  return (
    <Form.Item
      name='use_case'
      label='Template'
      rules={[{ required: true }]}
      tooltip='A specialized template for generating your dataset'
      labelCol={{ span: 8 }}
      shouldUpdate
    >
      <Select placeholder={'Select a template'}>
        {optionElements}
      </Select>
    </Form.Item>
  );
}

export default UseCaseSelector;
Loading