From 8f0d3bd689370225fd372a8605d901b8ff3fc859 Mon Sep 17 00:00:00 2001 From: Khauneesh-AI Date: Mon, 23 Jun 2025 17:51:53 +0530 Subject: [PATCH 1/9] added sds banner --- images/synthetic-data-studio-banner.svg | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 images/synthetic-data-studio-banner.svg diff --git a/images/synthetic-data-studio-banner.svg b/images/synthetic-data-studio-banner.svg new file mode 100644 index 0000000..6804615 --- /dev/null +++ b/images/synthetic-data-studio-banner.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + From d2fa5f41b78cac9a6c8a98f00677174728b994b4 Mon Sep 17 00:00:00 2001 From: Keivan Vosoughi Date: Wed, 18 Jun 2025 16:34:51 -0700 Subject: [PATCH 2/9] DSE-45299: Fetching Templates from SDS API for SDS UI Add UseCaseSlector --- .../src/pages/DataGenerator/Configure.tsx | 20 +------ .../pages/DataGenerator/UseCaseSelector.tsx | 54 +++++++++++++++++++ app/client/src/pages/DataGenerator/hooks.ts | 26 ++++++++- app/client/src/types.ts | 9 +++- 4 files changed, 89 insertions(+), 20 deletions(-) create mode 100644 app/client/src/pages/DataGenerator/UseCaseSelector.tsx diff --git a/app/client/src/pages/DataGenerator/Configure.tsx b/app/client/src/pages/DataGenerator/Configure.tsx index a68bec8..bb7d42e 100644 --- a/app/client/src/pages/DataGenerator/Configure.tsx +++ b/app/client/src/pages/DataGenerator/Configure.tsx @@ -10,6 +10,7 @@ import { MODEL_PROVIDER_LABELS } from './constants'; import { ModelProviders, ModelProvidersDropdownOpts } from './types'; import { useWizardCtx } from './utils'; import FileSelectorButton from './FileSelectorButton'; +import UseCaseSelector from './UseCaseSelector'; const StepContainer = styled(Flex)` @@ -224,24 +225,7 @@ const Configure = () => { {(formData?.workflow_type === WorkflowType.SUPERVISED_FINE_TUNING || formData?.workflow_type === WorkflowType.FREE_FORM_DATA_GENERATION) && - - - } + } {( formData?.workflow_type === WorkflowType.SUPERVISED_FINE_TUNING || diff --git a/app/client/src/pages/DataGenerator/UseCaseSelector.tsx b/app/client/src/pages/DataGenerator/UseCaseSelector.tsx new file mode 100644 index 0000000..558ee28 --- /dev/null +++ b/app/client/src/pages/DataGenerator/UseCaseSelector.tsx @@ -0,0 +1,54 @@ +import { Form, Select } from "antd"; +import { FunctionComponent, useEffect, useState } from "react"; +import { useGetUseCases } from "./hooks"; +import { UseCase } from "../../types"; +import get from "lodash/get"; + +interface Props {} + + +const UseCaseSelector: FunctionComponent = () => { + const [useCases, setUseCases] = useState([]); + const useCasesReq = useGetUseCases(); + console.log('useCasesReq', useCasesReq); + + useEffect(() => { + if (useCasesReq.data) { + console.log('useCasesReq.data', useCasesReq.data); + let _useCases = get(useCasesReq, 'data.usecases', []); + _useCases = _useCases.map((useCase: any) => ({ + ...useCase, + label: useCase.name, + value: useCase.id + })); + console.log('_useCases', _useCases); + setUseCases(_useCases); + } + }, [useCasesReq.data]); + + + return ( + + + + ); +} + +export default UseCaseSelector; \ No newline at end of file diff --git a/app/client/src/pages/DataGenerator/hooks.ts b/app/client/src/pages/DataGenerator/hooks.ts index 2cb1dc7..5bd40e5 100644 --- a/app/client/src/pages/DataGenerator/hooks.ts +++ b/app/client/src/pages/DataGenerator/hooks.ts @@ -244,4 +244,28 @@ export const useDatasetSize = ( isError, error }; - } \ No newline at end of file + } + + export const fetchUseCases = async () => { + const resp = await fetch(`${BASE_API_URL}/use-cases`, { + method: 'GET' + }); + const body = await resp.json(); + return body; +} + +export const useGetUseCases = () => { + const { data, isLoading, isError, error, isFetching } = useQuery( + { + queryKey: ['fetchUseCases', fetchUseCases], + queryFn: () => fetchUseCases(), + refetchOnWindowFocus: false, + } + ); + return { + data, + isLoading: isLoading || isFetching, + isError, + error + }; +} \ No newline at end of file diff --git a/app/client/src/types.ts b/app/client/src/types.ts index ee3381b..295cccb 100644 --- a/app/client/src/types.ts +++ b/app/client/src/types.ts @@ -45,4 +45,11 @@ export const EXPORT_TYPE_LABELS: Record = { export type JobStatus = 'ENGINE_STOPPED' | 'ENGINE_SUCCEEDED' | 'ENGINE_TIMEDOUT' | 'ENGINE_SCHEDULING' | 'ENGINE_RUNNING' | 'null' | 'default'; -export const HuggingFaceIconUrl = "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"; \ No newline at end of file +export const HuggingFaceIconUrl = "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"; + +export interface UseCase { + name: string; + id: string; + label: string; + value: string; +} \ No newline at end of file From 246be13c601d2c6f02b42154f5f597281265d6cf Mon Sep 17 00:00:00 2001 From: Khauneesh-AI Date: Mon, 7 Jul 2025 14:38:51 +0530 Subject: [PATCH 3/9] changes to track completed rows in a complete or partial run --- .../2b4e8d9f6c3a_add_completed_rows.py | 30 +++++++++++++++++++ app/core/database.py | 19 +++++++----- app/migrations/alembic_schema_models.py | 1 + app/services/synthesis_service.py | 12 ++++---- 4 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 alembic/versions/2b4e8d9f6c3a_add_completed_rows.py diff --git a/alembic/versions/2b4e8d9f6c3a_add_completed_rows.py b/alembic/versions/2b4e8d9f6c3a_add_completed_rows.py new file mode 100644 index 0000000..5f1d56d --- /dev/null +++ b/alembic/versions/2b4e8d9f6c3a_add_completed_rows.py @@ -0,0 +1,30 @@ +"""add_completed_rows + +Revision ID: 2b4e8d9f6c3a +Revises: 1a8fdc23eb6f +Create Date: 2025-01-18 10:30:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '2b4e8d9f6c3a' +down_revision: Union[str, None] = '1a8fdc23eb6f' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add completed_rows column to generation_metadata table + with op.batch_alter_table('generation_metadata', schema=None) as batch_op: + batch_op.add_column(sa.Column('completed_rows', sa.Integer(), nullable=True)) + + +def downgrade() -> None: + # Remove completed_rows column from generation_metadata table + with op.batch_alter_table('generation_metadata', schema=None) as batch_op: + batch_op.drop_column('completed_rows') \ No newline at end of file diff --git a/app/core/database.py b/app/core/database.py index f5c7682..57e286a 100644 --- a/app/core/database.py +++ b/app/core/database.py @@ -71,8 +71,9 @@ def init_db(self): job_id TEXT, job_name TEXT UNIQUE, job_status TEXT, - job_creator_name TEXT - + job_creator_name TEXT, + completed_rows INTEGER + ) """) @@ -163,8 +164,8 @@ def save_generation_metadata(self, metadata: Dict) -> int: custom_prompt, model_parameters, input_key, output_key, output_value, generate_file_name, display_name, local_export_path, hf_export_path, s3_export_path, num_questions, total_count, topics, examples, - schema, doc_paths, input_path, job_id, job_name, job_status, job_creator_name - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + schema, doc_paths, input_path, job_id, job_name, job_status, job_creator_name, completed_rows + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """ values = ( @@ -194,7 +195,8 @@ def save_generation_metadata(self, metadata: Dict) -> int: metadata.get('job_id', None), metadata.get('job_name', None), metadata.get('job_status', None), - metadata.get('job_creator_name', None) + metadata.get('job_creator_name', None), + metadata.get('completed_rows', None) ) cursor.execute(query, values) @@ -212,7 +214,7 @@ def save_generation_metadata(self, metadata: Dict) -> int: print(f"Error saving metadata to database: {str(e)}") raise - def update_job_generate(self, job_name: str, generate_file_name: str, local_export_path: str, timestamp: str, job_status): + def update_job_generate(self, job_name: str, generate_file_name: str, local_export_path: str, timestamp: str, job_status, completed_rows): """Update job generate with retry mechanism""" max_retries = 3 retry_delay = 1 # seconds @@ -244,11 +246,12 @@ def update_job_generate(self, job_name: str, generate_file_name: str, local_expo SET generate_file_name = ?, local_export_path = ?, timestamp = ?, - job_status = ? + job_status = ?, + completed_rows = ? WHERE job_name = ? AND job_name IS NOT NULL AND job_name != '' - """, (generate_file_name, local_export_path, timestamp, job_status, job_name)) + """, (generate_file_name, local_export_path, timestamp, job_status, completed_rows,job_name)) rows_affected = cursor.rowcount conn.commit() diff --git a/app/migrations/alembic_schema_models.py b/app/migrations/alembic_schema_models.py index 3967a11..5eb735f 100644 --- a/app/migrations/alembic_schema_models.py +++ b/app/migrations/alembic_schema_models.py @@ -35,6 +35,7 @@ class GenerationMetadataModel(Base): job_name = Column(Text, unique=True) job_status = Column(Text) job_creator_name = Column(Text) + completed_rows = Column(Integer) class EvaluationMetadataModel(Base): __tablename__ = 'evaluation_metadata' diff --git a/app/services/synthesis_service.py b/app/services/synthesis_service.py index 5aa25bb..7f893f9 100644 --- a/app/services/synthesis_service.py +++ b/app/services/synthesis_service.py @@ -1093,7 +1093,8 @@ def json_serializable(obj): 'input_path': input_path_str, 'input_key': request.input_key, 'output_key': request.output_key, - 'output_value': request.output_value + 'output_value': request.output_value, + 'completed_rows': len(final_output) if final_output else 0 } if is_demo: @@ -1109,7 +1110,7 @@ def json_serializable(obj): generate_file_name = os.path.basename(file_path) if final_output else '' final_output_path = file_path if final_output else '' - self.db.update_job_generate(job_name, generate_file_name, final_output_path, timestamp, job_status) + self.db.update_job_generate(job_name, generate_file_name, final_output_path, timestamp, job_status, len(final_output) if final_output else 0) self.db.backup_and_restore_db() return { "status": "completed" if final_output else "failed", @@ -1157,13 +1158,14 @@ def json_serializable(obj): if saved_partial_results: # Update with actual file information for partial results generate_file_name = os.path.basename(file_path) - final_output_path = file_path + final_output_path = file_path + completed_rows = len(final_output) if final_output else 0 else: # No results saved, use empty values generate_file_name = '' final_output_path = '' - - self.db.update_job_generate(job_name, generate_file_name, final_output_path, timestamp, job_status) + completed_rows = 0 + self.db.update_job_generate(job_name, generate_file_name, final_output_path, timestamp, job_status, completed_rows = completed_rows ) raise def get_health_check(self) -> Dict: From 5c435bdab21a6254890516e9cb8b87c36f14499f Mon Sep 17 00:00:00 2001 From: Khauneesh-AI Date: Mon, 7 Jul 2025 16:42:43 +0530 Subject: [PATCH 4/9] alembic upgrade made simpler and robust for new column updates --- app/migrations/alembic_manager.py | 119 ++++++++++++++++++------------ 1 file changed, 71 insertions(+), 48 deletions(-) diff --git a/app/migrations/alembic_manager.py b/app/migrations/alembic_manager.py index 51538c6..7e1b257 100644 --- a/app/migrations/alembic_manager.py +++ b/app/migrations/alembic_manager.py @@ -1,61 +1,84 @@ # app/migrations/alembic_manager.py -from alembic.config import Config -from alembic import command -from alembic.script import ScriptDirectory -from alembic.runtime.migration import MigrationContext -from pathlib import Path -import os -from sqlalchemy import create_engine +import subprocess class AlembicMigrationManager: def __init__(self, db_path: str = None): - """Initialize Alembic with the same database path as DatabaseManager""" - self.app_path = Path(__file__).parent.parent.parent - - if db_path is None: - db_path = os.path.join(self.app_path, "metadata.db") - self.db_path = db_path - - # Initialize Alembic config - self.alembic_cfg = Config(str(self.app_path / "alembic.ini")) - self.alembic_cfg.set_main_option('script_location', str(self.app_path / "alembic")) - self.alembic_cfg.set_main_option('sqlalchemy.url', f'sqlite:///{db_path}') - - # Create engine for version checks - self.engine = create_engine(f'sqlite:///{db_path}') - - async def get_db_version(self) -> str: - """Get current database version""" - with self.engine.connect() as conn: - context = MigrationContext.configure(conn) - return context.get_current_revision() + """Initialize with database path (kept for interface compatibility)""" + self.db_path = db_path or "metadata.db" async def handle_database_upgrade(self) -> tuple[bool, str]: """ - Handle database migrations carefully to avoid disrupting existing data + Simple database migration using alembic upgrade head + No directory changes needed - already in project root """ try: - # First check if alembic_version table exists - try: - version = await self.get_db_version() - if version is None: - # Database exists but no alembic version - stamp current - command.stamp(self.alembic_cfg, "head") - return True, "Existing database stamped with current version" - except Exception: - # No alembic_version table - stamp current - command.stamp(self.alembic_cfg, "head") - return True, "Existing database stamped with current version" + # Run upgrade head - we're already in the right directory + result = subprocess.run( + ["alembic", "upgrade", "head"], + capture_output=True, + text=True, + check=True + ) - # Now check for and apply any new migrations - script = ScriptDirectory.from_config(self.alembic_cfg) - head_revision = script.get_current_head() + # Check if anything was actually upgraded + if "Running upgrade" in result.stdout: + return True, f"Database upgraded successfully: {result.stdout.strip()}" + else: + return True, "Database is already up to date" + + except subprocess.CalledProcessError as e: + error_msg = e.stderr or e.stdout or str(e) + return False, f"Database upgrade failed: {error_msg}" + except Exception as e: + return False, f"Error during database upgrade: {str(e)}" + + async def get_migration_status(self) -> dict: + """Get detailed migration status for debugging""" + try: + # Get current version + current_result = subprocess.run( + ["alembic", "current"], + capture_output=True, + text=True, + check=True + ) - if version != head_revision: - command.upgrade(self.alembic_cfg, "head") - return True, "Database schema updated successfully" + # Get head version + head_result = subprocess.run( + ["alembic", "show", "head"], + capture_output=True, + text=True, + check=True + ) - return True, "Database schema is up to date" - + return { + "current": current_result.stdout.strip(), + "head": head_result.stdout.strip(), + "status": "ready" + } + + except subprocess.CalledProcessError as e: + error_msg = e.stderr or e.stdout or str(e) + return {"error": f"Command failed: {error_msg}", "status": "error"} except Exception as e: - return False, f"Error during database upgrade: {str(e)}" \ No newline at end of file + return {"error": str(e), "status": "error"} + + async def get_current_version(self) -> str: + """Get current database version using alembic current command""" + try: + result = subprocess.run( + ["alembic", "current"], + capture_output=True, + text=True, + check=True + ) + + # Extract just the version ID from output like "2b4e8d9f6c3a (head)" + import re + match = re.search(r'([a-f0-9]{12})', result.stdout) + return match.group(1) if match else "none" + + except subprocess.CalledProcessError: + return "none" + except Exception: + return "unknown" \ No newline at end of file From d530647d07ae2944afccf4bcb0311f817e6f1c59 Mon Sep 17 00:00:00 2001 From: Khauneesh-AI Date: Mon, 7 Jul 2025 19:24:34 +0530 Subject: [PATCH 5/9] upgrade database process separated as a subprocess within same endpoint | This will enable newer migration easily without manual upgrade --- app/main.py | 23 ++++++++++++++++------- run_migrations.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 7 deletions(-) create mode 100644 run_migrations.py diff --git a/app/main.py b/app/main.py index 1ce5d47..3ec34ce 100644 --- a/app/main.py +++ b/app/main.py @@ -1420,13 +1420,22 @@ async def perform_upgrade(): # 2. Database migrations try: - db_success, db_message = await alembic_manager.handle_database_upgrade() - if db_success: - db_upgraded = True - messages.append(db_message) - else: - messages.append(f"Database upgrade failed: {db_message}") - raise HTTPException(status_code=500, detail=db_message) + # In your upgrade endpoint, you can add this debug line: + print(f"Current working directory: {os.getcwd()}") + print(f"Alembic.ini exists: {os.path.exists('alembic.ini')}") + print("--- Starting database migration via external script ---") + # Use `uv run` to ensure the script runs within the project's virtual environment + # This is more robust than just calling 'python' + result = subprocess.run( + ["uv", "run", "python", "run_migrations.py"], + capture_output=True, + text=True, + check=True # This will raise CalledProcessError on failure + ) + + print(result.stdout) # Log the output from the script + db_upgraded = True + messages.append("Database migration check completed successfully.") except Exception as e: messages.append(f"Database migration failed: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) diff --git a/run_migrations.py b/run_migrations.py new file mode 100644 index 0000000..dc83e49 --- /dev/null +++ b/run_migrations.py @@ -0,0 +1,33 @@ +import asyncio +import sys +from pathlib import Path + +# Ensure the 'app' directory is in the Python path +ROOT_DIR = Path(__file__).parent +APP_DIR = ROOT_DIR / "app" +sys.path.append(str(ROOT_DIR)) + +from app.migrations.alembic_manager import AlembicMigrationManager + +async def main(): + """ + Initializes the migration manager and runs the database upgrade. + This will always use the latest code from disk. + """ + print("--- Running dedicated migration script ---") + # Assumes your DB file is named metadata.db in the root + db_path = str(ROOT_DIR / "metadata.db") + alembic_manager = AlembicMigrationManager(db_path) + + success, message = await alembic_manager.handle_database_upgrade() + + if not success: + print(f"Migration Error: {message}") + # Exit with a non-zero status code to indicate failure + sys.exit(1) + + print(f"Migration Success: {message}") + print("--- Migration script finished ---") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From 29187307143d02ee3bd071171955f60095590361 Mon Sep 17 00:00:00 2001 From: Khauneesh-AI Date: Mon, 7 Jul 2025 20:10:12 +0530 Subject: [PATCH 6/9] added ticketing dataset temmplate i.e. ala hello world template for synthetic Data studio --- app/core/config.py | 79 ++++++++++++++++++++++++++++++++++++++++++++-- app/main.py | 1 + 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/app/core/config.py b/app/core/config.py index 777a6e4..78a14f7 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -17,6 +17,7 @@ class UseCase(str, Enum): LENDING_DATA = "lending_data" #HOUSING_DATA = "housing_data" CREDIT_CARD_DATA = "credit_card_data" + TICKETING_DATASET = "ticketing_dataset" class Technique(str, Enum): SFT = "sft" @@ -595,9 +596,46 @@ class UseCaseMetadataEval(BaseModel): """, - schema=None + schema=None ), -} + + UseCase.TICKETING_DATASET: UseCaseMetadata( + name="Ticketing Dataset", + description= "Synthetic dataset for ticketing system ", + topics=["Technical Issues", "Billing Queries", "Payment queries"], + default_examples=[ + { + "Prompt": "I have received this message that I owe $300 and I was instructed to pay the bill online. I already paid this amount and I am wondering why I received this message.", + "Completion": "report_payment_issue" + }, + { + "Prompt": "I will not be able to attend the presentation and would like to cancel my rsvp.", + "Completion": "cancel_ticket" + }, + { + "Prompt": "I am having questions regarding the exact time, location, and requirements of the event and would like to talk to customer service.", + "Completion": "Customer_service" + } + ] + , + prompt= """ + Generate authentic customer support ticket interactions that have a user query and system response. + For each user query, the system generates a keyword that is used to forward the user to the specific subsystem. + Requirements for user queries: + - Use professional, respectful language + - Follow standard customer service best practices + Each response should be a single id from the following list: + cancel_ticket,customer_service,report_payment_issue + Here are the explanations of the responses: + cancel_ticket means that the customer wants to cancel the ticket. + customer_service means that customer wants to talk to customer service. + report_payment_issue means that the customer is facing payment issues and wants to be forwarded to the billing department to resolve the issue. + + """, + schema=None + ) + } + USE_CASE_CONFIGS_EVALS = { @@ -916,6 +954,43 @@ class UseCaseMetadataEval(BaseModel): Give a score rating 1-10 for the given data. If there are more than 9 points to subtract use 1 as the absolute minimum scoring. List all justification as list. + """ + ), + UseCase.TICKETING_DATASET: UseCaseMetadataEval( + name="Ticketing Dataset", + default_examples=[ + { + "score": 5, + "justification": """ + The query is professionally written, respectful, and follows customer service best practices. + The response 'report_payment_issue' is one of the allowed keywords. + The matching between the query and response is perfect according to the provided definitions. + + """}, + { + "score": 3, + "justification": """ + The query is professionally written and respectful. + The response 'cancel_ticket' is one of the allowed keywords. + While the response uses a valid keyword, it doesn't match the most appropriate category for the specific query content. + """ + }, + + ], + prompt= """ + You are given a user query for a ticketing support system and the system responses which is a keyword that is used to forward the user to the specific subsystem. + Evaluate whether the queries: + - Use professional, respectful language + - Follow standard customer service best practices + Evaluate whether the responses use only one of the the following keywords: cancel_ticket,customer_service,report_payment_issue + Evaluate whether the solutions and responses are correctly matched based on the following definitions: + cancel_ticket means that the customer wants to cancel the ticket. + customer_service means that customer wants to talk to customer service. + report_payment_issue means that the customer is facing payment issues and wants to be forwarded to the billing department to resolve the issue. + Give a score of 1-5 based on the following instructions: + If the responses don’t match the four keywords give always value 1. + Rate the quality of the queries and responses based on the instructions give a rating between 1 to 5. + """ ) } diff --git a/app/main.py b/app/main.py index 3ec34ce..88c6a9e 100644 --- a/app/main.py +++ b/app/main.py @@ -773,6 +773,7 @@ async def get_use_cases(): {"id": UseCase.CUSTOM, "name": "Custom"}, {"id": UseCase.LENDING_DATA, "name": "Lending Data"}, {"id": UseCase.CREDIT_CARD_DATA, "name": "Credit Card Data"}, + {"id": UseCase.TICKETING_DATASET, "name": "Ticketing Dataset"}, ] } From a4a16b929c627f11d4018f9ff9795f80d5f95ad1 Mon Sep 17 00:00:00 2001 From: Keivan Vosoughi Date: Sun, 29 Jun 2025 22:54:31 -0700 Subject: [PATCH 7/9] DSE-45878: Add Mute Checkbox for the Welcome Page Fix Examples Table for Remote Templates Fix mute checkbox margin top Modify Examples Fix rendering examples based on the selected use case --- .../DataGenerator/CustomPromptButton.tsx | 1 - .../src/pages/DataGenerator/Examples.tsx | 34 ++++++---- .../pages/DataGenerator/UseCaseSelector.tsx | 5 +- app/client/src/pages/DataGenerator/hooks.ts | 62 ++++++++++++++++++- app/client/src/pages/DataGenerator/types.ts | 5 ++ app/client/src/pages/Home/WelcomePage.tsx | 13 +++- app/client/src/routes.tsx | 8 ++- 7 files changed, 106 insertions(+), 22 deletions(-) diff --git a/app/client/src/pages/DataGenerator/CustomPromptButton.tsx b/app/client/src/pages/DataGenerator/CustomPromptButton.tsx index d5781be..e3fe10c 100644 --- a/app/client/src/pages/DataGenerator/CustomPromptButton.tsx +++ b/app/client/src/pages/DataGenerator/CustomPromptButton.tsx @@ -59,7 +59,6 @@ const CustomPromptButton: React.FC = ({ model_id, inference_type, caii_en const [showModal, setShowModal] = useState(false); const [disabled, setDisabled] = useState(false); const custom_prompt_instructions = Form.useWatch('custom_prompt_instructions', { form, preserve: true }); - console.log('custom_prompt_instructions', custom_prompt_instructions); const mutation = useMutation({ mutationFn: fetchCustomPrompt diff --git a/app/client/src/pages/DataGenerator/Examples.tsx b/app/client/src/pages/DataGenerator/Examples.tsx index f5ce02a..61d71ac 100644 --- a/app/client/src/pages/DataGenerator/Examples.tsx +++ b/app/client/src/pages/DataGenerator/Examples.tsx @@ -9,14 +9,14 @@ import { useMutation } from "@tanstack/react-query"; import { useFetchExamples } from '../../api/api'; import TooltipIcon from '../../components/TooltipIcon'; import PCModalContent from './PCModalContent'; -import { File, QuestionSolution, WorkflowType } from './types'; +import { ExampleType, File, QuestionSolution, WorkflowType } from './types'; import FileSelectorButton from './FileSelectorButton'; -import { fetchFileContent } from './hooks'; +import { fetchFileContent, getExampleType, useGetExamplesByUseCase } from './hooks'; import { useState } from 'react'; import FreeFormExampleTable from './FreeFormExampleTable'; -const { Title } = Typography; +const { Title, Text } = Typography; const Container = styled.div` padding-bottom: 10px ` @@ -48,10 +48,7 @@ const StyledContainer = styled.div` const MAX_EXAMPLES = 5; -enum ExampleType { - FREE_FORM = 'freeform', - PROMPT_COMPLETION = 'promptcompletion' -} + const Examples: React.FC = () => { const form = Form.useFormInstance(); @@ -90,13 +87,13 @@ const Examples: React.FC = () => { title: 'Prompts', dataIndex: 'question', ellipsis: true, - render: (_text: QuestionSolution, record: QuestionSolution) => <>{record.question} + render: (_text: QuestionSolution, record: QuestionSolution) => {record.question} }, { title: 'Completions', dataIndex: 'solution', ellipsis: true, - render: (_text: QuestionSolution, record: QuestionSolution) => <>{record.solution} + render: (_text: QuestionSolution, record: QuestionSolution) => {record.solution} }, { title: 'Actions', @@ -178,13 +175,24 @@ const Examples: React.FC = () => { /> ) - }}, + } + }, ]; const dataSource = Form.useWatch('examples', form); - const { data: examples, loading: examplesLoading } = useFetchExamples(form.getFieldValue('use_case')); + const { examples, exmpleFormat, isLoading: examplesLoading } = + useGetExamplesByUseCase(form.getFieldValue('use_case')); + + // update examples if (!dataSource && examples) { - form.setFieldValue('examples', examples.examples) + form.setFieldValue('examples', examples) } + useEffect(() => { + if (!isEmpty(examples) && !isEmpty(exmpleFormat)) { + setExampleType(exmpleFormat as ExampleType); + form.setFieldValue('examples', examples || []); + } + }, [examples, exmpleFormat]); + const rowLimitReached = form.getFieldValue('examples')?.length === MAX_EXAMPLES; const workflowType = form.getFieldValue('workflow_type'); @@ -299,6 +307,8 @@ const Examples: React.FC = () => { {exampleType === ExampleType.FREE_FORM && !isEmpty(mutation.data) && } + {exampleType === ExampleType.FREE_FORM && form.getFieldValue('use_case') === 'lending_data' && + } {exampleType === ExampleType.FREE_FORM && isEmpty(mutation.data) && !isEmpty(values.examples) && } {exampleType === ExampleType.FREE_FORM && isEmpty(mutation.data) && isEmpty(values.examples) && diff --git a/app/client/src/pages/DataGenerator/UseCaseSelector.tsx b/app/client/src/pages/DataGenerator/UseCaseSelector.tsx index 558ee28..bb38a35 100644 --- a/app/client/src/pages/DataGenerator/UseCaseSelector.tsx +++ b/app/client/src/pages/DataGenerator/UseCaseSelector.tsx @@ -10,18 +10,15 @@ interface Props {} const UseCaseSelector: FunctionComponent = () => { const [useCases, setUseCases] = useState([]); const useCasesReq = useGetUseCases(); - console.log('useCasesReq', useCasesReq); useEffect(() => { if (useCasesReq.data) { - console.log('useCasesReq.data', useCasesReq.data); let _useCases = get(useCasesReq, 'data.usecases', []); _useCases = _useCases.map((useCase: any) => ({ ...useCase, label: useCase.name, value: useCase.id })); - console.log('_useCases', _useCases); setUseCases(_useCases); } }, [useCasesReq.data]); @@ -34,7 +31,7 @@ const UseCaseSelector: FunctionComponent = () => { rules={[ { required: true } ]} - tooltip='A specialize template for generating your dataset' + tooltip='A specialized template for generating your dataset' labelCol={{ span: 8 }} diff --git a/app/client/src/pages/DataGenerator/hooks.ts b/app/client/src/pages/DataGenerator/hooks.ts index 5bd40e5..c6069bb 100644 --- a/app/client/src/pages/DataGenerator/hooks.ts +++ b/app/client/src/pages/DataGenerator/hooks.ts @@ -4,7 +4,8 @@ import toNumber from 'lodash/toNumber'; import isEmpty from 'lodash/isEmpty'; import isString from 'lodash/isString'; import { useMutation, useQuery } from '@tanstack/react-query'; -import { WorkflowType } from './types'; +import { ExampleType, WorkflowType } from './types'; +import { first } from 'lodash'; const BASE_API_URL = import.meta.env.VITE_AMP_URL; @@ -257,7 +258,7 @@ export const useDatasetSize = ( export const useGetUseCases = () => { const { data, isLoading, isError, error, isFetching } = useQuery( { - queryKey: ['fetchUseCases', fetchUseCases], + queryKey: ['useCases'], queryFn: () => fetchUseCases(), refetchOnWindowFocus: false, } @@ -268,4 +269,59 @@ export const useGetUseCases = () => { isError, error }; -} \ No newline at end of file +} + +export const fetchExamplesByUseCase = async (use_case: string) => { + const resp = await fetch(`${BASE_API_URL}/${isEmpty(use_case) ? 'custom' : use_case}/gen_examples`, { + method: 'GET' + }); + const body = await resp.json(); + return body; +} + +export const useGetExamplesByUseCase = (use_case: string) => { + const { data, isLoading, isError, error, isFetching } = useQuery( + { + queryKey: ['fetchUseCaseTopics', fetchExamplesByUseCase], + queryFn: () => fetchExamplesByUseCase(use_case), + refetchOnWindowFocus: false, + } + ); + + if (isError) { + notification.error({ + message: 'Error', + description: `An error occurred while fetching the use case examples.\n ${error?.message}` + }); + } + + + let examples = []; + let exmpleFormat: ExampleType | null = null; + if (!isEmpty(data) && !isEmpty(data?.examples)) { + examples = get(data, 'examples', []); + exmpleFormat = getExampleType(examples); + } + + return { + data, + isLoading: isLoading || isFetching, + isError, + error, + examples, + exmpleFormat + }; +} + +export const getExampleType = (data: object[]) => { + if (!isEmpty(data)) { + const row = first(data); + const keys = Object.keys(row as object); + if (keys.length === 2) { + return ExampleType.PROMPT_COMPLETION; + } + return ExampleType.FREE_FORM; + } + return null; +} + diff --git a/app/client/src/pages/DataGenerator/types.ts b/app/client/src/pages/DataGenerator/types.ts index cb029ed..73c64b2 100644 --- a/app/client/src/pages/DataGenerator/types.ts +++ b/app/client/src/pages/DataGenerator/types.ts @@ -119,4 +119,9 @@ export enum TechniqueType { SFT = 'sft', CUSTOME_WORKFLOW = 'custom_workflow', FREE_FORM = 'freeform' +} + +export enum ExampleType { + FREE_FORM = 'freeform', + PROMPT_COMPLETION = 'promptcompletion' } \ No newline at end of file diff --git a/app/client/src/pages/Home/WelcomePage.tsx b/app/client/src/pages/Home/WelcomePage.tsx index 82498ab..135f70d 100644 --- a/app/client/src/pages/Home/WelcomePage.tsx +++ b/app/client/src/pages/Home/WelcomePage.tsx @@ -1,10 +1,12 @@ -import { Button, Col, Flex, Layout, Row, Image } from 'antd'; +import toString from 'lodash/toString'; +import { Button, Col, Flex, Layout, Row, Image, Checkbox } from 'antd'; import React from 'react'; import styled from 'styled-components'; import SDGIcon from '../../assets/sdg-landing.svg'; import LightBulbIcon from '../../assets/ic-lightbulb.svg'; import QueryPromptIcon from '../../assets/ic-query-prompt.svg'; import NumbersIcon from '../../assets/ic-numbers.svg'; +import { CheckboxChangeEvent } from 'antd/es/checkbox'; const { Content } = Layout; @@ -107,6 +109,11 @@ const InfoSection = styled.div` const WelcomePage: React.FC = () => { + const onChange = (e: CheckboxChangeEvent) => { + const checked = e.target.checked; + window.localStorage.setItem('sds_mute_welcome_page', toString(checked)); + } + return ( @@ -148,6 +155,10 @@ const WelcomePage: React.FC = () => {
+ +
+ {`Don't show me this again`} +
diff --git a/app/client/src/routes.tsx b/app/client/src/routes.tsx index 853257c..ee60001 100644 --- a/app/client/src/routes.tsx +++ b/app/client/src/routes.tsx @@ -12,6 +12,10 @@ import EvaluationDetailsPage from "./pages/EvaluationDetails/EvaluationDetailsPa //import TelemetryDashboard from "./components/TelemetryDashboard"; +const isWelcomePageMuted = () => { + return window.localStorage.getItem('sds_mute_welcome_page') === 'true'; +} + const router = createBrowserRouter([ { path: '/', @@ -19,7 +23,9 @@ const router = createBrowserRouter([ children: [ { path: '/', // Redirect root to Pages.WELCOME - element: , + element: isWelcomePageMuted() ? : + , + errorElement: }, { path: Pages.HOME, From decfbbac6faf7101e2223ea7bb6ee48845d6a877 Mon Sep 17 00:00:00 2001 From: Keivan Vosoughi Date: Tue, 8 Jul 2025 19:55:27 -0700 Subject: [PATCH 8/9] DSE-45114: Add Data Augmentation --- .../src/pages/DataGenerator/DataGenerator.tsx | 10 ++++++++-- app/client/src/pages/Home/HomePage.tsx | 20 +++++++++++++++++++ app/client/src/routes.tsx | 10 ++++++++-- app/client/src/types.ts | 6 ++++++ 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/app/client/src/pages/DataGenerator/DataGenerator.tsx b/app/client/src/pages/DataGenerator/DataGenerator.tsx index bbf9b71..eab6071 100644 --- a/app/client/src/pages/DataGenerator/DataGenerator.tsx +++ b/app/client/src/pages/DataGenerator/DataGenerator.tsx @@ -1,6 +1,6 @@ import isEmpty from 'lodash/isEmpty'; import isString from 'lodash/isString'; -import { useEffect, useRef, useState } from 'react'; +import { FunctionComponent, useEffect, useRef, useState } from 'react'; import { useLocation, useParams } from 'react-router-dom'; import { Button, Flex, Form, Layout, Steps } from 'antd'; @@ -20,10 +20,15 @@ import { DataGenWizardSteps, WizardStepConfig, WorkflowType } from './types'; import { WizardCtx } from './utils'; import { fetchDatasetDetails, useGetDatasetDetails } from '../DatasetDetails/hooks'; import { useMutation } from '@tanstack/react-query'; +import { WizardModeType } from '../../types'; const { Content } = Layout; // const { Title } = Typography; +interface Props { + mode?: WizardModeType; +} + const StyledTitle = styled.div` margin-top: 10px; font-family: Roboto, -apple-system, 'Segoe UI', sans-serif; @@ -95,7 +100,8 @@ const steps: WizardStepConfig[] = [ /** * Wizard component for Synthetic Data Generation workflow */ -const DataGenerator = () => { +const DataGenerator: FunctionComponent = ({ mode }) => { + console.log('DataGenerator mode: ', mode); const [current, setCurrent] = useState(0); const [maxStep, setMaxStep] = useState(0); const [isStepValid, setIsStepValid] = useState(false); diff --git a/app/client/src/pages/Home/HomePage.tsx b/app/client/src/pages/Home/HomePage.tsx index 231ba64..874668b 100644 --- a/app/client/src/pages/Home/HomePage.tsx +++ b/app/client/src/pages/Home/HomePage.tsx @@ -7,6 +7,7 @@ import EvaluationsTab from './EvaluationsTab'; import DatasetIcon from '../../assets/ic-datasets.svg'; import ArrowRightIcon from '../../assets/ic-arrow-right.svg'; import EvaluateIcon from '../../assets/ic-evaluations.svg'; +import DataAugmentationIcon from '../../assets/ic-data-augmentation.svg'; import EvaluateButton from './EvaluateButton'; import ExportsTab from './ExportsTab'; @@ -116,6 +117,25 @@ const HomePage: React.FC = () => { + +
+ Datasets +
+
+
Data Augmentation
+
+

Generate multi-dimension datasets using LLM custom prompts

+
+
+
+
+ +
+
+
Datasets diff --git a/app/client/src/routes.tsx b/app/client/src/routes.tsx index ee60001..d611417 100644 --- a/app/client/src/routes.tsx +++ b/app/client/src/routes.tsx @@ -2,7 +2,7 @@ import { Navigate, createBrowserRouter } from "react-router-dom"; import Layout from "./Container"; import DataGenerator from "./pages/DataGenerator"; import HomePage from "./pages/Home"; -import { Pages } from "./types"; +import { Pages, WizardModeType } from "./types"; import EvaluatorPage from "./pages/Evaluator"; import ReevaluatorPage from "./pages/Evaluator/ReevaluatorPage"; import DatasetDetailsPage from "./pages/DatasetDetails/DatasetDetailsPage"; @@ -35,7 +35,13 @@ const router = createBrowserRouter([ }, { path: Pages.GENERATOR, - element: , + element: , + errorElement: , + loader: async () => null + }, + { + path: Pages.GENERATOR, + element: , errorElement: , loader: async () => null }, diff --git a/app/client/src/types.ts b/app/client/src/types.ts index 295cccb..2490874 100644 --- a/app/client/src/types.ts +++ b/app/client/src/types.ts @@ -1,5 +1,6 @@ export enum Pages { GENERATOR = 'data-generator', + DATA_AUGMENTATION = 'data-augmentation', REGENERATE = 're-generate', EVALUATOR = 'evaluator', HISTORY = 'history', @@ -52,4 +53,9 @@ export interface UseCase { id: string; label: string; value: string; +} + +export enum WizardModeType { + DATA_GENERATION = 'data-generation', + DATA_AUGMENTATION = 'data-augmention' } \ No newline at end of file From ec2454e6280e8c4eade0f7b11077c82360f6934c Mon Sep 17 00:00:00 2001 From: Keivan Vosoughi Date: Tue, 8 Jul 2025 20:10:31 -0700 Subject: [PATCH 9/9] DSE-45114: Add Data Augmentation Icon This PR contains following changes: - Adds new card for Data Augmentation to the Home Page - Hide Workflow Type for the Data Augmentation --- .../src/assets/ic-data-augmentation.svg | 13 +++++++++++ .../src/pages/DataGenerator/Configure.tsx | 22 +++++++++++++++++-- .../src/pages/DataGenerator/DataGenerator.tsx | 1 + app/client/src/pages/DataGenerator/utils.ts | 13 +++++++++++ app/client/src/pages/Home/HomePage.tsx | 4 ++-- app/client/src/routes.tsx | 2 +- 6 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 app/client/src/assets/ic-data-augmentation.svg diff --git a/app/client/src/assets/ic-data-augmentation.svg b/app/client/src/assets/ic-data-augmentation.svg new file mode 100644 index 0000000..38e47ea --- /dev/null +++ b/app/client/src/assets/ic-data-augmentation.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/client/src/pages/DataGenerator/Configure.tsx b/app/client/src/pages/DataGenerator/Configure.tsx index bb7d42e..28bd3a4 100644 --- a/app/client/src/pages/DataGenerator/Configure.tsx +++ b/app/client/src/pages/DataGenerator/Configure.tsx @@ -8,9 +8,12 @@ import { File, WorkflowType } from './types'; import { useFetchModels } from '../../api/api'; import { MODEL_PROVIDER_LABELS } from './constants'; import { ModelProviders, ModelProvidersDropdownOpts } from './types'; -import { useWizardCtx } from './utils'; +import { getWizardModel, getWizardModeType, useWizardCtx } from './utils'; import FileSelectorButton from './FileSelectorButton'; import UseCaseSelector from './UseCaseSelector'; +import { useLocation } from 'react-router-dom'; +import { WizardModeType } from '../../types'; +import { get } from 'lodash'; const StepContainer = styled(Flex)` @@ -39,7 +42,7 @@ export const USECASE_OPTIONS = [ export const WORKFLOW_OPTIONS = [ { label: 'Supervised Fine-Tuning', value: 'supervised-fine-tuning' }, { label: 'Custom Data Generation', value: 'custom' }, - { label: 'Freeform Data Generation', value: 'freeform' } + // { label: 'Freeform Data Generation', value: 'freeform' } ]; export const MODEL_TYPE_OPTIONS: ModelProvidersDropdownOpts = [ @@ -48,6 +51,18 @@ export const MODEL_TYPE_OPTIONS: ModelProvidersDropdownOpts = [ ]; const Configure = () => { + const location = useLocation(); + const [wizardModeType, setWizardModeType] = useState(getWizardModeType(location)); + + useEffect(() => { + if (wizardModeType === WizardModeType.DATA_AUGMENTATION) { + setWizardModeType(WizardModeType.DATA_AUGMENTATION); + form.setFieldValue('workflow_type', 'freeform'); + } else { + setWizardModeType(WizardModeType.DATA_GENERATION); + } + }, [location, wizardModeType]); + const form = Form.useFormInstance(); const formData = Form.useWatch((values) => values, form); const { setIsStepValid } = useWizardCtx(); @@ -141,8 +156,10 @@ const Configure = () => { label='Model Provider' rules={[{ required: true }]} labelCol={labelCol} + shouldUpdate >