@@ -244,64 +244,87 @@ def get_language_file_extensions(language: ProgrammingLanguage):
244244
245245
def determine_project_language(folder_path: str, strategy: Literal["most_common", "package_json"] = "package_json") -> ProgrammingLanguage:
    """Determine the primary programming language of a project.

    Args:
        folder_path (str): Path to the folder to analyze.
        strategy (Literal["most_common", "package_json"]): Strategy to use for determining the language.
            "most_common" counts source files by extension; "package_json" only checks
            whether a package.json exists directly inside ``folder_path``.

    Returns:
        ProgrammingLanguage: The determined programming language.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values, or if the
            "most_common" strategy is given a path that is not an existing directory.
    """
    # TODO: Create a new strategy that follows gitignore
    if strategy == "most_common":
        return _determine_language_by_file_count(folder_path)
    if strategy == "package_json":
        return _determine_language_by_package_json(folder_path)

    # An unrecognised strategy used to fall off the end of the function and
    # silently return None, which violates the declared return type.
    msg = f"Unknown strategy: {strategy!r}"
    raise ValueError(msg)
272262
273- # Initialize counters for each language
274- language_counts = Counter ()
275263
276- # Walk through the directory
277- for file_path in folder .rglob ("*" ):
278- # Skip directories and hidden files
279- if file_path .is_dir () or file_path .name .startswith ("." ):
280- continue
def _determine_language_by_package_json(folder_path: str) -> ProgrammingLanguage:
    """Guess the project language from the presence of a package.json file.

    This is a cheap heuristic: it looks at a single path and never walks the
    tree, so it is faster but less accurate than the file count strategy.

    Args:
        folder_path (str): Path to the folder to analyze

    Returns:
        ProgrammingLanguage: TYPESCRIPT if ``folder_path``/package.json exists, otherwise PYTHON
    """
    has_package_json = Path(folder_path).joinpath("package.json").exists()
    return ProgrammingLanguage.TYPESCRIPT if has_package_json else ProgrammingLanguage.PYTHON
290279
291- # If no files found, return None
292- if not language_counts :
293- return ProgrammingLanguage .UNSUPPORTED
294280
295- # Return the language with the highest count
296- return language_counts .most_common (1 )[0 ][0 ]
297- elif strategy == "package_json" :
298- # TODO: Hacky implementation that checks for package.json.
299- # Faster but less accurate than the most_common strategy.
300- package_json_path = Path (folder_path ) / "package.json"
301- if package_json_path .exists ():
302- return ProgrammingLanguage .TYPESCRIPT
303- else :
304- return ProgrammingLanguage .PYTHON
def _determine_language_by_file_count(folder_path: str) -> ProgrammingLanguage:
    """Determine the primary programming language of a folder by counting files per extension.

    Walks ``folder_path`` recursively and returns the language with the most matching files.
    Files are skipped when any path component below ``folder_path`` is hidden
    (starts with ".") or is one of the well-known non-source directories.

    Args:
        folder_path (str): Path to the folder to analyze

    Returns:
        ProgrammingLanguage: The dominant programming language, or UNSUPPORTED if no matching files found

    Raises:
        ValueError: If ``folder_path`` does not exist or is not a directory.
    """
    # Imported lazily inside the function, as in the original code.
    from codegen.sdk.python import PyFile
    from codegen.sdk.typescript.file import TSFile

    EXTENSIONS = {
        ProgrammingLanguage.PYTHON: PyFile.get_extensions(),
        ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(),
    }
    # Directory names whose contents must never influence the result.
    IGNORED_DIRS = {".git", "node_modules", "__pycache__", "venv", ".env"}

    folder = Path(folder_path)
    # is_dir() is already False for a non-existent path.
    if not folder.is_dir():
        msg = f"Invalid folder path: {folder_path}"
        raise ValueError(msg)

    language_counts: Counter = Counter()

    for file_path in folder.rglob("*"):
        if file_path.is_dir():
            continue

        # Compare whole path components relative to the scanned folder. A substring
        # test on the full path would wrongly drop files such as "inventory.py"
        # ("venv") or every file of a project that lives under e.g. ".../venv-work/".
        # Treating any hidden component as ignored keeps ".venv", ".github", etc.
        # excluded and also covers hidden files themselves.
        relative_parts = file_path.relative_to(folder).parts
        if any(part.startswith(".") or part in IGNORED_DIRS for part in relative_parts):
            continue

        # Count the file for every language that claims its extension.
        for language, exts in EXTENSIONS.items():
            if file_path.suffix in exts:
                language_counts[language] += 1

    # No file matched any known extension.
    if not language_counts:
        return ProgrammingLanguage.UNSUPPORTED

    # Return the language with the highest count
    return language_counts.most_common(1)[0][0]
305328
306329
307330def split_git_path (filepath : str ) -> tuple [str , str | None ]:
0 commit comments