Skip to content

Commit 50a38f7

Browse files
author
codegen-bot
committed
Cleaned up implementation
1 parent c7928d2 commit 50a38f7

File tree

1 file changed

+74
-51
lines changed

1 file changed

+74
-51
lines changed

src/codegen/sdk/utils.py

Lines changed: 74 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -244,64 +244,87 @@ def get_language_file_extensions(language: ProgrammingLanguage):
244244

245245

246246
def determine_project_language(folder_path: str, strategy: Literal["most_common", "package_json"] = "package_json") -> ProgrammingLanguage:
    """Determine the primary programming language of a project.

    Args:
        folder_path (str): Path to the folder to analyze.
        strategy (Literal["most_common", "package_json"]): Strategy to use.
            "most_common" counts source files by extension; "package_json"
            only checks whether ``package.json`` exists in ``folder_path``.

    Returns:
        ProgrammingLanguage: The determined programming language.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values, or
            (for "most_common") if ``folder_path`` is not an existing directory.
    """
    # TODO: Create a new strategy that follows gitignore
    if strategy == "most_common":
        return _determine_language_by_file_count(folder_path)
    if strategy == "package_json":
        return _determine_language_by_package_json(folder_path)

    # An unrecognised strategy used to fall through and implicitly return None,
    # violating the annotated return type; fail loudly instead.
    msg = f"Unknown strategy: {strategy!r}"
    raise ValueError(msg)


def _determine_language_by_package_json(folder_path: str) -> ProgrammingLanguage:
    """Determine project language by checking for the presence of package.json.

    Faster but less accurate than the file count strategy: only the top level
    of ``folder_path`` is inspected and no source files are examined.

    Args:
        folder_path (str): Path to the folder to analyze.

    Returns:
        ProgrammingLanguage: TYPESCRIPT if ``package.json`` exists directly
            inside ``folder_path``, otherwise PYTHON.
    """
    if (Path(folder_path) / "package.json").exists():
        return ProgrammingLanguage.TYPESCRIPT
    return ProgrammingLanguage.PYTHON


def _determine_language_by_file_count(folder_path: str) -> ProgrammingLanguage:
    """Determine project language by counting files per known extension.

    Recursively walks ``folder_path`` and returns the language with the most
    matching files. Hidden files and files located inside commonly ignored
    directories (VCS metadata, dependency folders, caches, virtualenvs) are
    not counted.

    Args:
        folder_path (str): Path to the folder to analyze.

    Returns:
        ProgrammingLanguage: The dominant programming language, or UNSUPPORTED
            if no matching files were found.

    Raises:
        ValueError: If ``folder_path`` does not exist or is not a directory.
    """
    # Validate first so an invalid path fails fast, before any project imports.
    folder = Path(folder_path)
    if not folder.is_dir():  # is_dir() is already False for non-existent paths
        msg = f"Invalid folder path: {folder_path}"
        raise ValueError(msg)

    # Imported lazily inside the function (as in the original), presumably to
    # avoid a circular import at module load time - TODO confirm.
    from codegen.sdk.python import PyFile
    from codegen.sdk.typescript.file import TSFile

    extensions_by_language = {
        ProgrammingLanguage.PYTHON: PyFile.get_extensions(),
        ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(),
    }

    # Directory names to skip. These are compared against whole path
    # components *below* ``folder``; a substring test on the full path would
    # also reject e.g. ".github/", "venv_utils/", or every file of a project
    # that merely lives under a parent directory such as "/home/me/venv-tools".
    # ".venv" is listed explicitly because the former substring match on
    # "venv" used to cover it.
    ignored_dirs = frozenset({".git", "node_modules", "__pycache__", "venv", ".venv", ".env"})

    language_counts = Counter()
    for file_path in folder.rglob("*"):
        # Skip directories and hidden files
        if file_path.is_dir() or file_path.name.startswith("."):
            continue

        # Skip anything nested inside an ignored directory
        parent_dirs = file_path.relative_to(folder).parts[:-1]
        if not ignored_dirs.isdisjoint(parent_dirs):
            continue

        # Count the file for every language that claims its extension
        suffix = file_path.suffix
        for language, exts in extensions_by_language.items():
            if suffix in exts:
                language_counts[language] += 1

    # No file matched any known extension
    if not language_counts:
        return ProgrammingLanguage.UNSUPPORTED

    # Return the language with the highest count
    return language_counts.most_common(1)[0][0]
305328

306329

307330
def split_git_path(filepath: str) -> tuple[str, str | None]:

0 commit comments

Comments
 (0)