
Commit bc1e7c4

tools

1 parent 120c00c commit bc1e7c4

6 files changed: +166 −2 lines changed


bin/totgen.sh

Lines changed: 3 additions & 0 deletions
@@ -162,7 +162,10 @@ exec "${_spark_shell}" \
   --master "local[4]" \
   --conf spark.app.name="[drgscl]::spark-shell" \
   --conf spark.eventLog.enabled=false \
+  --conf spark.driver.memory=10g \
+  --conf spark.executor.memory=10g \
   --jars "${_submit_jars}" \
+  --verbose \
   $@

 _EXEC_SCRIPT_EOF_
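
For reference, the added options amount to launching a local spark-shell as sketched below (the jar list is a placeholder for whatever ${_submit_jars} expands to). Since --master "local[4]" runs the executors inside the driver JVM, spark.driver.memory is typically the setting that actually governs the heap here; spark.executor.memory only takes effect if the same options are reused against a real cluster.

$ spark-shell \
    --master "local[4]" \
    --conf spark.driver.memory=10g \
    --conf spark.executor.memory=10g \
    --jars "dep-a.jar,dep-b.jar" \
    --verbose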

build.sbt

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 // Your sbt build file. Guides on how to write one can be found at
 // http://www.scala-sbt.org/0.13/docs/index.html

-val sparkVer = sys.props.getOrElse("spark.version", "2.1.1")
+val sparkVer = sys.props.getOrElse("spark.version", "2.2.0")
 val sparkBranch = sparkVer.substring(0, 3)
 val defaultScalaVer = sparkBranch match {
   case "2.0" => "2.11.8"
@@ -19,7 +19,7 @@ scalaVersion := scalaVer
 spName := "databricks/spark-deep-learning"

 // Don't forget to set the version
-version := s"0.1.0-spark$sparkBranch"
+version := s"0.2.0-spark$sparkBranch"

 // All Spark Packages need a license
 licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"))
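
Because sparkVer is read from a JVM system property, the new 2.2.0 default can still be overridden at build time. A minimal sketch, assuming a plain sbt launcher on the PATH:

$ sbt -Dspark.version=2.1.1 compile   # build against Spark 2.1.x instead of the 2.2.0 default
$ sbt compile                         # uses the new default, giving version 0.2.0-spark2.2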

linter.sh

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+_bsd_="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [[ $# -gt 1 ]]; then
+  target_files=(${@})
+else
+  target_files=($(git diff --name-only upstream/master HEAD))
+fi
+
+echo "${target_files[@]}"
+pushd "${_bsd_}"
+exec prospector --profile ${_bsd_}/prospector.yaml "${target_files[@]}"
+popd
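
A usage sketch (file paths are hypothetical). Note that the $# -gt 1 test only honours explicit arguments when two or more are passed; with zero or one argument the script falls back to linting whatever differs from upstream/master:

$ ./linter.sh python/module_a.py python/module_b.py   # lint the named files
$ ./linter.sh                                         # lint files changed relative to upstream/master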

project/GenClasspathPlugin.scala

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+package sbtgenclasspath
+
+import sbt._, Keys._
+import sbtsparkpackage.SparkPackagePlugin.autoImport._
+import libdeps.LibVers._
+
+object GenClasspathPlugin extends sbt.AutoPlugin {
+
+  object autoImport {
+
+    lazy val genClasspath = taskKey[Unit]("Build runnable script with classpath")
+    lazy val extraSparkSubmitModules = settingKey[Seq[ModuleID]]("Additional spark submit jar dependencies")
+
+    lazy val genClasspathSettings: Seq[Def.Setting[_]] = Seq(
+
+      extraSparkSubmitModules := Seq.empty[ModuleID],
+
+      genClasspath := {
+        import java.io.PrintWriter
+
+        val sbtPathRoot = baseDirectory.value / ".sbt.paths"
+        sbtPathRoot.mkdirs()
+
+        def writeClasspath(cpType: String)(R: => String): Unit = {
+          val fout = new PrintWriter((sbtPathRoot / s"SBT_${cpType}_CLASSPATH").toString)
+          println(s"Building ${cpType} classpath for current project")
+          try fout.write(R) finally fout.close()
+        }
+
+        writeClasspath("RUNTIME") {
+          (fullClasspath in Runtime).value.files.map(_.toString).mkString(":")
+        }
+
+        writeClasspath("SPARK_PACKAGE") {
+          import scala.util.matching.Regex
+          val patt = s"(.+?)/(.+?):(.+?)(-s_${scalaMajorVer})?".r
+          val pkgs = (spDependencies.value).map { _ match {
+            case patt(orgName, pkgName, pkgVer, stem, _*) =>
+              if (null != stem) {
+                println(s"org ${orgName}, pkg ${pkgName}, ver ${pkgVer}, ${stem}")
+                s"${pkgName}-${pkgVer}${stem}.jar"
+              } else {
+                println(s"org ${orgName}, pkg ${pkgName}, ver ${pkgVer}")
+                s"${pkgName}-${pkgVer}.jar"
+              }
+          }}.toSet
+
+          // TODO: not knowing the proper way, I just fall back to Regex
+          val extraSpModIds = (extraSparkSubmitModules in Compile).value.flatMap { mod =>
+            //"com.typesafe.scala-logging:scala-logging-api:2.1.2"
+            // scala-logging-api_2.11-2.1.2.jar
+            val patt = s"(.+?):(.+?):(.+?)".r
+            mod.toString match {
+              case patt(orgName, pkgName, pkgVer) =>
+                Seq(s"${pkgName}_${scalaMajorVer}-${pkgVer}.jar", s"${pkgName}-${pkgVer}.jar")
+            }
+          }.toSet
+
+          (fullClasspath in Compile).value.files.filter { cpFile =>
+            val cpName = cpFile.getName
+            println(cpName)
+            (pkgs contains cpName) || (extraSpModIds contains cpName)
+          }.map(_.toString).mkString(":")
+        }
+      }
+    )
+  }
+  import autoImport._
+
+  override def requires = sbt.plugins.JvmPlugin
+
+  // This plugin is automatically enabled for projects which are JvmPlugin.
+  override def trigger = allRequirements
+
+  // a group of settings that are automatically added to projects.
+  override val projectSettings =
+    inConfig(Compile)(genClasspathSettings) ++ inConfig(Test)(genClasspathSettings)
+}
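
A rough usage sketch: running the task writes SBT_RUNTIME_CLASSPATH and SBT_SPARK_PACKAGE_CLASSPATH under .sbt.paths/ in the project root, which a launcher script such as bin/totgen.sh can then read. Exact task scoping in sbt 0.13 may vary since the settings are installed per configuration:

$ sbt compile:genClasspath
$ cat .sbt.paths/SBT_RUNTIME_CLASSPATH         # colon-separated runtime classpath
$ cat .sbt.paths/SBT_SPARK_PACKAGE_CLASSPATH   # only jars matching spDependencies and extraSparkSubmitModules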

project/LibDeps.scala

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+package libdeps
+
+/**
+  ======================================================
+  * Build parameters
+  ======================================================
+  */
+object LibVers {
+
+  lazy val sparkVer = sys.props.getOrElse("spark.version", "2.1.1")
+  lazy val sparkBranch = sparkVer.substring(0, 3)
+  lazy val defaultScalaVer = sparkBranch match {
+    case "2.0" => "2.11.11"
+    case "2.1" => "2.11.11"
+    case "2.2" => "2.11.11"
+    case _ => throw new IllegalArgumentException(s"Unsupported Spark version: $sparkVer.")
+  }
+
+  lazy val scalaVer = sys.props.getOrElse("scala.version", defaultScalaVer)
+  lazy val scalaMajorVer = scalaVer.substring(0, scalaVer.indexOf(".", scalaVer.indexOf(".") + 1))
+
+  lazy val protobufVer = "3.3.1" // use protocol buffer, will be shaded
+  lazy val tensorflowVer = "1.2.1"
+  lazy val ammVer = "0.9.9"
+
+  lazy val defaultScalaTestVer = scalaVer match {
+    case s if s.startsWith("2.10") => "2.0"
+    case s if s.startsWith("2.11") => "2.2.6" // scalatest_2.11 does not have 2.0 published
+  }
+}

prospector.yaml

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+strictness: high
+test-warnings: True
+doc-warnings: false
+
+ignore-paths:
+  - docs
+  - spark-warehouse
+  - cover
+
+max-line-length: 100
+
+pep8:
+  run: true
+  disable:
+    - N802
+    - N803
+    - N806
+    - E302
+
+pylint:
+  run: true
+  disable:
+    - too-many-instance-attributes
+    - cyclic-import
+    - len-as-condition
+    - invalid-name
+    - no-else-return
+    - no-self-use
+    - import-error
+    - protected-access
+    - reimported
+
+mccabe:
+  disable:
+    - MC0001
+
+pyroma:
+  run: true
+
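This profile is what linter.sh points prospector at; it can also be applied directly to a single file, e.g. (hypothetical path):

$ prospector --profile prospector.yaml python/some_module.py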