Created
February 6, 2018 23:03
-
-
Save pdmack/69716f16a03fcfddf3808309bad9be9c to your computer and use it in GitHub Desktop.
Revisions
-
Pete MacKinnon created this gist
Feb 6, 2018. There are no files selected for viewing.
# Kubernetes manifests for a Kubeflow deployment in the "kubeflow" namespace:
# JupyterHub (tf-hub), the TFJob operator and dashboard, and the Ambassador
# proxy with its route mappings.
#
# NOTE(review): the gist viewer this file was captured from flagged it as
# containing hidden or bidirectional Unicode text. Inspect it in an editor
# that reveals such characters before applying.
---
# JupyterHub configuration, mounted into the hub pod at /etc/config.
apiVersion: v1
data:
  jupyterhub_config.py: |
    import json
    import os
    from kubespawner.spawner import KubeSpawner
    from jhub_remote_user_authenticator.remote_user_auth import RemoteUserAuthenticator
    from oauthenticator.github import GitHubOAuthenticator


    class KubeFormSpawner(KubeSpawner):
        """KubeSpawner variant whose image and resources come from a spawn form.

        Each property below reads the submitted value from
        ``self.user_options`` and falls back to a default when the field
        was left empty.
        """

        def _options_form_default(self):
            """Return the HTML fragment for the spawner options form."""
            # The extra_resource_limits placeholder uses &quot; so that the
            # single-quoted attribute is not terminated early and so that the
            # hint shown to the user is valid JSON (json.loads rejects
            # single-quoted strings).
            return '''
        <label for='image'>Image</label>
        <input name='image' placeholder='repo/image:tag'></input>
        <br/><br/>

        <label for='cpu_guarantee'>CPU</label>
        <input name='cpu_guarantee' placeholder='200m, 1.0, 2.5, etc'></input>
        <br/><br/>

        <label for='mem_guarantee'>Memory</label>
        <input name='mem_guarantee' placeholder='100Mi, 1.5Gi'></input>
        <br/><br/>

        <label for='extra_resource_limits'>Extra Resource Limits</label>
        <input name='extra_resource_limits' placeholder='{&quot;nvidia.com/gpu&quot;: &quot;3&quot;}'></input>
        <br/><br/>
        '''

        def options_from_form(self, formdata):
            """Extract the form fields into a flat options dict.

            ``formdata`` maps each field name to a list of submitted values;
            the first value is taken and stripped. Missing fields become ''.
            """
            fields = (
                'image',
                'cpu_guarantee',
                'mem_guarantee',
                'extra_resource_limits',
            )
            return {name: formdata.get(name, [''])[0].strip() for name in fields}

        @property
        def singleuser_image_spec(self):
            """Image from the form, or the default CPU notebook image."""
            image = 'gcr.io/kubeflow/tensorflow-notebook-cpu'
            if self.user_options.get('image'):
                image = self.user_options['image']
            return image

        @property
        def cpu_guarantee(self):
            """CPU guarantee from the form, defaulting to '500m'."""
            cpu = '500m'
            if self.user_options.get('cpu_guarantee'):
                cpu = self.user_options['cpu_guarantee']
            return cpu

        @property
        def mem_guarantee(self):
            """Memory guarantee from the form, defaulting to '1Gi'."""
            mem = '1Gi'
            if self.user_options.get('mem_guarantee'):
                mem = self.user_options['mem_guarantee']
            return mem

        @property
        def extra_resource_limits(self):
            """Extra resource limits from the form, parsed from JSON.

            Returns an empty dict when the field was left empty, so the
            value is always a mapping.

            Raises:
                ValueError: if the field is not valid JSON
                    (json.JSONDecodeError is a ValueError subclass) or does
                    not decode to a JSON object.
            """
            raw = self.user_options.get('extra_resource_limits')
            if not raw:
                return {}
            limits = json.loads(raw)
            if not isinstance(limits, dict):
                raise ValueError(
                    'extra_resource_limits must be a JSON object, got: %r' % (raw,)
                )
            return limits


    ###################################################
    ### JupyterHub Options
    ###################################################
    c.JupyterHub.ip = '0.0.0.0'
    c.JupyterHub.hub_ip = '0.0.0.0'
    # Don't try to cleanup servers on exit - since in general for k8s, we want
    # the hub to be able to restart without losing user containers
    c.JupyterHub.cleanup_servers = False
    ###################################################

    ###################################################
    ### Spawner Options
    ###################################################
    c.JupyterHub.spawner_class = KubeFormSpawner
    # NOTE(review): KubeFormSpawner defines singleuser_image_spec as a
    # property with a different default image; presumably that property
    # takes precedence over this setting - confirm.
    c.KubeSpawner.singleuser_image_spec = 'gcr.io/kubeflow/tensorflow-notebook'
    c.KubeSpawner.cmd = 'start-singleuser.sh'
    c.KubeSpawner.args = ['--allow-root']
    # First pulls can be really slow, so let's give it a big timeout
    c.KubeSpawner.start_timeout = 60 * 10

    ###################################################
    ### Persistent volume options
    ###################################################
    # Using persistent storage requires a default storage class.
    # TODO(jlewi): Verify this works on minikube.
    # TODO(jlewi): Should we set c.KubeSpawner.singleuser_fs_gid = 1000
    # see https://github.com/kubeflow/kubeflow/pull/22#issuecomment-350500944
    c.KubeSpawner.user_storage_pvc_ensure = True
    # How much disk space do we want?
    c.KubeSpawner.user_storage_capacity = '10Gi'
    c.KubeSpawner.pvc_name_template = 'claim-{username}{servername}'

    ######## Authenticator ######
    c.JupyterHub.authenticator_class = 'dummyauthenticator.DummyAuthenticator'
kind: ConfigMap
metadata:
  name: jupyterhub-config
  namespace: kubeflow
---
# Headless Service (clusterIP: None) selecting the hub pod on port 8000.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: tf-hub
  name: tf-hub-0
  namespace: kubeflow
spec:
  clusterIP: None
  ports:
  - name: hub
    port: 8000
  selector:
    app: tf-hub
---
# ClusterIP Service exposing the hub on port 80 -> 8000.
# This Service was previously also named tf-hub-0, colliding with the
# headless Service above, and carried clusterIP: None, which makes the
# port -> targetPort mapping ineffective (headless Services are not proxied).
apiVersion: v1
kind: Service
metadata:
  labels:
    app: tf-hub
  name: tf-hub-lb
  namespace: kubeflow
spec:
  ports:
  - name: hub
    port: 80
    targetPort: 8000
  selector:
    app: tf-hub
  type: ClusterIP
---
# JupyterHub itself, started with the config file from the ConfigMap above.
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: tf-hub
  namespace: kubeflow
spec:
  replicas: 1
  serviceName: ""
  template:
    metadata:
      labels:
        app: tf-hub
    spec:
      containers:
      - command:
        - jupyterhub
        - -f
        - /etc/config/jupyterhub_config.py
        image: gcr.io/kubeflow/jupyterhub-k8s:1.0.1
        name: tf-hub
        ports:
        - containerPort: 8000
        - containerPort: 8081
        volumeMounts:
        - mountPath: /etc/config
          name: config-volume
      serviceAccountName: jupyter-hub
      volumes:
      - configMap:
          name: jupyterhub-config
        name: config-volume
  updateStrategy:
    type: RollingUpdate
---
# Namespaced role bound to the jupyter-hub service account below.
# NOTE(review): grants every verb on every resource in the namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: jupyter-role
  namespace: kubeflow
rules:
- apiGroups:
  - '*'
  resources:
  - '*'
  verbs:
  - '*'
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: jupyter-hub
  name: jupyter-hub
  namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: jupyter-role
  namespace: kubeflow
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: jupyter-role
subjects:
- kind: ServiceAccount
  name: jupyter-hub
  namespace: kubeflow
---
# TFJob operator; reads its controller config from tf-job-operator-config.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: tf-job-operator
  namespace: kubeflow
spec:
  replicas: 1
  template:
    metadata:
      labels:
        name: tf-job-operator
    spec:
      containers:
      - command:
        - /opt/mlkube/tf_operator
        - --controller-config-file=/etc/config/controller_config_file.yaml
        - --alsologtostderr
        - -v=1
        env:
        - name: MY_POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: MY_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        image: gcr.io/tf-on-k8s-dogfood/tf_operator:v20180117-04425d9-dirty-e3b0c44
        name: tf-job-operator
        volumeMounts:
        - mountPath: /etc/config
          name: config-volume
      serviceAccountName: tf-job-operator
      volumes:
      - configMap:
          name: tf-job-operator-config
        name: config-volume
---
apiVersion: v1
data:
  controller_config_file.yaml: |-
    {
        "grpcServerFilePath": "/opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py"
    }
kind: ConfigMap
metadata:
  name: tf-job-operator-config
  namespace: kubeflow
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: tf-job-operator
  name: tf-job-operator
  namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  labels:
    app: tf-job-operator
  name: tf-job-operator
rules:
- apiGroups:
  - tensorflow.org
  resources:
  - tfjobs
  verbs:
  - '*'
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - '*'
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  verbs:
  - '*'
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - '*'
- apiGroups:
  - ""
  resources:
  - configmaps
  - pods
  - services
  - endpoints
  - persistentvolumeclaims
  - events
  verbs:
  - '*'
- apiGroups:
  - apps
  - extensions
  resources:
  - deployments
  verbs:
  - '*'
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  labels:
    app: tf-job-operator
  name: tf-job-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: tf-job-operator
subjects:
- kind: ServiceAccount
  name: tf-job-operator
  namespace: kubeflow
---
# TFJob dashboard backend, listening on 8080.
# The tf-job-dashboard Deployment and Service are defined exactly once here;
# a verbatim duplicate of both documents has been dropped.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: tf-job-dashboard
  namespace: kubeflow
spec:
  template:
    metadata:
      labels:
        name: tf-job-dashboard
    spec:
      containers:
      - command:
        - /opt/tensorflow_k8s/dashboard/backend
        image: gcr.io/tf-on-k8s-dogfood/tf_operator:v20180117-04425d9-dirty-e3b0c44
        name: tf-job-dashboard
        ports:
        - containerPort: 8080
      serviceAccountName: tf-job-dashboard
---
# Dashboard Service; the annotation registers an Ambassador mapping that
# routes /tfjobs/ui/ to this Service.
apiVersion: v1
kind: Service
metadata:
  annotations:
    getambassador.io/config: |-
      ---
      apiVersion: ambassador/v0
      kind: Mapping
      name: tfjobs-ui-mapping
      prefix: /tfjobs/ui/
      rewrite: /
      service: tf-job-dashboard.kubeflow
  name: tf-job-dashboard
  namespace: kubeflow
spec:
  ports:
  - port: 80
    targetPort: 8080
  selector:
    name: tf-job-dashboard
  type: ClusterIP
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: tf-job-dashboard
  name: tf-job-dashboard
  namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  labels:
    app: tf-job-dashboard
  name: tf-job-dashboard
rules:
- apiGroups:
  - tensorflow.org
  resources:
  - tfjobs
  verbs:
  - '*'
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - '*'
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  verbs:
  - '*'
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - '*'
- apiGroups:
  - ""
  resources:
  - configmaps
  - pods
  - services
  - endpoints
  - persistentvolumeclaims
  - events
  verbs:
  - '*'
- apiGroups:
  - apps
  - extensions
  resources:
  - deployments
  verbs:
  - '*'
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  labels:
    app: tf-job-dashboard
  name: tf-job-dashboard
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: tf-job-dashboard
subjects:
- kind: ServiceAccount
  name: tf-job-dashboard
  namespace: kubeflow
---
# Ambassador proxy traffic port.
apiVersion: v1
kind: Service
metadata:
  labels:
    service: ambassador
  name: ambassador
  namespace: kubeflow
spec:
  ports:
  - name: ambassador
    port: 80
    targetPort: 80
  selector:
    service: ambassador
  type: ClusterIP
---
# Ambassador admin port (8877), also used by the probes in the Deployment.
apiVersion: v1
kind: Service
metadata:
  labels:
    service: ambassador-admin
  name: ambassador-admin
  namespace: kubeflow
spec:
  ports:
  - name: ambassador-admin
    port: 8877
    targetPort: 8877
  selector:
    service: ambassador
  type: ClusterIP
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: ambassador
rules:
- apiGroups:
  - ""
  resources:
  - services
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - create
  - update
  - patch
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - secrets
  verbs:
  - get
  - list
  - watch
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ambassador
  namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: ambassador
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ambassador
subjects:
- kind: ServiceAccount
  name: ambassador
  namespace: kubeflow
---
# Ambassador proxy (3 replicas) with a statsd sidecar container.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: ambassador
  namespace: kubeflow
spec:
  replicas: 3
  template:
    metadata:
      labels:
        service: ambassador
    spec:
      containers:
      - env:
        - name: AMBASSADOR_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        image: quay.io/datawire/ambassador:0.22.0
        imagePullPolicy: Always
        livenessProbe:
          httpGet:
            path: /ambassador/v0/check_alive
            port: 8877
          initialDelaySeconds: 30
          periodSeconds: 30
        name: ambassador
        readinessProbe:
          httpGet:
            path: /ambassador/v0/check_ready
            port: 8877
          initialDelaySeconds: 30
          periodSeconds: 30
        resources:
          limits:
            cpu: 1
            memory: 400Mi
          requests:
            cpu: 200m
            memory: 100Mi
      - image: quay.io/datawire/statsd:0.22.0
        name: statsd
      restartPolicy: Always
      serviceAccountName: ambassador
---
# Carries the Ambassador mapping that routes /k8s/ui/ to the Kubernetes
# dashboard Service in kube-system over TLS.
apiVersion: v1
kind: Service
metadata:
  annotations:
    getambassador.io/config: |-
      ---
      apiVersion: ambassador/v0
      kind: Mapping
      name: k8s-dashboard-ui-mapping
      prefix: /k8s/ui/
      rewrite: /
      tls: true
      service: kubernetes-dashboard.kube-system
  name: k8s-dashboard
  namespace: kubeflow
spec:
  ports:
  - port: 443
    targetPort: 8443
  selector:
    k8s-app: kubernetes-dashboard
  type: ClusterIP