@pdmack / gistfile1.txt
Created February 6, 2018
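# A single manifest bundling JupyterHub (hub config, Services, StatefulSet, RBAC),
# the TFJob operator and its dashboard, and the Ambassador API gateway for a
# Kubeflow deployment. Everything targets the kubeflow namespace, which this file
# does not create. A minimal way to try it (assuming a working cluster and kubectl
# context) would be:
#
#   kubectl create namespace kubeflow
#   kubectl apply -f gistfile1.txt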
---
apiVersion: v1
data:
  jupyterhub_config.py: |
    import json
    import os
    from kubespawner.spawner import KubeSpawner
    from jhub_remote_user_authenticator.remote_user_auth import RemoteUserAuthenticator
    from oauthenticator.github import GitHubOAuthenticator
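
    # KubeFormSpawner extends KubeSpawner with a simple HTML options form; the
    # submitted values select the notebook image and per-user resource guarantees.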

    class KubeFormSpawner(KubeSpawner):
        def _options_form_default(self):
            return '''
            <label for='image'>Image</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            <input name='image' placeholder='repo/image:tag'></input>
            <br/><br/>

            <label for='cpu_guarantee'>CPU</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            <input name='cpu_guarantee' placeholder='200m, 1.0, 2.5, etc'></input>
            <br/><br/>

            <label for='mem_guarantee'>Memory</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            <input name='mem_guarantee' placeholder='100Mi, 1.5Gi'></input>
            <br/><br/>

            <label for='extra_resource_limits'>Extra Resource Limits</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            <input name='extra_resource_limits' placeholder='{&quot;nvidia.com/gpu&quot;: &quot;3&quot;}'></input>
            <br/><br/>
            '''

        def options_from_form(self, formdata):
            options = {}
            options['image'] = formdata.get('image', [''])[0].strip()
            options['cpu_guarantee'] = formdata.get('cpu_guarantee', [''])[0].strip()
            options['mem_guarantee'] = formdata.get('mem_guarantee', [''])[0].strip()
            options['extra_resource_limits'] = formdata.get('extra_resource_limits', [''])[0].strip()
            return options
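
        # The properties below provide the defaults that are used whenever the
        # corresponding form field is left blank.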

        @property
        def singleuser_image_spec(self):
            image = 'gcr.io/kubeflow/tensorflow-notebook-cpu'
            if self.user_options.get('image'):
                image = self.user_options['image']
            return image

        @property
        def cpu_guarantee(self):
            cpu = '500m'
            if self.user_options.get('cpu_guarantee'):
                cpu = self.user_options['cpu_guarantee']
            return cpu

        @property
        def mem_guarantee(self):
            mem = '1Gi'
            if self.user_options.get('mem_guarantee'):
                mem = self.user_options['mem_guarantee']
            return mem

        @property
        def extra_resource_limits(self):
            extra = ''
            if self.user_options.get('extra_resource_limits'):
                extra = json.loads(self.user_options['extra_resource_limits'])
            return extra

    ###################################################
    ### JupyterHub Options
    ###################################################
    c.JupyterHub.ip = '0.0.0.0'
    c.JupyterHub.hub_ip = '0.0.0.0'
    # Don't try to clean up servers on exit - since in general for k8s, we want
    # the hub to be able to restart without losing user containers
    c.JupyterHub.cleanup_servers = False
    ###################################################

    ###################################################
    ### Spawner Options
    ###################################################
    c.JupyterHub.spawner_class = KubeFormSpawner
    c.KubeSpawner.singleuser_image_spec = 'gcr.io/kubeflow/tensorflow-notebook'
    c.KubeSpawner.cmd = 'start-singleuser.sh'
    c.KubeSpawner.args = ['--allow-root']
    # First pulls can be really slow, so let's give it a big timeout
    c.KubeSpawner.start_timeout = 60 * 10

    ###################################################
    ### Persistent volume options
    ###################################################
    # Using persistent storage requires a default storage class.
    # TODO(jlewi): Verify this works on minikube.
    # TODO(jlewi): Should we set c.KubeSpawner.singleuser_fs_gid = 1000
    # see https://github.com/kubeflow/kubeflow/pull/22#issuecomment-350500944
    c.KubeSpawner.user_storage_pvc_ensure = True
    # How much disk space do we want?
    c.KubeSpawner.user_storage_capacity = '10Gi'
    c.KubeSpawner.pvc_name_template = 'claim-{username}{servername}'

    ######## Authenticator ######
    c.JupyterHub.authenticator_class = 'dummyauthenticator.DummyAuthenticator'
kind: ConfigMap
metadata:
  name: jupyterhub-config
  namespace: kubeflow
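# Services for the hub pods: the first exposes the hub port 8000 directly, the
# second maps port 80 onto it. Note that both share the name tf-hub-0, so the
# second definition effectively replaces the first when this file is applied.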
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: tf-hub
  name: tf-hub-0
  namespace: kubeflow
spec:
  clusterIP: None
  ports:
  - name: hub
    port: 8000
  selector:
    app: tf-hub
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: tf-hub
  name: tf-hub-0
  namespace: kubeflow
spec:
  clusterIP: None
  ports:
  - name: hub
    port: 80
    targetPort: 8000
  selector:
    app: tf-hub
  type: ClusterIP
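# JupyterHub itself: a single-replica StatefulSet running the jupyterhub-k8s image,
# with jupyterhub_config.py from the ConfigMap above mounted at /etc/config.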
---
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: tf-hub
  namespace: kubeflow
spec:
  replicas: 1
  serviceName: ""
  template:
    metadata:
      labels:
        app: tf-hub
    spec:
      containers:
      - command:
        - jupyterhub
        - -f
        - /etc/config/jupyterhub_config.py
        image: gcr.io/kubeflow/jupyterhub-k8s:1.0.1
        name: tf-hub
        ports:
        - containerPort: 8000
        - containerPort: 8081
        volumeMounts:
        - mountPath: /etc/config
          name: config-volume
      serviceAccountName: jupyter-hub
      volumes:
      - configMap:
          name: jupyterhub-config
        name: config-volume
  updateStrategy:
    type: RollingUpdate
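# Namespace-scoped RBAC for the hub: the jupyter-hub ServiceAccount used by the
# StatefulSet above is bound to a Role allowing all verbs on all resources in the
# kubeflow namespace, broad enough for KubeSpawner to create user pods and PVCs.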
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: jupyter-role
  namespace: kubeflow
rules:
- apiGroups:
  - '*'
  resources:
  - '*'
  verbs:
  - '*'
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: jupyter-hub
  name: jupyter-hub
  namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: jupyter-role
  namespace: kubeflow
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: jupyter-role
subjects:
- kind: ServiceAccount
  name: jupyter-hub
  namespace: kubeflow
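# TFJob operator: watches TFJob custom resources (tensorflow.org group) and manages
# the Kubernetes resources for each training job, using the controller config below.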
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: tf-job-operator
  namespace: kubeflow
spec:
  replicas: 1
  template:
    metadata:
      labels:
        name: tf-job-operator
    spec:
      containers:
      - command:
        - /opt/mlkube/tf_operator
        - --controller-config-file=/etc/config/controller_config_file.yaml
        - --alsologtostderr
        - -v=1
        env:
        - name: MY_POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: MY_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        image: gcr.io/tf-on-k8s-dogfood/tf_operator:v20180117-04425d9-dirty-e3b0c44
        name: tf-job-operator
        volumeMounts:
        - mountPath: /etc/config
          name: config-volume
      serviceAccountName: tf-job-operator
      volumes:
      - configMap:
          name: tf-job-operator-config
        name: config-volume
---
apiVersion: v1
data:
  controller_config_file.yaml: |-
    {
      "grpcServerFilePath": "/opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py"
    }
kind: ConfigMap
metadata:
  name: tf-job-operator-config
  namespace: kubeflow
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: tf-job-operator
  name: tf-job-operator
  namespace: kubeflow
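# Cluster-wide RBAC for the operator: full access to tfjobs, CRDs, storage classes,
# batch jobs, core resources, and deployments.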
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  labels:
    app: tf-job-operator
  name: tf-job-operator
rules:
- apiGroups:
  - tensorflow.org
  resources:
  - tfjobs
  verbs:
  - '*'
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - '*'
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  verbs:
  - '*'
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - '*'
- apiGroups:
  - ""
  resources:
  - configmaps
  - pods
  - services
  - endpoints
  - persistentvolumeclaims
  - events
  verbs:
  - '*'
- apiGroups:
  - apps
  - extensions
  resources:
  - deployments
  verbs:
  - '*'
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  labels:
    app: tf-job-operator
  name: tf-job-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: tf-job-operator
subjects:
- kind: ServiceAccount
  name: tf-job-operator
  namespace: kubeflow
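# TFJob dashboard: a small web UI for TFJobs, exposed through Ambassador at
# /tfjobs/ui/ via the Mapping annotation on its Service.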
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: tf-job-dashboard
  namespace: kubeflow
spec:
  template:
    metadata:
      labels:
        name: tf-job-dashboard
    spec:
      containers:
      - command:
        - /opt/tensorflow_k8s/dashboard/backend
        image: gcr.io/tf-on-k8s-dogfood/tf_operator:v20180117-04425d9-dirty-e3b0c44
        name: tf-job-dashboard
        ports:
        - containerPort: 8080
      serviceAccountName: tf-job-dashboard
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    getambassador.io/config: |-
      ---
      apiVersion: ambassador/v0
      kind: Mapping
      name: tfjobs-ui-mapping
      prefix: /tfjobs/ui/
      rewrite: /
      service: tf-job-dashboard.kubeflow
  name: tf-job-dashboard
  namespace: kubeflow
spec:
  ports:
  - port: 80
    targetPort: 8080
  selector:
    name: tf-job-dashboard
  type: ClusterIP
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: tf-job-dashboard
  name: tf-job-dashboard
  namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  labels:
    app: tf-job-dashboard
  name: tf-job-dashboard
rules:
- apiGroups:
  - tensorflow.org
  resources:
  - tfjobs
  verbs:
  - '*'
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - '*'
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  verbs:
  - '*'
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - '*'
- apiGroups:
  - ""
  resources:
  - configmaps
  - pods
  - services
  - endpoints
  - persistentvolumeclaims
  - events
  verbs:
  - '*'
- apiGroups:
  - apps
  - extensions
  resources:
  - deployments
  verbs:
  - '*'
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  labels:
    app: tf-job-dashboard
  name: tf-job-dashboard
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: tf-job-dashboard
subjects:
- kind: ServiceAccount
  name: tf-job-dashboard
  namespace: kubeflow
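# Ambassador API gateway: routes the prefixes declared in the getambassador.io/config
# annotations in this file to their backing Services.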
---
apiVersion: v1
kind: Service
metadata:
  labels:
    service: ambassador
  name: ambassador
  namespace: kubeflow
spec:
  ports:
  - name: ambassador
    port: 80
    targetPort: 80
  selector:
    service: ambassador
  type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
  labels:
    service: ambassador-admin
  name: ambassador-admin
  namespace: kubeflow
spec:
  ports:
  - name: ambassador-admin
    port: 8877
    targetPort: 8877
  selector:
    service: ambassador
  type: ClusterIP
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: ambassador
rules:
- apiGroups:
  - ""
  resources:
  - services
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - create
  - update
  - patch
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - secrets
  verbs:
  - get
  - list
  - watch
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ambassador
  namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: ambassador
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ambassador
subjects:
- kind: ServiceAccount
  name: ambassador
  namespace: kubeflow
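# Ambassador proxy pods: three replicas with liveness/readiness probes on the admin
# port (8877), plus a statsd sidecar for metrics.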
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: ambassador
  namespace: kubeflow
spec:
  replicas: 3
  template:
    metadata:
      labels:
        service: ambassador
    spec:
      containers:
      - env:
        - name: AMBASSADOR_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        image: quay.io/datawire/ambassador:0.22.0
        imagePullPolicy: Always
        livenessProbe:
          httpGet:
            path: /ambassador/v0/check_alive
            port: 8877
          initialDelaySeconds: 30
          periodSeconds: 30
        name: ambassador
        readinessProbe:
          httpGet:
            path: /ambassador/v0/check_ready
            port: 8877
          initialDelaySeconds: 30
          periodSeconds: 30
        resources:
          limits:
            cpu: 1
            memory: 400Mi
          requests:
            cpu: 200m
            memory: 100Mi
      - image: quay.io/datawire/statsd:0.22.0
        name: statsd
      restartPolicy: Always
      serviceAccountName: ambassador
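# Convenience mapping: exposes the standard Kubernetes dashboard (in kube-system)
# through Ambassador at /k8s/ui/, assuming the dashboard add-on is installed.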
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    getambassador.io/config: |-
      ---
      apiVersion: ambassador/v0
      kind: Mapping
      name: k8s-dashboard-ui-mapping
      prefix: /k8s/ui/
      rewrite: /
      tls: true
      service: kubernetes-dashboard.kube-system
  name: k8s-dashboard
  namespace: kubeflow
spec:
  ports:
  - port: 443
    targetPort: 8443
  selector:
    k8s-app: kubernetes-dashboard
  type: ClusterIP