Last active
February 1, 2025 03:06
-
-
Save dims/0ec2a44aeebf744d2175ca2c1428d701 to your computer and use it in GitHub Desktop.
Revisions
-
dims revised this gist
Feb 1, 2025 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -11,4 +11,5 @@ Links ===== - https://huggingface.co/deepseek-ai/DeepSeek-R1 - https://community.aws/content/2sJofoAecl6jVdDwVqglbZwKz2E/hosting-deepseek-r1-on-amazon-eks - https://apxml.com/posts/gpu-requirements-deepseek-r1 - https://unsloth.ai/blog/deepseekr1-dynamic -
dims created this gist
Feb 1, 2025 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

Notes
=====

Deploy using `kubectl apply -f deepseek.yaml` and watch the logs on the pod. When you see `vllm` listening on port 8000, use port forwarding so you can try running `python query.py`:

```
kubectl port-forward svc/deepseek-r1-server 8000:8000
```

Links
=====

- https://huggingface.co/deepseek-ai/DeepSeek-R1
- https://community.aws/content/2sJofoAecl6jVdDwVqglbZwKz2E/hosting-deepseek-r1-on-amazon-eks
- https://apxml.com/posts/gpu-requirements-deepseek-r1

This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
---
# deepseek.yaml
#
# Three resources that together serve deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
# through vLLM on port 8000:
#   1. Deployment "deepseek-server"   - one vLLM pod requesting one NVIDIA GPU
#   2. Service    "deepseek-r1-server" - ClusterIP in front of that pod
#   3. Secret     "hf-token-secret"    - Hugging Face token read by the pod
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-server
  annotations:
    description: "Deployment for DeepSeek server"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: deepseek-server
  template:
    metadata:
      labels:
        # Must match both spec.selector above and the Service selector below.
        app: deepseek-server
    spec:
      containers:
        - name: vllm-server
          # NOTE(review): ":latest" is a floating tag; consider pinning a
          # specific image version so redeploys are reproducible.
          image: vllm/vllm-openai:latest
          env:
            - name: HF_HUB_ENABLE_HF_TRANSFER
              value: "1"
            # Points inside the /local mount, which is backed by the node's
            # /root/local hostPath volume declared at the bottom of this spec.
            - name: HF_HOME
              value: "/local/huggingface"
            # Injected from the "token" key of the Secret defined in the last
            # document of this file.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-token-secret
                  key: token
            - name: MODEL_REPO
              value: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
          # Run through bash so that ${MODEL_REPO} is expanded from the
          # container environment at start-up.
          command: ["/bin/bash"]
          args:
            - "-c"
            # Folded scalar: the lines below are joined with single spaces
            # into one command line. Keep them at the same indentation;
            # more-indented lines would keep their newlines.
            - >
              vllm serve ${MODEL_REPO}
              --host 0.0.0.0
              --port 8000
              --trust-remote-code
          resources:
            limits:
              cpu: "32"
              memory: 100G
              nvidia.com/gpu: "1"
            requests:
              cpu: "16"
              memory: 30G
              nvidia.com/gpu: "1"
          securityContext:
            # NOTE(review): privileged mode grants the container broad access
            # to the host. It is presumably not needed just to use a GPU that
            # is requested via nvidia.com/gpu - confirm and drop if possible.
            privileged: true
          ports:
            - containerPort: 8000
          # Polls /health every 10 s and tolerates up to 720 failures, i.e.
          # the server gets up to 2 hours (10 s x 720) to finish starting.
          startupProbe:
            periodSeconds: 10
            failureThreshold: 720
            httpGet:
              path: /health
              port: 8000
          volumeMounts:
            - name: local-storage
              mountPath: /local
            - name: shm
              mountPath: /dev/shm
      volumes:
        # Node-local directory (created on the node if missing) mounted at
        # /local in the container.
        - name: local-storage
          hostPath:
            path: /root/local
            type: DirectoryOrCreate
        # Memory-backed emptyDir mounted at /dev/shm, capped at 2Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: "2Gi"
---
# ClusterIP Service exposing the vLLM pod on port 8000 inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: deepseek-r1-server
spec:
  selector:
    app: deepseek-server
  type: ClusterIP
  ports:
    - name: port-8000
      port: 8000
      targetPort: 8000
---
# Hugging Face access token consumed by the Deployment as HF_TOKEN.
apiVersion: v1
kind: Secret
metadata:
  name: hf-token-secret
type: Opaque
# "stringData" accepts the token as plain text and the API server encodes it
# on write. The "data" field requires base64-encoded values, so a raw token
# pasted there would be rejected or decoded into garbage.
stringData:
  token: "<YOUR-HF-TOKEN-GOES-HERE>"
#!/usr/bin/env python
"""query.py - minimal check of an OpenAI-compatible endpoint.

Connects to http://127.0.0.1:8000/v1, prints the model listing returned by
the server, then sends one user message to the first listed model and prints
the raw chat-completion response.
"""
import openai

# NOTE(review): "EMPTY" is a placeholder key; presumably the server does not
# enforce API keys - confirm against the server configuration.
api = openai.Client(api_key="EMPTY", base_url="http://127.0.0.1:8000/v1")

# Get the Models
available = api.models.list()
print(available)

# Chat completion, addressed to whichever model the server lists first.
question = {"role": "user", "content": "What is Kubernetes?"}
reply = api.chat.completions.create(
    model=available.data[0].id,
    messages=[question],
)
print(reply)