@darrenangle
Last active March 20, 2024 16:56

vllm.py
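A small watchdog script: it launches the vLLM OpenAI-compatible API server as a subprocess, tails its log output, and restarts the server whenever an AsyncEngineDeadError appears.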
import subprocess
import time
import re
import signal
import sys
import select
import os


def start_server():
    command = [
        "/usr/bin/python3", "-m", "vllm.entrypoints.openai.api_server",
        "--model", "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B",
        "--max-model-len", "8192",
        "--enforce-eager"
    ]
    # Run the server in its own process group so the whole tree can be signalled later.
    return subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            text=True, preexec_fn=os.setsid)


def read_output(process):
    # The log line that indicates the async engine has died and the server needs a restart.
    error_pattern = re.compile(r"AsyncEngineDeadError: Task finished unexpectedly\.")
    while True:
        ready, _, _ = select.select([process.stdout, sys.stdin], [], [])
        if process.stdout in ready:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                print(output.strip())
                if error_pattern.search(output):
                    print("Error detected. Restarting the server...")
                    terminate_process(process)
                    return True
        if sys.stdin in ready:
            # Any line typed on stdin shuts the server down cleanly.
            input()  # Consume the input to prevent blocking
            print("Keyboard interrupt received. Terminating the server...")
            terminate_process(process)
            return False
    return False


def terminate_process(process):
    # Signal the whole process group, escalating to SIGKILL if it does not exit.
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    time.sleep(5)  # Wait for a short duration to allow the process to terminate
    if process.poll() is None:
        os.killpg(os.getpgid(process.pid), signal.SIGKILL)


def main():
    while True:
        process = start_server()
        restart = read_output(process)
        if not restart:
            break
        time.sleep(5)  # Wait for 5 seconds before restarting


if __name__ == "__main__":
    main()
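Run this script instead of starting the API server by hand (e.g. python3 vllm.py); it keeps relaunching the server after engine crashes, and pressing Enter in the terminal shuts everything down.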
vllm.ts
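A thin TypeScript client for the server above: it posts chat messages to the OpenAI-compatible /v1/chat/completions endpoint and retries with exponential backoff (up to 10 attempts), which smooths over the brief outages while the watchdog restarts the server.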
import axios from "axios";

export class vllm {
  private static readonly API_URL = "http://localhost:8000/v1/chat/completions";

  public async getCompletion(
    messages: Array<{ content: string; role: string }>,
    config: {
      model?: string;
      max_tokens?: number;
      temperature?: number;
      stop?: string[];
    } = {}
  ): Promise<any> {
    const data = {
      model: config.model || "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B",
      messages: [...messages],
      max_tokens: config.max_tokens || 3072,
      temperature: config.temperature || 0.5,
      stop: config.stop || ["<|im_start|>", "<|im_end|>"],
      stream: false,
    };

    const headers = {
      headers: {
        "Content-Type": "application/json",
      },
    };

    // Retry with exponential backoff so transient failures (e.g. while the
    // watchdog restarts the server) do not surface as hard errors.
    const makeRequest: any = async (retryCount = 10) => {
      try {
        const response = await axios.post(vllm.API_URL, data, headers);
        console.log(
          response.status,
          ": ",
          response.data.choices[0].message.content.trim()
        );
        // Adjusting to OpenAI's API response structure
        return response.data.choices[0].message.content.trim();
      } catch (error) {
        console.error(error);
        if (retryCount > 0) {
          const delay = Math.pow(2, 10 - retryCount) * 100; // Exponential backoff formula
          console.log(`Retrying after ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          return makeRequest(retryCount - 1);
        } else {
          throw new Error("Max retries reached. Request failed.");
        }
      }
    };

    return makeRequest();
  }
}
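A minimal usage sketch, assuming the class above is saved as vllm.ts next to the caller and the server from vllm.py is listening on localhost:8000 (the prompt text and generation settings here are illustrative, not part of the original gist):

import { vllm } from "./vllm";

async function demo() {
  const client = new vllm();
  // Resolves to the assistant message text once the request (with retries) succeeds.
  const reply = await client.getCompletion(
    [{ role: "user", content: "Say hello in one sentence." }],
    { max_tokens: 64, temperature: 0.2 }
  );
  console.log(reply);
}

demo().catch(console.error);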