@darrenangle · Last active March 20, 2024 16:56
vLLM server wrapper with auto restart, and a vLLM Node script with exponential backoff for requests.

The first file is a Python supervisor that launches vLLM's OpenAI-compatible API server, watches its log output for the fatal AsyncEngineDeadError, and restarts the server when it appears. The second file is a TypeScript client that calls the server's chat completions endpoint and retries failed requests with exponentially increasing delays.

The server wrapper (Python):
import os
import re
import select
import signal
import subprocess
import sys
import time


def start_server():
    """Launch the vLLM OpenAI-compatible API server in its own process group."""
    command = [
        "/usr/bin/python3", "-m", "vllm.entrypoints.openai.api_server",
        "--model", "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B",
        "--max-model-len", "8192",
        "--enforce-eager",
    ]
    # preexec_fn=os.setsid starts the server in its own session, so the whole
    # process group can be signalled at once when shutting it down.
    return subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        preexec_fn=os.setsid,
    )


def read_output(process):
    """Stream server output; return True if the server should be restarted."""
    error_pattern = re.compile(r"AsyncEngineDeadError: Task finished unexpectedly\.")
    while True:
        # Watch both the server's stdout and our own stdin.
        ready, _, _ = select.select([process.stdout, sys.stdin], [], [])
        if process.stdout in ready:
            output = process.stdout.readline()
            if output == "" and process.poll() is not None:
                break  # Server exited and its output is drained.
            if output:
                print(output.strip())
                if error_pattern.search(output):
                    print("Error detected. Restarting the server...")
                    terminate_process(process)
                    return True
        if sys.stdin in ready:
            input()  # Consume the pending line so stdin does not stay readable.
            print("Input received. Terminating the server...")
            terminate_process(process)
            return False
    return False


def terminate_process(process):
    """SIGTERM the server's process group, escalating to SIGKILL if needed."""
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    time.sleep(5)  # Give the process a short window to exit cleanly.
    if process.poll() is None:
        os.killpg(os.getpgid(process.pid), signal.SIGKILL)


def main():
    while True:
        process = start_server()
        restart = read_output(process)
        if not restart:
            break
        time.sleep(5)  # Brief pause before restarting.


if __name__ == "__main__":
    main()
The companion Node script (TypeScript client with exponential backoff):

import axios from "axios";

export class vllm {
  private static readonly API_URL = "http://localhost:8000/v1/chat/completions";

  public async getCompletion(
    messages: Array<{ content: string; role: string }>,
    config: {
      model?: string;
      max_tokens?: number;
      temperature?: number;
      stop?: string[];
    } = {}
  ): Promise<string> {
    const data = {
      model: config.model ?? "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B",
      messages: [...messages],
      max_tokens: config.max_tokens ?? 3072,
      // ?? rather than || so an explicit temperature of 0 is respected.
      temperature: config.temperature ?? 0.5,
      stop: config.stop ?? ["<|im_start|>", "<|im_end|>"],
      stream: false,
    };
    const requestConfig = {
      headers: {
        "Content-Type": "application/json",
      },
    };
    const makeRequest = async (retryCount = 10): Promise<string> => {
      try {
        const response = await axios.post(vllm.API_URL, data, requestConfig);
        console.log(
          response.status,
          ": ",
          response.data.choices[0].message.content.trim()
        );
        // Matches OpenAI's chat completion response structure.
        return response.data.choices[0].message.content.trim();
      } catch (error) {
        console.error(error);
        if (retryCount > 0) {
          // Exponential backoff: 100ms, 200ms, 400ms, ... up to ~51s on the last retry.
          const delay = Math.pow(2, 10 - retryCount) * 100;
          console.log(`Retrying after ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          return makeRequest(retryCount - 1);
        } else {
          throw new Error("Max retries reached. Request failed.");
        }
      }
    };
    return makeRequest();
  }
}
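For completeness, a minimal usage sketch of the client above (not part of the original gist). The "./vllm" import path and the prompt are assumptions; the class and its options are used exactly as defined.

// Minimal usage sketch. The "./vllm" path is an assumption about where
// the class above is saved; adjust it to your project layout.
import { vllm } from "./vllm";

async function main() {
  const client = new vllm();
  const reply = await client.getCompletion(
    [{ role: "user", content: "Say hello in one sentence." }],
    { temperature: 0.2, max_tokens: 64 }
  );
  console.log(reply);
}

main().catch(console.error);

With the default budget of 10 retries, the backoff delays sum to roughly 102 seconds (100 ms + 200 ms + ... + 51.2 s), long enough in practice to span the wrapper's 5-second restart pause, though a slow model reload could still exhaust it.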