A vLLM server wrapper with auto-restart, and a vLLM Node (TypeScript) client script with exponential backoff for requests.
```python
import subprocess
import time
import re
import signal
import sys
import select
import os


def start_server():
    command = [
        "/usr/bin/python3", "-m", "vllm.entrypoints.openai.api_server",
        "--model", "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B",
        "--max-model-len", "8192",
        "--enforce-eager"
    ]
    # Launch the server in its own process group so the whole group can be killed later.
    return subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            text=True, preexec_fn=os.setsid)


def read_output(process):
    error_pattern = re.compile(r"AsyncEngineDeadError: Task finished unexpectedly\.")
    while True:
        ready, _, _ = select.select([process.stdout, sys.stdin], [], [])
        if process.stdout in ready:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                print(output.strip())
                if error_pattern.search(output):
                    print("Error detected. Restarting the server...")
                    terminate_process(process)
                    return True  # Signal the caller to restart the server
        if sys.stdin in ready:
            # Any line on stdin (e.g. pressing Enter) triggers a clean shutdown.
            input()  # Consume the input to prevent blocking
            print("Keyboard interrupt received. Terminating the server...")
            terminate_process(process)
            return False
    return False


def terminate_process(process):
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    time.sleep(5)  # Wait for a short duration to allow the process to terminate
    if process.poll() is None:
        os.killpg(os.getpgid(process.pid), signal.SIGKILL)


def main():
    while True:
        process = start_server()
        restart = read_output(process)
        if not restart:
            break
        time.sleep(5)  # Wait for 5 seconds before restarting


if __name__ == "__main__":
    main()
```
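The wrapper starts the API server in its own process group (`preexec_fn=os.setsid`), so `terminate_process` can SIGTERM, and if necessary SIGKILL, the entire group rather than just the parent process. It tails the server's combined stdout/stderr, and when the `AsyncEngineDeadError` message appears it kills the server and restarts it after a 5-second pause; any line typed on stdin shuts the loop down cleanly.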
```typescript
import axios from "axios";

export class vllm {
  private static readonly API_URL = "http://localhost:8000/v1/chat/completions";

  public async getCompletion(
    messages: Array<{ content: string; role: string }>,
    config: {
      model?: string;
      max_tokens?: number;
      temperature?: number;
      stop?: string[];
    } = {}
  ): Promise<any> {
    const data = {
      model: config.model || "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B",
      messages: [...messages],
      max_tokens: config.max_tokens || 3072,
      temperature: config.temperature || 0.5,
      stop: config.stop || ["<|im_start|>", "<|im_end|>"],
      stream: false,
    };
    const headers = {
      headers: {
        "Content-Type": "application/json",
      },
    };
    const makeRequest: any = async (retryCount = 10) => {
      try {
        const response = await axios.post(vllm.API_URL, data, headers);
        console.log(
          response.status,
          ": ",
          response.data.choices[0].message.content.trim()
        );
        // Adjusting to OpenAI's API response structure
        return response.data.choices[0].message.content.trim();
      } catch (error) {
        console.error(error);
        if (retryCount > 0) {
          // Exponential backoff: delays double on each retry (100ms, 200ms, 400ms, ...)
          const delay = Math.pow(2, 10 - retryCount) * 100;
          console.log(`Retrying after ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          return makeRequest(retryCount - 1);
        } else {
          throw new Error("Max retries reached. Request failed.");
        }
      }
    };
    return makeRequest();
  }
}
```
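A minimal usage sketch for the client above, assuming it is saved as a local `vllm.ts` module (the import path and prompt are illustrative, not part of the gist):

```typescript
// Hypothetical usage of the client above; the "./vllm" import path is an assumption.
import { vllm } from "./vllm";

const client = new vllm();

// Ask the locally served model a question, overriding a couple of defaults.
client
  .getCompletion(
    [{ role: "user", content: "Summarize exponential backoff in one sentence." }],
    { max_tokens: 256, temperature: 0.2 }
  )
  .then((text) => console.log("completion:", text))
  .catch((err) => console.error(err));
```

With the defaults, a failed request is retried up to 10 times, with delays doubling from 100ms up to roughly 51 seconds before the client gives up.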