Last active
          March 20, 2024 16:56 
        
      - 
      
- 
        Save darrenangle/1deb90b2a5a0ce018b80bcfdf8ee277d to your computer and use it in GitHub Desktop. 
Revisions
- 
        darrenangle revised this gist Mar 20, 2024. No changes. There are no files selected for viewing
- 
        darrenangle created this gist Mar 20, 2024 .There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,54 @@ import subprocess import time import re import signal import sys import select import os def start_server(): command = [ "/usr/bin/python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B", "--max-model-len", "8192", "--enforce-eager" ] return subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, preexec_fn=os.setsid) def read_output(process): error_pattern = re.compile(r"AsyncEngineDeadError: Task finished unexpectedly\.") while True: ready, _, _ = select.select([process.stdout, sys.stdin], [], []) if process.stdout in ready: output = process.stdout.readline() if output == '' and process.poll() is not None: break if output: print(output.strip()) if error_pattern.search(output): print("Error detected. Restarting the server...") terminate_process(process) return True if sys.stdin in ready: input() # Consume the input to prevent blocking print("Keyboard interrupt received. 
Terminating the server...") terminate_process(process) return False return False def terminate_process(process): os.killpg(os.getpgid(process.pid), signal.SIGTERM) time.sleep(5) # Wait for a short duration to allow the process to terminate if process.poll() is None: os.killpg(os.getpgid(process.pid), signal.SIGKILL) def main(): while True: process = start_server() restart = read_output(process) if not restart: break time.sleep(5) # Wait for 5 seconds before restarting if __name__ == "__main__": main() This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,55 @@ import axios from "axios"; export class vllm { private static readonly API_URL = "http://localhost:8000/v1/chat/completions"; public async getCompletion( messages: Array<{ content: string; role: string }>, config: { model?: string; max_tokens?: number; temperature?: number; stop?: string[]; } = {} ): Promise<any> { const data = { model: config.model || "hf-models/NousResearch-Hermes-2-Pro-Mistral-7B", messages: [...messages], max_tokens: config.max_tokens || 3072, temperature: config.temperature || 0.5, stop: config.stop || ["<|im_start|>", "<|im_end|>"], stream: false, }; const headers = { headers: { "Content-Type": "application/json", }, }; const makeRequest: any = async (retryCount = 10) => { try { const response = await axios.post(vllm.API_URL, data, headers); console.log( response.status, ": ", response.data.choices[0].message.content.trim() ); // Adjusting to OpenAI's API response structure return response.data.choices[0].message.content.trim(); } catch (error) { console.error(error); if (retryCount > 0) { const delay = Math.pow(2, 10 - retryCount) * 100; // Exponential backoff formula console.log(`Retrying after ${delay}ms...`); await 
new Promise((resolve) => setTimeout(resolve, delay)); return makeRequest(retryCount - 1); } else { throw new Error("Max retries reached. Request failed."); } } }; return makeRequest(); } }