essobi@thegreenbox:~/LLaMA-Factory$ more OKG.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
checkpoint = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


# Define weather-related functions
def get_current_temperature(location: str, unit: str) -> float:
    """
    Get the current temperature at a location.

    Args:
        location: The location to get the temperature for, in the format "City, Country"
        unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"])

    Returns:
        The current temperature at the specified location in the specified units, as a float.
    """
    return 22.0  # A real function should probably actually get the temperature!


def get_current_wind_speed(location: str) -> float:
    """
    Get the current wind speed in km/h at a given location.

    Args:
        location: The location to get the wind speed for, in the format "City, Country"

    Returns:
        The current wind speed at the given location in km/h, as a float.
    """
    return 6.0  # A real function should probably actually get the wind speed!


# Set up tools and initial messages
tools = [get_current_temperature, get_current_wind_speed]
messages = [
    {
        "role": "system",
        "content": "You are a bot that responds to weather queries. You should reply with the unit used in the queried location.",
    },
    {"role": "user", "content": "Hey, what's the temperature in Paris right now?"},
]


# Function to generate a response
def generate_response(messages, tools):
    inputs = tokenizer.apply_chat_template(
        messages,
        tools=tools,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    # Show the formatted prompt (useful for checking the chat template)
    print(tokenizer.decode(inputs["input_ids"][0]))
    outputs = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, skipping the prompt
    return tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )


# Generate initial response
print("Initial response:")
print(generate_response(messages, tools))
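
As written, OKG.py stops after the model's first turn, which for this prompt is typically a tool call rather than a final answer. Below is a minimal sketch of how the loop could continue: parsing the tool call, recording it and the tool's result in the chat history, then generating the natural-language reply. The JSON parsing (assuming Llama 3.1 emits a plain {"name": ..., "parameters": ...} object) and the exact message shapes are assumptions based on the generic Transformers tool-calling format, not code taken from OKG.py.

import json

# Hypothetical continuation, to run after the script above.
raw = generate_response(messages, tools)
call = json.loads(raw)  # assumes the output is a single JSON tool call
tool_name = call["name"]
tool_args = call.get("parameters", {})

# Record the assistant's tool call, then the tool's result, in the chat history
messages.append(
    {
        "role": "assistant",
        "tool_calls": [
            {"type": "function", "function": {"name": tool_name, "arguments": tool_args}}
        ],
    }
)
available_tools = {
    "get_current_temperature": get_current_temperature,
    "get_current_wind_speed": get_current_wind_speed,
}
result = available_tools[tool_name](**tool_args)
messages.append({"role": "tool", "name": tool_name, "content": str(result)})

# Generate the final, natural-language answer
print("Final response:")
print(generate_response(messages, tools))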