### Implementing Automatic1111-style attention weights

Note: GPT-2 is very temperamental with this technique; it seems to need a high temperature to produce even close to coherent output. The prompt uses Automatic1111's `(phrase:weight)` syntax, and the parsed per-token weights are passed to the model through the `attention_mask` argument of `generate`.

```python
import re

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer


def modify_attention_mask(prompt, model, tokenizer):
    """Parse "(phrase:weight)" spans and build per-token attention weights."""
    tokens = []
    attention_modifiers = []
    add_space = False
    for chunk in re.split(r'\(|\)', prompt):
        if ':' in chunk:
            word, modifier = chunk.split(':')
            word = word.strip()
            modifier = float(modifier.strip())
        else:
            word = chunk.strip()
            modifier = 1.0

        current_tokens = tokenizer.tokenize(word)
        if add_space and current_tokens:
            tokens.append('Ġ')  # Space token for GPT-2
            attention_modifiers.append(1.0)
        tokens.extend(current_tokens)
        attention_modifiers.extend([modifier] * len(current_tokens))
        add_space = True

    attention_mask = torch.tensor([attention_modifiers])
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
    return input_ids, attention_mask


def custom_generate(prompt, model, tokenizer, **kwargs):
    input_ids, attention_mask = modify_attention_mask(prompt, model, tokenizer)
    print(attention_mask)

    # Make sure dropout is disabled (from_pretrained already leaves the model
    # in eval mode, so this is just a safeguard).
    model.eval()

    with torch.no_grad():
        output_sequences = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            pad_token_id=tokenizer.eos_token_id,
            **kwargs,
        )
    return tokenizer.decode(output_sequences[0], skip_special_tokens=True)


model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

prompt = (
    "The (large house:1.0001) was situated on a hill. The buildings were made "
    "in an enormous block by the three towers of the four houses, with high "
    "ceilings of over one hundred and eight inches. They were built with "
    "stones and wood and all are from small scale timber."
)
generated_text = custom_generate(prompt, model, tokenizer, do_sample=True, temperature=20.0, max_length=200)
print(generated_text)
```
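
If you just want to see how the `(phrase:weight)` syntax maps onto individual GPT-2 tokens, the sketch below prints each token next to its weight. It reuses `modify_attention_mask`, `model`, and `tokenizer` from the block above; the check prompt and the 1.0002 weight are only illustrative values.

```python
# Minimal sketch: inspect how a "(phrase:weight)" span maps to per-token weights.
# Assumes `modify_attention_mask`, `model`, and `tokenizer` from the block above;
# the prompt and weight here are arbitrary examples.
check_prompt = "A (quiet village:1.0002) near the coast."
ids, mask = modify_attention_mask(check_prompt, model, tokenizer)
for token_id, weight in zip(ids[0].tolist(), mask[0].tolist()):
    print(f"{tokenizer.convert_ids_to_tokens(token_id)!r:>12}  {weight}")
```

Only the tokens inside the bracketed span should show the 1.0002 weight; everything else, including the inserted 'Ġ' space tokens, stays at 1.0.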