Auto-Completion-Style Text Generation with the GPT-2 Model


from functools import lru_cache

from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

class AutoComplete:
    def __init__(self, model_name="gpt2"):
        """Initialize the auto-complete system."""
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name, padding_side="left")
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.model.eval()  # Set to evaluation mode

    def get_completion(self, text, max_length=50):
        """Generate a completion for the input text."""
        print("**** Prompt:", text)

        # Encode the input text
        inputs = self.tokenizer(text, add_special_tokens=False, return_tensors="pt")
        input_ids = inputs["input_ids"].to(self.device)
        attn_masks = inputs["attention_mask"].to(self.device)

        # Generate the completion
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids,
                attention_mask=attn_masks,
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7
            )

        # Decode the output and strip the original prompt so only the new text remains
        full_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        completion = full_text[len(text):]

        return completion
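
A quick single-prompt check of the base class could look like the sketch below; the prompt string and max_length value are arbitrary choices for illustration.

# Example: single-prompt completion (prompt and max_length are illustrative)
auto_complete = AutoComplete()
print(auto_complete.get_completion("The quick brown fox", max_length=30))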

 

 

class CachedAutoComplete(AutoComplete):
    def __init__(self, cache_size=1000, **kwargs):
        """Initialize with caching support."""
        super().__init__(**kwargs)
        # Wrap the bound method so identical (text, max_length) calls are memoized
        self.get_completion = lru_cache(maxsize=cache_size)(
            self.get_completion
        )
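
Because the wrapper is applied per instance, a repeated call with the same arguments returns the memoized string instead of re-running generation; note that with do_sample=True this also means an identical prompt always gets the same completion back from the cache. A minimal check (the prompt is illustrative):

# Example: the second identical call is served from the LRU cache
cached_complete = CachedAutoComplete(cache_size=100)
first = cached_complete.get_completion("Machine learning is")
second = cached_complete.get_completion("Machine learning is")  # cache hit, no new generation
print(first == second)  # True, because the cached string is reused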

 

 

class OptimizedAutoComplete(CachedAutoComplete):
    def __init__(self, **kwargs):
        """Initialize with optimizations."""
        super().__init__(**kwargs)
        # GPT-2 has no pad token by default, so reuse the EOS token for padding
        self.tokenizer.pad_token = self.tokenizer.eos_token

        if self.device == "cuda":
            self.model = self.model.half()  # Use FP16 on GPU

        # Keep the model in evaluation mode
        self.model.eval()

    def preprocess_batch(self, texts):
        """Efficiently process multiple texts."""
        # Tokenize all texts at once, padding to the longest prompt in the batch
        inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        return inputs.to(self.device)

 

    def generate_batch(self, texts, max_length=50):
        """Generate completions for multiple texts."""
        # Preprocess the batch
        inputs = self.preprocess_batch(texts)

        # Generate completions
        with torch.no_grad():
            outputs = self.model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7
            )

        # Decode completions
        completions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # Extract the newly generated text by stripping each original prompt
        results = []
        for text, completion in zip(texts, completions):
            results.append(completion[len(text):])

        return results

 

# Example: Optimized batch completion
optimized_complete = OptimizedAutoComplete()
texts = [
    "Machine learning is",
    "Deep neural networks can",
    "The training process involves"
]
completions = optimized_complete.generate_batch(texts)
for text, completion in zip(texts, completions):
    print(f"\nInput: {text}")
    print(f"Completion: {completion}")
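
For interactive, auto-complete-style use, the same cached single-prompt method can be reused in a simple loop; the loop below is only a sketch of how the class might be driven, not part of the classes above.

# Example sketch: prompt the user repeatedly and suggest a continuation
while True:
    user_text = input("\nType a prompt (or 'quit' to exit): ")
    if user_text.strip().lower() == "quit":
        break
    print("Suggested continuation:", optimized_complete.get_completion(user_text))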
