Auto-Completion-Style Text Generation with the GPT-2 Model


from functools import lru_cache

from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

class AutoComplete:
    def __init__(self, model_name="gpt2"):
        """Initialize the auto-complete system."""
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name, padding_side="left")
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.model.eval()  # Set to evaluation mode

    def get_completion(self, text, max_length=50):
        """Generate a completion for the input text."""
        print("**** Prompt:", text)

        # Encode the input text
        inputs = self.tokenizer(text, add_special_tokens=False, return_tensors="pt")
        input_ids = inputs["input_ids"].to(self.device)
        attn_masks = inputs["attention_mask"].to(self.device)

        # Generate the completion
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids,
                attention_mask=attn_masks,
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7
            )

        # Decode the output and strip the original prompt so only the new text remains
        full_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        completion = full_text[len(text):]

        return completion
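
A quick single-prompt check of the base class could look like the sketch below; the prompt string and max_length value are arbitrary choices for illustration.

# Example: single-prompt completion (prompt and max_length are illustrative)
auto_complete = AutoComplete()
print(auto_complete.get_completion("The quick brown fox", max_length=30))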

 

 

class CachedAutoComplete(AutoComplete):
    def __init__(self, cache_size=1000, **kwargs):
        """Initialize with caching support."""
        super().__init__(**kwargs)
        # Wrap the bound method so identical (text, max_length) calls are memoized
        self.get_completion = lru_cache(maxsize=cache_size)(
            self.get_completion
        )
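
Because the wrapper is applied per instance, a repeated call with the same arguments returns the memoized string instead of re-running generation; note that with do_sample=True this also means an identical prompt always gets the same completion back from the cache. A minimal check (the prompt is illustrative):

# Example: the second identical call is served from the LRU cache
cached_complete = CachedAutoComplete(cache_size=100)
first = cached_complete.get_completion("Machine learning is")
second = cached_complete.get_completion("Machine learning is")  # cache hit, no new generation
print(first == second)  # True, because the cached string is reused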

 

 

class OptimizedAutoComplete(CachedAutoComplete):
    def __init__(self, **kwargs):
        """Initialize with optimizations."""
        super().__init__(**kwargs)
        # GPT-2 has no pad token by default, so reuse the EOS token for padding
        self.tokenizer.pad_token = self.tokenizer.eos_token

        if self.device == "cuda":
            self.model = self.model.half()  # Use FP16 on GPU

        # Keep the model in evaluation mode
        self.model.eval()

    def preprocess_batch(self, texts):
        """Efficiently process multiple texts."""
        # Tokenize all texts at once, padding to the longest prompt in the batch
        inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        return inputs.to(self.device)

 

    def generate_batch(self, texts, max_length=50):
        """Generate completions for multiple texts."""
        # Preprocess the batch
        inputs = self.preprocess_batch(texts)

        # Generate completions
        with torch.no_grad():
            outputs = self.model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7
            )

        # Decode completions
        completions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # Extract the newly generated text by stripping each original prompt
        results = []
        for text, completion in zip(texts, completions):
            results.append(completion[len(text):])

        return results

 

# Example: Optimized batch completion
optimized_complete = OptimizedAutoComplete()
texts = [
    "Machine learning is",
    "Deep neural networks can",
    "The training process involves"
]
completions = optimized_complete.generate_batch(texts)
for text, completion in zip(texts, completions):
    print(f"\nInput: {text}")
    print(f"Completion: {completion}")
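
For interactive, auto-complete-style use, the same cached single-prompt method can be reused in a simple loop; the loop below is only a sketch of how the class might be driven, not part of the classes above.

# Example sketch: prompt the user repeatedly and suggest a continuation
while True:
    user_text = input("\nType a prompt (or 'quit' to exit): ")
    if user_text.strip().lower() == "quit":
        break
    print("Suggested continuation:", optimized_complete.get_completion(user_text))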
