
I am trying to convert a Flan-T5 model downloaded from Hugging Face into ONNX format and run inference with it.

My input is a description of disease symptoms, and the expected output is the disease name.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import onnx

# Use the GPU if available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xl").to(device)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")

  

# Export the model to ONNX format
onnx_path = "flan-t5-xl.onnx"
dummy_input = tokenizer("What's the disease name in this text: Example text", return_tensors="pt", padding=True).to(device)
dummy_input_ids = dummy_input["input_ids"]
dummy_attention_mask = dummy_input["attention_mask"]
dummy_decoder_input_ids = tokenizer("<pad>", return_tensors="pt").input_ids.to(device)

with torch.no_grad():
    torch.onnx.export(
        model,
        (dummy_input_ids, dummy_attention_mask, dummy_decoder_input_ids),
        onnx_path,
        opset_version=11,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        output_names=["output"],
        dynamic_axes={
            "input_ids": {0: "batch_size"},
            "attention_mask": {0: "batch_size"},
            "decoder_input_ids": {0: "batch_size"},
            "output": {0: "batch_size", 1: "sequence_length"},
        },
    )
print(f"Model saved to {onnx_path}")

# Inference using the ONNX model on GPU

import onnxruntime

onnx_model = onnxruntime.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])

Creating the InferenceSession fails with the following error:

InvalidGraph: [ONNXRuntimeError] : 10 : INVALID_GRAPH : Load model from flan-t5-xl.onnx failed:This is an invalid model. Type Error: Type 'tensor(int64)' of input parameter (/decoder/block.0/layer.0/SelfAttention/Sub_output_0) of operator (Min) in node (/decoder/block.0/layer.0/SelfAttention/Min) is invalid.

The rest of my inference code, which I never get to run because of the error above:

input_text = input("Enter Disease/Symptom Detail: ")
inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(device)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
decoder_input_ids = tokenizer("<pad>", return_tensors="pt").input_ids.to(device)

onnx_inputs = {
    "input_ids": input_ids.cpu().numpy(),
    "attention_mask": attention_mask.cpu().numpy(),
    "decoder_input_ids": decoder_input_ids.cpu().numpy(),
}

onnx_output = onnx_model.run(None, onnx_inputs)[0]
decoded_output = tokenizer.decode(onnx_output[0], skip_special_tokens=True)

print('-' * 100)
print(f"Name of Disease based on Entered Text: {decoded_output}")

1 Answer
Use the already-converted ONNX checkpoints from https://huggingface.co/datasets/bakks/flan-t5-onnx instead.

To convert the google/flan-t5 models yourself, see https://huggingface.co/datasets/bakks/flan-t5-onnx/blob/main/exportt5.py:

from pathlib import Path
import transformers as t
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM

# print out the version of the transformers library
print("transformers version:", t.__version__)



models = [
    #"google/flan-t5-small",
    #"google/flan-t5-base",
    #"google/flan-t5-large",
    "google/flan-t5-xl",
    "google/flan-t5-xxl",
]

for model_id in models:
    model_name = model_id.split("/")[1]
    onnx_path = Path("onnx/" + model_name)

    # load the vanilla transformers model and convert it to ONNX
    # (newer optimum versions use export=True instead of from_transformers=True)
    model = ORTModelForSeq2SeqLM.from_pretrained(model_id, from_transformers=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # save onnx checkpoint and tokenizer
    model.save_pretrained(onnx_path)
    tokenizer.save_pretrained(onnx_path)

Then try again. The export writes separate encoder and decoder graphs (encoder_model.onnx, decoder_model.onnx, ...) into the output directory, so point the session at one of those files rather than at the directory itself:

import onnxruntime

onnx_model = onnxruntime.InferenceSession(
    str(onnx_path / "encoder_model.onnx"), providers=["CUDAExecutionProvider"]
)
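
For end-to-end generation it is usually simpler to let optimum drive the ONNX sessions for you rather than calling onnxruntime by hand. A minimal sketch, assuming the export loop above has written onnx/flan-t5-xl and that onnxruntime-gpu is installed (the prompt wording is only an example, and device handling can vary slightly between optimum versions):

from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM

onnx_dir = "onnx/flan-t5-xl"  # directory produced by the export loop above
tokenizer = AutoTokenizer.from_pretrained(onnx_dir)
model = ORTModelForSeq2SeqLM.from_pretrained(onnx_dir, provider="CUDAExecutionProvider")

input_text = input("Enter Disease/Symptom Detail: ")
inputs = tokenizer(
    f"What's the disease name in this text: {input_text}", return_tensors="pt"
).to("cuda")

# generate() runs the encoder once and then decodes autoregressively over the ONNX graphs
output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0])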
