Flux-Use-Compiled-Graph

Running on Zero

Flux-Use-Compiled-Graph / optimization.py

6580845 3 months ago

1.3 kB

	import spaces
	from typing import Any
	from typing import Callable
	from typing import ParamSpec
	import torch
	from torch.utils._pytree import tree_map

	P = ParamSpec("P")

	TRANSFORMER_HIDDEN_DIM = torch.export.Dim("hidden", min=4096, max=8212)

	# Specific to Flux. More about this is available in
	# https://huggingface.co/blog/zerogpu-aoti
	TRANSFORMER_DYNAMIC_SHAPES = {
	"hidden_states": {1: TRANSFORMER_HIDDEN_DIM},
	"img_ids": {0: TRANSFORMER_HIDDEN_DIM},
	}

	INDUCTOR_CONFIGS = {
	"conv_1x1_as_mm": True,
	"epilogue_fusion": False,
	"coordinate_descent_tuning": True,
	"coordinate_descent_check_all_directions": True,
	"max_autotune": True,
	"triton.cudagraphs": True,
	}


	def compile_transformer(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
	    """Export ``pipeline.transformer`` and compile it ahead of time.

	    The pipeline is invoked once with the supplied arguments inside
	    ``spaces.aoti_capture``; the ``args``/``kwargs`` exposed by the captured
	    call are then used as the example inputs for ``torch.export.export``, and
	    the exported program is passed to ``spaces.aoti_compile`` together with
	    ``INDUCTOR_CONFIGS``.

	    Args:
	        pipeline: Callable pipeline that exposes a ``transformer`` attribute.
	        *args: Positional arguments forwarded unchanged to ``pipeline``.
	        **kwargs: Keyword arguments forwarded unchanged to ``pipeline``.

	    Returns:
	        The value returned by ``spaces.aoti_compile`` for the exported
	        transformer.
	    """

	    # NOTE(review): this nested function is called immediately; presumably it
	    # exists so a decorator (e.g. a GPU-allocation wrapper) can be attached
	    # to the compile step -- confirm against the deployed version.
	    def f():
	        # Run the pipeline once so the call into the transformer is captured.
	        with spaces.aoti_capture(pipeline.transformer) as call:
	            pipeline(*args, **kwargs)

	        # Map every captured keyword input to None first, then overlay the
	        # module-level entries for the inputs that have a dynamic axis.
	        dynamic_shapes = tree_map(lambda _: None, call.kwargs)
	        dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES

	        exported = torch.export.export(
	            mod=pipeline.transformer,
	            args=call.args,
	            kwargs=call.kwargs,
	            dynamic_shapes=dynamic_shapes,
	        )
	        return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)

	    return f()