update checkpoint and remove trust_remote_code
Browse files
README.md
CHANGED
|
@@ -51,15 +51,15 @@ pipeline_tag: text-generation
|
|
| 51 |
<a href="https://github.com/MiniMax-AI/MiniMax-01" target="_blank" style="margin: 2px;">
|
| 52 |
<img alt="GitHub" src="https://img.shields.io/badge/_GitHub-MiniMax-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
| 53 |
</a>
|
| 54 |
-
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01/blob/main/LICENSE-MODEL" style="margin: 2px;">
|
| 55 |
<img alt="Model License" src="https://img.shields.io/badge/_Model_License-Model_Agreement-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
| 56 |
</a>
|
| 57 |
-
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01/blob/main/LICENSE-CODE" style="margin: 2px;">
|
| 58 |
<img alt="Code License" src="https://img.shields.io/badge/_Code_License-MIT-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
| 59 |
</a>
|
| 60 |
</div>
|
| 61 |
<div align="center" style="line-height: 1;">
|
| 62 |
-
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01/blob/main/figures/wechat-qrcode.jpeg" target="_blank" style="margin: 2px;">
|
| 63 |
WeChat
|
| 64 |
</a>
|
| 65 |
</div>
|
|
@@ -174,7 +174,7 @@ Here we provide a simple example of loading the tokenizer and model to generate
|
|
| 174 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, QuantoConfig, GenerationConfig
|
| 175 |
|
| 176 |
# load hf config
|
| 177 |
-
hf_config = AutoConfig.from_pretrained("MiniMaxAI/MiniMax-Text-01")
|
| 178 |
|
| 179 |
# quantization config, int8 is recommended
|
| 180 |
quantization_config = QuantoConfig(
|
|
@@ -200,7 +200,7 @@ for i in range(world_size):
|
|
| 200 |
device_map[f'model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'
|
| 201 |
|
| 202 |
# load tokenizer
|
| 203 |
-
tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-Text-01")
|
| 204 |
prompt = "Hello!"
|
| 205 |
messages = [
|
| 206 |
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant created by MiniMax based on MiniMax-Text-01 model."}]},
|
|
@@ -216,11 +216,10 @@ model_inputs = tokenizer(text, return_tensors="pt").to("cuda")
|
|
| 216 |
|
| 217 |
# load bfloat16 model, move to device, and apply quantization
|
| 218 |
quantized_model = AutoModelForCausalLM.from_pretrained(
|
| 219 |
-
"MiniMaxAI/MiniMax-Text-01",
|
| 220 |
torch_dtype="bfloat16",
|
| 221 |
device_map=device_map,
|
| 222 |
quantization_config=quantization_config,
|
| 223 |
-
trust_remote_code=True,
|
| 224 |
offload_buffers=True,
|
| 225 |
)
|
| 226 |
|
|
|
|
| 51 |
<a href="https://github.com/MiniMax-AI/MiniMax-01" target="_blank" style="margin: 2px;">
|
| 52 |
<img alt="GitHub" src="https://img.shields.io/badge/_GitHub-MiniMax-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
| 53 |
</a>
|
| 54 |
+
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01-hf/blob/main/LICENSE-MODEL" style="margin: 2px;">
|
| 55 |
<img alt="Model License" src="https://img.shields.io/badge/_Model_License-Model_Agreement-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
| 56 |
</a>
|
| 57 |
+
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01-hf/blob/main/LICENSE-CODE" style="margin: 2px;">
|
| 58 |
<img alt="Code License" src="https://img.shields.io/badge/_Code_License-MIT-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
| 59 |
</a>
|
| 60 |
</div>
|
| 61 |
<div align="center" style="line-height: 1;">
|
| 62 |
+
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01-hf/blob/main/figures/wechat-qrcode.jpeg" target="_blank" style="margin: 2px;">
|
| 63 |
WeChat
|
| 64 |
</a>
|
| 65 |
</div>
|
|
|
|
| 174 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, QuantoConfig, GenerationConfig
|
| 175 |
|
| 176 |
# load hf config
|
| 177 |
+
hf_config = AutoConfig.from_pretrained("MiniMaxAI/MiniMax-Text-01-hf")
|
| 178 |
|
| 179 |
# quantization config, int8 is recommended
|
| 180 |
quantization_config = QuantoConfig(
|
|
|
|
| 200 |
device_map[f'model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'
|
| 201 |
|
| 202 |
# load tokenizer
|
| 203 |
+
tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-Text-01-hf")
|
| 204 |
prompt = "Hello!"
|
| 205 |
messages = [
|
| 206 |
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant created by MiniMax based on MiniMax-Text-01 model."}]},
|
|
|
|
| 216 |
|
| 217 |
# load bfloat16 model, move to device, and apply quantization
|
| 218 |
quantized_model = AutoModelForCausalLM.from_pretrained(
|
| 219 |
+
"MiniMaxAI/MiniMax-Text-01-hf",
|
| 220 |
torch_dtype="bfloat16",
|
| 221 |
device_map=device_map,
|
| 222 |
quantization_config=quantization_config,
|
|
|
|
| 223 |
offload_buffers=True,
|
| 224 |
)
|
| 225 |
|