Spaces: Running on Zero
Commit: debug zerogpu timeout error (Browse files)
app.py CHANGED

@@ -65,7 +65,6 @@ def _load_model_processor(args):
         attn_implementation="eager",  # ZeroGPU 必须用 eager,因为初始在 CPU
         torch_dtype=torch.bfloat16,
         device_map="auto",  # 改回 auto,让 ZeroGPU 自动管理
-        token=os.environ.get('HF_TOKEN')
     )

     # 关键:禁用梯度检查点(如果启用会导致极慢)

@@ -244,7 +243,6 @@ def _launch_demo(args, model, processor):
         generated_ids = model.generate(
             **inputs,
             max_new_tokens=1024,
-            repetition_penalty=1.03,
             do_sample=False
         )
         print(f"[DEBUG] model.generate() 返回,耗时: {time.time() - generate_call_start:.2f}s")
Resulting file excerpt (after change):

 65          attn_implementation="eager",  # ZeroGPU 必须用 eager,因为初始在 CPU
 66          torch_dtype=torch.bfloat16,
 67          device_map="auto",  # 改回 auto,让 ZeroGPU 自动管理
 68      )
 69
 70      # 关键:禁用梯度检查点(如果启用会导致极慢)

243          generated_ids = model.generate(
244              **inputs,
245              max_new_tokens=1024,
246              do_sample=False
247          )
248          print(f"[DEBUG] model.generate() 返回,耗时: {time.time() - generate_call_start:.2f}s")