[Official Tutorial] ChatGLM2-6B Deployment and Fine-Tuning

Microsoft Windows [版本 10.0.19045.3086]
(c) Microsoft Corporation. All rights reserved.
C:\Users\Administrator>conda env list
# conda environments:
#
base                     D:\Develop\anaconda3
                         H:\OpenAI\ChatGLM2-6B\ENV
C:\Users\Administrator>cd /d H:\OpenAI\ChatGLM2-6B
H:\OpenAI\ChatGLM2-6B>conda activate H:\OpenAI\ChatGLM2-6B\ENV
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py
You are using a model of type chatglm to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
Loading checkpoint shards: 100%|█████████████████████████████████████████████| 7/7 [37:52<00:00, 324.60s/it]
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ H:\OpenAI\ChatGLM2-6B\cli_demo.py:8 in <module>
│
│      5 import readline
│      6
│      7 tokenizer = AutoTokenizer.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True)
│ ❱    8 model = AutoModel.from_pretrained("THUDM\chatglm2-6b", trust_remote_code=True).quantize(
│      9 model = model.eval()
│     10
│     11 os_name = platform.system()
│
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\modeling_chatglm.py:1109 in quantize
│
│   1106 │   │
│   1107 │   │   self.config.quantization_bit = bits
│   1108 │   │
│ ❱ 1109 │   │   self.transformer.encoder = quantize(self.transformer.encoder, bits, empty_init=e
│   1110 │   │   │   │   │   │   │   │   │   │   **kwargs)
│   1111 │   │   return self
│   1112
│
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization.py:155 in quantize
│
│    152 def quantize(model, weight_bit_width, empty_init=False, device=None):
│    153 │   """Replace fp16 linear with quantized linear"""
│    154 │   for layer in model.layers:
│ ❱  155 │   │   layer.self_attention.query_key_value = QuantizedLinear(
│    156 │   │   │   weight_bit_width=weight_bit_width,
│    157 │   │   │   weight=layer.self_attention.query_key_value.weight.to(torch.cuda.current_dev
│    158 │   │   │   bias=layer.self_attention.query_key_value.bias,
│
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization.py:139 in __init__
│
│    136 │   │   │   self.weight_scale = weight.abs().max(dim=-1).values / ((2 ** (weight_bit_wid
│    137 │   │   │   self.weight = torch.round(weight / self.weight_scale[:, None]).to(torch.int8
│    138 │   │   │   if weight_bit_width == 4:
│ ❱  139 │   │   │   │   self.weight = compress_int4_weight(self.weight)
│    140 │   │
│    141 │   │   self.weight = Parameter(self.weight.to(device), requires_grad=False)
│    142 │   │   self.weight_scale = Parameter(self.weight_scale.to(device), requires_grad=False)
│
│ C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\chatglm2-6b\quantization.py:78 in compress_int4_weight
│
│     75 │   │   gridDim = (n, 1, 1)
│     76 │   │   blockDim = (min(round_up(m, 32), 1024), 1, 1)
│     77 │   │
│ ❱   78 │   │   kernels.int4WeightCompression(
│     79 │   │   │   gridDim,
│     80 │   │   │   blockDim,
│     81 │   │   │   0,
│
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:48 in __call__
│
│     45 │   │   │   sharedMemBytes : int, stream : cudart.cudaStream_t, params : List[Any] ) ->
│     46 │   │   assert len(gridDim) == 3
│     47 │   │   assert len(blockDim) == 3
│ ❱   48 │   │   func = self._prepare_func()
│     49 │   │
│     50 │   │   cuda.cuLaunchKernel(func,
│     51 │   │   │   gridDim[0], gridDim[1], gridDim[2],
│
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:40 in _prepare_func
│
│     37 │   │   cudart.cudaSetDevice(curr_device)  # ensure cudart context
│     38 │   │   if curr_device not in self._funcs:
│     39 │   │   │   self._funcs[curr_device] = cuda.cuModuleGetFunction(
│ ❱   40 │   │   │   │   self._module.get_module(), self._func_name
│     41 │   │   │   )
│     42 │   │   return self._funcs[curr_device]
│     43
│
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\kernels\base.py:24 in get_module
│
│     21 │   │   curr_device = cudart.cudaGetDevice()
│     22 │   │   if curr_device not in self._module:
│     23 │   │   │   Device(curr_device).use()  # force initialize context
│ ❱   24 │   │   │   self._module[curr_device] = cuda.cuModuleLoadData(self._code)
│     25 │   │   return self._module[curr_device]
│     26
│     27
│
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\base.py:94 in wrapper
│
│     91 │   │   │   def decorator(f):
│     92 │   │   │   │   @wraps(f)
│     93 │   │   │   │   def wrapper(*args, **kwargs):
│ ❱   94 │   │   │   │   │   return f(*args, **kwargs)
│     95 │   │   │   │   return wrapper
│     96 │   │   │   return decorator
│     97
│
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:233 in cuModuleLoadData
│
│    230 @cuda.bind("cuModuleLoadData", [ctypes.POINTER(CUmodule), ctypes.c_void_p], CUresult)
│    231 def cuModuleLoadData(data : bytes) -> CUmodule:
│    232 │   module = CUmodule()
│ ❱  233 │   checkCUStatus(cuda.cuModuleLoadData(ctypes.byref(module), data))
│    234 │   return module
│    235
│    236 @cuda.bind("cuModuleUnload", [CUmodule], CUresult)
│
│ H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\cpm_kernels\library\cuda.py:216 in checkCUStatus
│
│    213
│    214 def checkCUStatus(error : int) -> None:
│    215 │   if error != CUDA_SUCCESS:
│ ❱  216 │   │   raise RuntimeError("CUDA Error: %s" % cuGetErrorString(error))
│    217
│    218 @cuda.bind("cuDriverGetVersion", [ctypes.POINTER(ctypes.c_int)], CUresult)
│    219 def cuDriverGetVersion() -> int:
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: CUDA Error: no kernel image is available for execution on the device
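
The RuntimeError at the bottom of the box is the actual failure: "no kernel image is available for execution on the device" means the GPU itself was found, but the precompiled CUDA kernels being launched (here the int4 compression kernel that cpm_kernels loads for .quantize(4)) were not built for this card's architecture. A minimal sketch, using only public PyTorch APIs, to see what the card reports and which architectures the installed PyTorch binary was compiled for:

import torch

# "no kernel image is available" usually means the GPU's compute
# capability is not among the architectures a kernel was compiled for.
if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(f"Device: {torch.cuda.get_device_name(0)} (sm_{major}{minor})")
    print(f"PyTorch binary supports: {torch.cuda.get_arch_list()}")
else:
    print("CUDA is not visible to PyTorch")

Note that get_arch_list() only describes PyTorch's own kernels; cpm_kernels ships separately compiled kernels, so the quantized path can still fail on an older card even when plain PyTorch CUDA ops work, which is exactly what the interactive session at the end of this post shows.
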
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>python cli_demo.py
Traceback (most recent call last):
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 629, in _get_config_dict
    resolved_config_file = cached_file(
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\utils\hub.py", line 417, in cached_file
    resolved_file = hf_hub_download(
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 110, in _inner_fn
    validate_repo_id(arg_value)
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\huggingface_hub\utils\_validators.py", line 164, in validate_repo_id
    raise HFValidationError(
huggingface_hub.utils._validators.HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: 'THUDM\chatglm2-6b-int4'.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "H:\OpenAI\ChatGLM2-6B\cli_demo.py", line 8, in <module>
    model = AutoModel.from_pretrained("THUDM\chatglm2-6b-int4", trust_remote_code=True).quantize(4).cuda()
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\auto_factory.py", line 456, in from_pretrained
    config, kwargs = AutoConfig.from_pretrained(
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\models\auto\configuration_auto.py", line 944, in from_pretrained
    config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 574, in get_config_dict
    config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
  File "H:\OpenAI\ChatGLM2-6B\ENV\lib\site-packages\transformers\configuration_utils.py", line 650, in _get_config_dict
    raise EnvironmentError(
OSError: Can't load the configuration of 'THUDM\chatglm2-6b-int4'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'THUDM\chatglm2-6b-int4' is the correct path to a directory containing a config.json file
(H:\OpenAI\ChatGLM2-6B\ENV) H:\OpenAI\ChatGLM2-6B>
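
This second failure has nothing to do with CUDA. huggingface_hub rejects 'THUDM\chatglm2-6b-int4' because the backslash makes the string neither a valid Hub repo id (repo ids use a forward slash) nor an existing local directory. A corrected sketch of the loading lines in cli_demo.py, assuming the int4 checkpoint is pulled from the Hub; since that checkpoint is already quantized, the extra .quantize(4) call should not be needed:

from transformers import AutoTokenizer, AutoModel

# Forward slash: this names a Hugging Face Hub repo, not a Windows path.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True).cuda()
model = model.eval()

# If the weights were downloaded into a local folder instead, pass that
# path explicitly as a raw string (hypothetical location shown):
# model = AutoModel.from_pretrained(r"H:\OpenAI\ChatGLM2-6B\THUDM\chatglm2-6b-int4",
#                                   trust_remote_code=True).cuda()
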
(H:\OpenAI\ChatGLM2-6B\ENV) C:\Users\Administrator>python
Python 3.9.6 (default, Aug 18 2021, 15:44:49) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> x=torch.rand(4,6)
>>> print(x)
tensor([[0.4784, 0.6858, 0.0851, 0.2220, 0.0233, 0.4879],
        [0.2230, 0.8423, 0.2485, 0.6101, 0.2816, 0.4068],
        [0.5379, 0.6412, 0.2049, 0.8542, 0.3221, 0.2888],
        [0.4783, 0.4648, 0.6325, 0.8418, 0.9524, 0.1783]])
>>> torch.cuda.is_available()
True
>>> torch.cuda.current_device()
0
>>> torch.cuda.device_count()
1
>>> torch.__version__
'2.0.1'
>>> torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))
True
>>> a=torch.Tensor([1,2])
>>> a=a.cuda()
>>> a
tensor([1., 2.], device='cuda:0')
>>>
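
The interactive session confirms that PyTorch 2.0.1 sees the GPU, places tensors on it, and accepts cuDNN, so the earlier "no kernel image" error is confined to the int4 path through cpm_kernels rather than the CUDA installation itself. One workaround, sketched below on the assumption that the card has roughly 13 GB of free memory, is to skip quantization and run the fp16 checkpoint:

from transformers import AutoTokenizer, AutoModel

# Bypass cpm_kernels entirely: load the full fp16 checkpoint instead of int4.
# ChatGLM2-6B in fp16 needs roughly 13 GB of GPU memory.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
model = model.eval()

# Quick smoke test with the chat API from the ChatGLM2-6B README.
response, history = model.chat(tokenizer, "Hello", history=[])
print(response)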