% QUIK preprint (arXiv). The original record had no citation key, so a
% key following the lastnameYYYYkeyword scheme has been assigned.
% Typed as @misc because the only identifier is an arXiv DOI and no
% proceedings title (booktitle) is recorded; switch to @inproceedings and
% add booktitle once a published venue is cited.
@misc{ashkboos2023quik,
  author        = {Ashkboos, Saleh and Markov, Ilia and Frantar, Elias and Zhong, Tingxuan and Wang, Xincheng and Ren, Jie and Hoefler, Torsten and Alistarh, Dan},
  title         = {{QUIK}: Towards End-to-End 4-Bit Inference on Generative Large Language Models},
  year          = {2023},
  month         = nov,
  eprint        = {2310.09259},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2310.09259},
}