% QUIK paper, EMNLP 2024 proceedings entry.
% NOTE(review): the doi below uses the arXiv prefix (10.48550/arXiv.*), i.e. it
% identifies the preprint -- TODO confirm and substitute the proceedings DOI.
@inproceedings{ashkboos2024quik,
  author    = {Ashkboos, Saleh and Markov, Ilia and Frantar, Elias and Zhong, Tingxuan and Wang, Xincheng and Ren, Jie and Hoefler, Torsten and Alistarh, Dan},
  title     = {{QUIK}: Towards End-to-End 4-Bit Inference on Generative Large Language Models},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP'24)},
  year      = {2024},
  month     = nov,
  pages     = {3355--3371},
  location  = {Miami, FL, USA},
  publisher = {Association for Computational Linguistics},
  doi       = {10.48550/arXiv.2310.09259},
}