Preprints Conference and Journal Papers Preprints 2024
Input-sensitive dense-sparse primitive compositions for GNN acceleration Damitha Lenadora , Vimarsh Sathia , Gerasimos Gerogiannis , and 3 more authors 2024 arXiv
FLuRKA: Fast fused Low-Rank & Kernel Attention Ahan Gupta , Yueming Yuan , Yanqi Zhou , and 1 more author 2024 arXiv
Conference and Journal Papers 2024
MLSys COMET: X86 Cost Model Explanation Framework Isha Chaudhary , Alex Renda , Charith Mendis, and 1 more author 2024 arXiv PDF
SIGMOD Dias: Dynamic Rewriting of Pandas Code Stefanos Baziotis , Daniel Kang , and Charith Mendis In Proceedings of ACM on Management of Data (SIGMOD) , 2024 arXiv PDF
ASPLOS Hydride: A Retargetable and Extensible Synthesis-based Compiler for Modern Hardware Architectures Akash Kothari , Abdul Rafae Noor , Muchen Xu , and 6 more authors In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) , 2024 PDF
ASPLOS Two-Face: Combining Collective and One-Sided Accesses for Efficient Distributed SpMM Charles Block , Gerasimos Gerogiannis , Charith Mendis, and 2 more authors In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) , 2024 PDF
ASPLOS TGLite: A Lightweight Programming Framework for Continuous-Time Temporal Graph Neural Networks Yufeng Wang , and Charith Mendis In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) , 2024 PDF

2023
NeurIPS Learning Large Graph Property Prediction via Graph Segment Training Kaidi Cao , Mangpo Phothilimthana , Sami Abu-El-Haija , and 5 more authors In Advances in Neural Information Processing Systems , 2023 Bib PDF
@inproceedings{neurips23-gst,
  author    = {Cao, Kaidi and Phothilimthana, Mangpo and Abu-El-Haija, Sami and Zelle, Dustin and Zhou, Yanqi and Mendis, Charith and Leskovec, Jure and Perozzi, Bryan},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Oh, A. and Neumann, T. and Globerson, A. and Saenko, K. and Hardt, M. and Levine, S.},
  pages     = {23345--23361},
  publisher = {Curran Associates, Inc.},
  title     = {Learning Large Graph Property Prediction via Graph Segment Training},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2023/file/48f8143cebe113f4596e1781771578cd-Paper-Conference.pdf},
  volume    = {36},
  year      = {2023},
}

NeurIPS TpuGraphs: A Performance Prediction Dataset on Large Tensor Computational Graphs Mangpo Phothilimthana , Sami Abu-El-Haija , Kaidi Cao , and 4 more authors In Advances in Neural Information Processing Systems , 2023 Bib PDF
% The mixed-case name in the title is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{neurips23-tpugraphs,
  author    = {Phothilimthana, Mangpo and Abu-El-Haija, Sami and Cao, Kaidi and Fatemi, Bahare and Burrows, Michael and Mendis, Charith and Perozzi, Bryan},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Oh, A. and Neumann, T. and Globerson, A. and Saenko, K. and Hardt, M. and Levine, S.},
  pages     = {70355--70375},
  publisher = {Curran Associates, Inc.},
  title     = {{TpuGraphs}: A Performance Prediction Dataset on Large Tensor Computational Graphs},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2023/file/ded1a89e2b3b925444ada973af66336e-Paper-Datasets_and_Benchmarks.pdf},
  volume    = {36},
  year      = {2023},
}

MetaCom Challenges in Metaverse Research: An Internet of Things Perspective Tarek Abdelzaher , Matthew Caesar , Charith Mendis, and 3 more authors In IEEE International Conference on Metaverse Computing, Networking and Applications (MetaCom) , 2023 Bib PDF
% The page range is written with a double hyphen; the empty volume and number fields were dropped.
@inproceedings{metacom23-metaverse,
  author    = {Abdelzaher, Tarek and Caesar, Matthew and Mendis, Charith and Nahrstedt, Klara and Srivastava, Mani and Yu, Minlan},
  booktitle = {IEEE International Conference on Metaverse Computing, Networking and Applications (MetaCom)},
  title     = {Challenges in Metaverse Research: An Internet of Things Perspective},
  year      = {2023},
  pages     = {161--170},
  doi       = {10.1109/MetaCom57706.2023.00042},
}

MLSys Unified Convolution Framework: A compiler-based approach to support sparse convolutions Jaeyeon Won , Changwan Hong , Charith Mendis, and 2 more authors In Proceedings of Machine Learning and Systems (MLSys) , 2023 Bib PDF
@inproceedings{mlsys23-ucf,
  author    = {Won, Jaeyeon and Hong, Changwan and Mendis, Charith and Emer, Joel and Amarasinghe, Saman},
  title     = {Unified Convolution Framework: A compiler-based approach to support sparse convolutions},
  keywords  = {TACO,ML4C},
  url       = {http://groups.csail.mit.edu/commit/papers/2023/2023_MLSys_SparseConvolution_camready.pdf},
  year      = {2023},
  address   = {Miami, USA},
  booktitle = {Proceedings of Machine Learning and Systems (MLSys)},
}

ISCA SPADE: A Flexible and Scalable Accelerator for SpMM and SDDMM Gerasimos Gerogiannis , Serif Yesil , Damitha Lenadora , and 3 more authors In Proceedings of the 50th Annual International Symposium on Computer Architecture (ISCA) , 2023 Bib PDF
% Acronyms in the title are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{spade-isca2023,
  author    = {Gerogiannis, Gerasimos and Yesil, Serif and Lenadora, Damitha and Cao, Dingyuan and Mendis, Charith and Torrellas, Josep},
  title     = {{SPADE}: A Flexible and Scalable Accelerator for {SpMM} and {SDDMM}},
  year      = {2023},
  isbn      = {9798400700958},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3579371.3589054},
  doi       = {10.1145/3579371.3589054},
  booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture (ISCA)},
  articleno = {19},
  numpages  = {15},
  keywords  = {SDDMM, sparse computations, hardware accelerator, SpMM},
  location  = {Orlando, FL, USA},
  series    = {ISCA '23},
  bib       = {https://dblp.org/rec/conf/isca/GerogiannisYLCM23.html?view=bibtex}
}

PPoPP TGOpt: Redundancy-Aware Optimizations for Temporal Graph Attention Networks Yufeng Wang , and Charith Mendis In Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming (PPoPP) , 2023 Bib PDF Code
% The mixed-case system name in the title is brace-protected so its capitals are kept.
@inproceedings{tgopt-ppopp2023,
  author    = {Wang, Yufeng and Mendis, Charith},
  editor    = {Dehnavi, Maryam Mehri and Kulkarni, Milind and Krishnamoorthy, Sriram},
  title     = {{TGOpt}: Redundancy-Aware Optimizations for Temporal Graph Attention Networks},
  booktitle = {Proceedings of the 28th {ACM} {SIGPLAN} Annual Symposium on Principles and Practice of Parallel Programming (PPoPP)},
  pages     = {354--368},
  publisher = {{ACM}},
  year      = {2023},
  url       = {https://doi.org/10.1145/3572848.3577490},
  doi       = {10.1145/3572848.3577490},
  timestamp = {Wed, 22 Feb 2023 11:49:05 +0100},
  biburl    = {https://dblp.org/rec/conf/ppopp/WangM23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/ppopp/WangM23.html?view=bibtex}
}

ASPLOS WACO: Learning Workload-Aware Co-optimization of the Format and Schedule of a Sparse Tensor Program Jaeyeon Won , Charith Mendis, Joel S.
Emer , and 1 more author In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) , 2023 Bib PDF Code
% Conference paper record; the biburl and bibsource fields point at dblp.
@inproceedings{waco-asplos2023,
  author    = {Won, Jaeyeon and Mendis, Charith and Emer, Joel S. and Amarasinghe, Saman P.},
  editor    = {Aamodt, Tor M. and Jerger, Natalie D. Enright and Swift, Michael M.},
  title     = {{WACO:} Learning Workload-Aware Co-optimization of the Format and Schedule of a Sparse Tensor Program},
  booktitle = {Proceedings of the 28th {ACM} International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)},
  pages     = {920--934},
  publisher = {{ACM}},
  year      = {2023},
  url       = {https://doi.org/10.1145/3575693.3575742},
  doi       = {10.1145/3575693.3575742},
  timestamp = {Thu, 02 Feb 2023 08:48:06 +0100},
  biburl    = {https://dblp.org/rec/conf/asplos/WonMEA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/asplos/WonMEA23.html?view=bibtex}
}

2022
IISWC GRANITE: A Graph Neural Network Model for Basic Block Throughput Estimation Ondrej Sýkora , Phitchaya Mangpo Phothilimthana , Charith Mendis, and 1 more author In IEEE International Symposium on Workload Characterization (IISWC) , 2022 Bib PDF
% Conference paper record; the first author's accented letter is written as a LaTeX escape.
@inproceedings{granite-iiswc2022,
  author    = {S{\'{y}}kora, Ondrej and Phothilimthana, Phitchaya Mangpo and Mendis, Charith and Yazdanbakhsh, Amir},
  title     = {{GRANITE:} {A} Graph Neural Network Model for Basic Block Throughput Estimation},
  booktitle = {{IEEE} International Symposium on Workload Characterization (IISWC)},
  pages     = {14--26},
  publisher = {{IEEE}},
  year      = {2022},
  url       = {https://doi.org/10.1109/IISWC55918.2022.00012},
  doi       = {10.1109/IISWC55918.2022.00012},
  timestamp = {Wed, 04 Jan 2023 16:48:41 +0100},
  biburl    = {https://dblp.org/rec/conf/iiswc/SykoraPMY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/iiswc/SykoraPMY22.html?view=bibtex}
}
PLDI All You Need is Superword-Level Parallelism: Systematic Control-Flow Vectorization with SLP Yishen Chen , Charith Mendis, and Saman Amarasinghe In Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation (PLDI) , 2022 Bib PDF
% The trailing acronym in the title is brace-protected so sentence-casing styles keep it uppercase.
@inproceedings{superslp-pldi2022,
  author    = {Chen, Yishen and Mendis, Charith and Amarasinghe, Saman},
  title     = {All You Need is Superword-Level Parallelism: Systematic Control-Flow Vectorization with {SLP}},
  year      = {2022},
  isbn      = {9781450392655},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://groups.csail.mit.edu/commit/papers/2022/pldi22-vegen.pdf},
  doi       = {10.1145/3519939.3523701},
  booktitle = {Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation (PLDI)},
  pages     = {301--315},
  numpages  = {15},
  keywords  = {SLP},
  location  = {San Diego, CA, USA},
  series    = {PLDI 2022},
  bib       = {https://dblp.org/rec/conf/pldi/ChenMA22.html?view=bibtex}
}

2021
ASPLOS VeGen: A Vectorizer Generator for SIMD and Beyond Yishen Chen , Charith Mendis, Michael Carbin , and 1 more author In Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) , 2021 Bib PDF Video
% The page range uses an ASCII double hyphen in place of a Unicode en dash.
% The system name and acronym in the title are brace-protected so their capitals are kept.
@inproceedings{vegen-asplos2021,
  author    = {Chen, Yishen and Mendis, Charith and Carbin, Michael and Amarasinghe, Saman},
  title     = {{VeGen}: A Vectorizer Generator for {SIMD} and Beyond},
  year      = {2021},
  isbn      = {9781450383172},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3445814.3446692},
  doi       = {10.1145/3445814.3446692},
  booktitle = {Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)},
  pages     = {902--914},
  numpages  = {13},
  keywords  = {optimization, auto-vectorization, non-SIMD},
  location  = {Virtual, USA},
  series    = {ASPLOS '21},
  bib       = {https://dblp.org/rec/conf/asplos/ChenMCA21.html?view=bibtex},
}

MLSys A Learned Performance Model for Tensor Processing Units Sam Kaufman , Phitchaya Phothilimthana , Yanqi Zhou , and 4 more authors In Proceedings of Machine Learning and Systems (MLSys) , 2021 Bib PDF
@inproceedings{tpu-costmodel-mlsys2021,
  author     = {Kaufman, Sam and Phothilimthana, Phitchaya and Zhou, Yanqi and Mendis, Charith and Roy, Sudip and Sabne, Amit and Burrows, Mike},
  booktitle  = {Proceedings of Machine Learning and Systems (MLSys)},
  editor     = {Smola, A. and Dimakis, A. and Stoica, I.},
  pages      = {387--400},
  title      = {A Learned Performance Model for Tensor Processing Units},
  url        = {https://proceedings.mlsys.org/paper/2021/file/85d8ce590ad8981ca2c8286f79f59954-Paper.pdf},
  volume     = {3},
  year       = {2021},
  bdsk-url-1 = {https://proceedings.mlsys.org/paper/2021/file/85d8ce590ad8981ca2c8286f79f59954-Paper.pdf},
  award      = {used in production at Google in the XLA TPU compiler},
  bib        = {https://dblp.org/rec/conf/mlsys/KaufmanPZM0SB21.html?view=bibtex}
}

2020
MICRO DiffTune: Optimizing CPU Simulator Parameters with Learned Differentiable Surrogates Alex Renda , Yishen Chen , Charith Mendis, and 1 more author In 53rd Annual IEEE/ACM International Symposium on Microarchitecture (MICRO) , 2020 Bib PDF Video
% The mixed-case system name in the title is brace-protected so its capitals are kept.
@inproceedings{difftune-micro2020,
  author    = {Renda, Alex and Chen, Yishen and Mendis, Charith and Carbin, Michael},
  title     = {{DiffTune}: Optimizing {CPU} Simulator Parameters with Learned Differentiable Surrogates},
  booktitle = {53rd Annual {IEEE/ACM} International Symposium on Microarchitecture (MICRO)},
  pages     = {442--455},
  publisher = {{IEEE}},
  year      = {2020},
  url       = {https://doi.org/10.1109/MICRO50266.2020.00045},
  doi       = {10.1109/MICRO50266.2020.00045},
  timestamp = {Tue, 17 Nov 2020 13:33:12 +0100},
  biburl    = {https://dblp.org/rec/conf/micro/RendaCMC20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/micro/RendaCMC20.html?view=bibtex},
}

2019
CC Revec:
program rejuvenation through revectorization Charith Mendis, Ajay Jain , Paras Jain , and 1 more author In Proceedings of the 28th International Conference on Compiler Construction (CC) , 2019 Bib PDF Video
@inproceedings{revec-cc2019,
  author    = {Mendis, Charith and Jain, Ajay and Jain, Paras and Amarasinghe, Saman P.},
  editor    = {Amaral, Jos{\'{e}} Nelson and Kulkarni, Milind},
  title     = {Revec: program rejuvenation through revectorization},
  booktitle = {Proceedings of the 28th International Conference on Compiler Construction (CC)},
  pages     = {29--41},
  publisher = {{ACM}},
  year      = {2019},
  url       = {https://doi.org/10.1145/3302516.3307357},
  doi       = {10.1145/3302516.3307357},
  timestamp = {Sun, 02 Oct 2022 15:56:08 +0200},
  biburl    = {https://dblp.org/rec/conf/cc/MendisJJA19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/cc/MendisJJA19.html?view=bibtex},
}

ICML Ithemal: Accurate, Portable and Fast Basic Block Throughput Estimation using Deep Neural Networks Charith Mendis, Alex Renda , Saman P. Amarasinghe , and 1 more author In Proceedings of the 36th International Conference on Machine Learning (ICML) , 2019 Bib PDF Code
@inproceedings{ithemal-icml2019,
  author    = {Mendis, Charith and Renda, Alex and Amarasinghe, Saman P. and Carbin, Michael},
  editor    = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
  title     = {Ithemal: Accurate, Portable and Fast Basic Block Throughput Estimation using Deep Neural Networks},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning (ICML)},
  series    = {Proceedings of Machine Learning Research},
  volume    = {97},
  pages     = {4505--4515},
  publisher = {{PMLR}},
  year      = {2019},
  url       = {http://proceedings.mlr.press/v97/mendis19a.html},
  timestamp = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/MendisRAC19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  award     = {Best Paper award at the ML for systems workshop co-located with ISCA'19},
  bib       = {https://dblp.org/rec/conf/icml/MendisRAC19.html?view=bibtex},
}

IISWC BHive: A Benchmark Suite and Measurement Framework for Validating x86-64 Basic Block Performance Models Yishen Chen , Ajay Brahmakshatriya , Charith Mendis, and 5 more authors In IEEE International Symposium on Workload Characterization (IISWC) , 2019 Bib PDF Code
% The mixed-case benchmark name in the title is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{bhive-iiswc2019,
  author    = {Chen, Yishen and Brahmakshatriya, Ajay and Mendis, Charith and Renda, Alex and Atkinson, Eric and S{\'{y}}kora, Ondrej and Amarasinghe, Saman P. and Carbin, Michael},
  title     = {{BHive}: {A} Benchmark Suite and Measurement Framework for Validating x86-64 Basic Block Performance Models},
  booktitle = {{IEEE} International Symposium on Workload Characterization (IISWC)},
  pages     = {167--177},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/IISWC47752.2019.9042166},
  doi       = {10.1109/IISWC47752.2019.9042166},
  timestamp = {Mon, 23 Mar 2020 13:15:20 +0100},
  biburl    = {https://dblp.org/rec/conf/iiswc/ChenBMRASAC19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/iiswc/ChenBMRASAC19.html?view=bibtex},
}

NeurIPS Compiler Auto-Vectorization with Imitation Learning Charith Mendis, Cambridge Yang , Yewen Pu , and 2 more authors In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems (NeurIPS) , 2019 Bib PDF
@inproceedings{vemal-neurips2019,
  author    = {Mendis, Charith and Yang, Cambridge and Pu, Yewen and Amarasinghe, Saman P. and Carbin, Michael},
  editor    = {Wallach, Hanna M. and Larochelle, Hugo and Beygelzimer, Alina and d'Alch{\'{e}}{-}Buc, Florence and Fox, Emily B. and Garnett, Roman},
  title     = {Compiler Auto-Vectorization with Imitation Learning},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems (NeurIPS)},
  pages     = {14598--14609},
  year      = {2019},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/MendisYPAC19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/nips/MendisYPAC19.html?view=bibtex}
}

2018
OOPSLA goSLP: globally optimized superword level parallelism framework Charith Mendis, and Saman P. Amarasinghe Proc. ACM Program. Lang.
(OOPSLA), 2018 Bib PDF Video
% The mixed-case system name in the title is brace-protected so sentence-casing styles keep its capitals.
@article{goslp-oopsla2018,
  author    = {Mendis, Charith and Amarasinghe, Saman P.},
  title     = {{goSLP}: globally optimized superword level parallelism framework},
  journal   = {Proc. {ACM} Program. Lang. (OOPSLA)},
  volume    = {2},
  number    = {{OOPSLA}},
  pages     = {110:1--110:28},
  year      = {2018},
  url       = {https://doi.org/10.1145/3276480},
  doi       = {10.1145/3276480},
  timestamp = {Sun, 02 Oct 2022 15:26:02 +0200},
  biburl    = {https://dblp.org/rec/journals/pacmpl/MendisA18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/journals/pacmpl/MendisA18.html?view=bibtex},
}

2017
BigData Making caches work for graph analytics Yunming Zhang , Vladimir Kiriansky , Charith Mendis, and 2 more authors In IEEE International Conference on Big Data (BigData) , 2017 Bib PDF
@inproceedings{cagra-bigdata2017,
  author    = {Zhang, Yunming and Kiriansky, Vladimir and Mendis, Charith and Amarasinghe, Saman P. and Zaharia, Matei},
  editor    = {Nie, Jian{-}Yun and Obradovic, Zoran and Suzumura, Toyotaro and Ghosh, Rumi and Nambiar, Raghunath and Wang, Chonggang and Zang, Hui and Baeza{-}Yates, Ricardo and Hu, Xiaohua and Kepner, Jeremy and Cuzzocrea, Alfredo and Tang, Jian and Toyoda, Masashi},
  title     = {Making caches work for graph analytics},
  booktitle = {{IEEE} International Conference on Big Data (BigData)},
  pages     = {293--302},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  url       = {https://doi.org/10.1109/BigData.2017.8257937},
  doi       = {10.1109/BigData.2017.8257937},
  timestamp = {Fri, 19 Nov 2021 16:08:20 +0100},
  biburl    = {https://dblp.org/rec/conf/bigdataconf/ZhangKMAZ17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  award     = {Best Student Paper award},
  bib       = {https://dblp.org/rec/conf/bigdataconf/ZhangKMAZ17.html?view=bibtex}
}

2016
ICASSP Parallelizing WFST speech decoders Charith Mendis, Jasha Droppo , Saeed Maleki , and 3 more authors In IEEE International Conference on Acoustics, Speech and Signal
Processing (ICASSP) , 2016 Bib PDF
% Conference paper record; the biburl and bibsource fields point at dblp.
@inproceedings{par-wfst-icassp2016,
  author    = {Mendis, Charith and Droppo, Jasha and Maleki, Saeed and Musuvathi, Madanlal and Mytkowicz, Todd and Zweig, Geoffrey},
  title     = {Parallelizing {WFST} speech decoders},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {5325--5329},
  publisher = {{IEEE}},
  year      = {2016},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472694},
  doi       = {10.1109/ICASSP.2016.7472694},
  timestamp = {Sun, 25 Oct 2020 23:13:48 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/MendisDMMMZ16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/icassp/MendisDMMMZ16.html?view=bibtex}
}

2015
PLDI Helium: lifting high-performance stencil kernels from stripped x86 binaries to halide DSL code Charith Mendis, Jeffrey Bosboom , Kevin Wu , and 5 more authors In Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI) , 2015 Bib PDF
% Conference paper record; the web field carries the project page URL.
@inproceedings{helium-pldi2015,
  author    = {Mendis, Charith and Bosboom, Jeffrey and Wu, Kevin and Kamil, Shoaib and Ragan{-}Kelley, Jonathan and Paris, Sylvain and Zhao, Qin and Amarasinghe, Saman P.},
  editor    = {Grove, David and Blackburn, Stephen M.},
  title     = {Helium: lifting high-performance stencil kernels from stripped x86 binaries to halide {DSL} code},
  booktitle = {Proceedings of the 36th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation (PLDI)},
  pages     = {391--402},
  publisher = {{ACM}},
  year      = {2015},
  url       = {https://doi.org/10.1145/2737924.2737974},
  doi       = {10.1145/2737924.2737974},
  timestamp = {Thu, 24 Nov 2022 09:18:38 +0100},
  biburl    = {https://dblp.org/rec/conf/pldi/MendisBWKRPZA15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  bib       = {https://dblp.org/rec/conf/pldi/MendisBWKRPZA15.html?view=bibtex},
  web       = {http://projects.csail.mit.edu/helium/}
}