hashiruAI / paper /references.bib
Kunal Pai
Add base models for Employee agents
03de09a
raw
history blame
17.8 kB
% HuggingGPT preprint. Product names and acronyms are braced so that
% sentence-casing styles do not lowercase them.
@article{shen2023hugginggpt,
  author  = {Shen, Yongliang and Song, Kaitao and Tan, Xu and Li, Dongsheng and Lu, Weiming and Zhuang, Yueting},
  title   = {{HuggingGPT}: Solving {AI} Tasks with {ChatGPT} and its Friends in {Hugging Face}},
  journal = {arXiv preprint arXiv:2303.17580},
  year    = {2023}
}
% AutoGen multi-agent conversation framework (arXiv preprint).
@article{wu2023autogen,
  author  = {Wu, Qingyun and Bansal, Gagan and Zhang, Jieyu and Wu, Yiran and Li, Beibin and Zhu, Erkang and Jiang, Li and Zhang, Xiaoyun and Zhang, Shaokun and Liu, Jiale and Awadallah, Ahmed H. and White, Ryen W. and Burger, Doug and Wang, Chi},
  title   = {{AutoGen}: Enabling Next-Gen {LLM} Applications via Multi-Agent Conversation},
  journal = {arXiv preprint arXiv:2308.08155},
  year    = {2023}
}
% NYT-Connections benchmark preprint. The benchmark name and "System-1"
% are braced to survive sentence-casing.
@article{lopez2024nyt,
  author  = {Lopez, Angel Yahir Loredo and McDonald, Tyler and Emami, Ali},
  title   = {{NYT-Connections}: A Deceptively Simple Text Classification Task that Stumps {System-1} Thinkers},
  journal = {arXiv preprint arXiv:2412.01621},
  year    = {2024}
}
% MiniLM preprint. The model name is braced so styles keep its mixed case.
@misc{wang2020minilmdeepselfattentiondistillation,
  author        = {Wenhui Wang and Furu Wei and Li Dong and Hangbo Bao and Nan Yang and Ming Zhou},
  title         = {{MiniLM}: Deep Self-Attention Distillation for Task-Agnostic Compression of Pre-Trained Transformers},
  year          = {2020},
  eprint        = {2002.10957},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2002.10957},
}
% Gemini 2.5 Flash web resources (announcement, model garden page, API docs).
% Product name and the acronym API are braced against sentence-casing.
@misc{gemini25flash,
  author       = {{Google DeepMind} and {Google AI}},
  title        = {{Gemini} 2.5 {Flash}: Model Card, {API}, and Announcement},
  year         = {2025},
  howpublished = {\url{https://developers.googleblog.com/en/start-building-with-gemini-25-flash/}},
  note         = {See also:
                  \url{https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/gemini-2.5-flash-preview-04-17?inv=1&invt=AbxICQ},
                  \url{https://ai.google.dev/gemini-api/docs/models}. Accessed: 2025-05-11}
}
% ReAct, as published at ICLR; the arXiv identifier is carried in the note.
@inproceedings{yao2022react,
  author    = {Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and Shafran, Izhak and Narasimhan, Karthik and Cao, Yuan},
  title     = {{ReAct}: Synergizing Reasoning and Acting in Language Models},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2023},
  note      = {arXiv:2210.03629}
}
% Toolformer preprint. Author list corrected to the paper's leading authors;
% the remainder is abbreviated with "and others" (rendered as et al.).
@article{schick2023toolformer,
  author  = {Schick, Timo and Dwivedi-Yu, Jane and Dess{\`\i}, Roberto and Raileanu, Roberta and Lomeli, Maria and others},
  title   = {Toolformer: Language Models Can Teach Themselves to Use Tools},
  journal = {arXiv preprint arXiv:2302.04761},
  year    = {2023}
}
% RouteLLM: preference-data-driven LLM routing (arXiv preprint).
@article{ong2024routellm,
  author  = {Ong, Isaac and Almahairi, Amjad and Wu, Vincent and Chiang, Wei-Lin and Wu, Tianhao and Gonzalez, Joseph E. and Kadous, M. Waleed and Stoica, Ion},
  title   = {{RouteLLM}: Learning to Route {LLMs} with Preference Data},
  journal = {arXiv preprint arXiv:2406.18665},
  year    = {2024}
}
% Magentic-One preprint. "and others" is the BibTeX token for et al.;
% a literal "et al." would be parsed as a person's name.
@article{fourney2024magentic,
  author  = {Fourney, Adam and Bansal, Gagan and Mozannar, Hussein and Tan, Cheng and others},
  title   = {{Magentic-One}: A Generalist Multi-Agent System for Solving Complex Tasks},
  journal = {arXiv preprint arXiv:2411.04468},
  year    = {2024}
}
% GSM8K paper. It circulated as an arXiv preprint rather than in conference
% proceedings, so it is recorded as misc with eprint fields.
@misc{cobbe2021gsm8k,
  author        = {Cobbe, Karl and Kosaraju, Vineet and Bavarian, Mohammad and Chen, Mark and Jun, Heewoo and Kaiser, Lukasz and Plappert, Matthias and Tworek, Jerry and Hilton, Jacob and Nakano, Reiichiro and Hesse, Christopher and Schulman, John},
  title         = {Training Verifiers to Solve Math Word Problems},
  year          = {2021},
  eprint        = {2110.14168},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2110.14168},
  note          = {Dataset introduced: GSM8K (Grade School Math 8K)}
}
% SVAMP challenge dataset paper (NAACL).
@inproceedings{patel2021svamp,
  author    = {Patel, Arkil and Bhattamishra, Satwik and Goyal, Navin},
  title     = {Are {NLP} Models really able to Solve Simple Math Word Problems?},
  booktitle = {Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year      = {2021},
  note      = {Introduces the SVAMP challenge dataset}
}
% Humanity's Last Exam benchmark (arXiv preprint, abbreviated author list).
@misc{phan2025humanitysexam,
  author        = {Phan, Long and Gatti, Alice and Han, Ziwen and others},
  title         = {Humanity's Last Exam},
  year          = {2025},
  eprint        = {2501.14249},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2501.14249}
}
% Codex / HumanEval paper (arXiv preprint) with its full author list.
@article{chen2021codex,
  author  = {Chen, Mark and Tworek, Jerry and Jun, Heewoo and Yuan, Qiming and Ponde de Oliveira Pinto, Henrique and Kaplan, Jared and Edwards, Harri and Burda, Yuri and Joseph, Nicholas and Brockman, Greg and Ray, Alex and Puri, Raul and Krueger, Gretchen and Petrov, Michael and Khlaaf, Heidy and Sastry, Girish and Mishkin, Pamela and Chan, Brooke and Gray, Scott and Ryder, Nick and Pavlov, Mikhail and Power, Alethea and Kaiser, Lukasz and Bavarian, Mohammad and Winter, Clemens and Tillet, Philippe and Such, Felipe and Cummings, Dave and Plappert, Matthias and Chantzis, Fotios and Barnes, Elizabeth and Herbert-Voss, Ariel and Guss, William and Nichol, Alex and Paino, Alex and Tezak, Nikolas and Tang, Jie and Babuschkin, Igor and Balaji, Suchir and Jain, Shantanu and Saunders, William and Hesse, Christopher and Carr, Andrew N. and Leike, Jan and Achiam, Josh and Misra, Vedant and Morikawa, Evan and Radford, Alec and Knight, Matthew and Brundage, Miles and Murati, Mira and Mayer, Katie and Welinder, Peter and McGrew, Bob and Amodei, Dario and McCandlish, Sam and Sutskever, Ilya and Zaremba, Wojciech},
  title   = {Evaluating Large Language Models Trained on Code},
  journal = {arXiv preprint arXiv:2107.03374},
  year    = {2021},
  note    = {OpenAI Codex paper; introduced HumanEval benchmark}
}
% CoDocBench preprint. The arXiv identifier 2502.00519 is a February 2025
% submission, so the year is 2025; the citation key is kept for existing cites.
@article{pai2024codocbench,
  author  = {Pai, Kunal and Devanbu, Premkumar and Ahmed, Toufique},
  title   = {{CoDocBench}: A Dataset for Code-Documentation Alignment in Software Maintenance},
  journal = {arXiv preprint arXiv:2502.00519},
  year    = {2025}
}
% PySStuBs dataset paper (MSR). The language name Python is braced so it
% stays capitalised under sentence-casing styles.
@inproceedings{kamienski2021pysstubs,
  author    = {Kamienski, Arthur V. and Palechor, Luisa and Bezemer, Cor-Paul and Hindle, Abram},
  title     = {{PySStuBs}: Characterizing Single-Statement Bugs in Popular Open-Source {Python} Projects},
  booktitle = {IEEE/ACM International Conference on Mining Software Repositories (MSR)},
  year      = {2021}
}
% GPT-3 paper. The venue is a conference proceedings volume, so the entry is
% inproceedings with the venue in booktitle (properly capitalised).
@inproceedings{brown2020language,
  author    = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D. and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
  title     = {Language Models are Few-Shot Learners},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  pages     = {1877--1901},
  year      = {2020}
}
% BERT paper (NAACL-HLT 2019). The acronym is restored and braced; the
% proceedings title is capitalised because styles print booktitle verbatim.
@inproceedings{devlin2019bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  pages     = {4171--4186},
  year      = {2019}
}
% T5 paper. Journal name capitalised (styles print it verbatim); title is
% stored in Title Case so styles can downcase it if they wish.
@article{raffel2020exploring,
  author  = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
  title   = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  journal = {Journal of Machine Learning Research},
  volume  = {21},
  number  = {140},
  pages   = {1--67},
  year    = {2020}
}
% Multi-agent systems survey. The journal name is printed verbatim by styles,
% so the IEEE acronym must be upper-case here.
@article{dorri2018multi,
  author    = {Dorri, Ali and Kanhere, Salil S. and Jurdak, Raja},
  title     = {Multi-Agent Systems: A Survey},
  journal   = {IEEE Access},
  volume    = {6},
  pages     = {28573--28593},
  year      = {2018},
  publisher = {IEEE}
}
% Wooldridge's multiagent systems textbook. Publisher name and book title
% are capitalised; styles print publisher verbatim.
@book{wooldridge2009introduction,
  author    = {Wooldridge, Michael},
  title     = {An Introduction to Multiagent Systems},
  year      = {2009},
  publisher = {John Wiley \& Sons}
}
% LLM-driven autonomous scientific research (arXiv preprint).
@article{boiko2023emergent,
  author  = {Boiko, Daniil A and MacKnight, Robert and Gomes, Gabe},
  title   = {Emergent autonomous scientific research capabilities of large language models},
  journal = {arXiv preprint arXiv:2304.05332},
  year    = {2023}
}
% Agent-organized networks for team formation (AAMAS 2005). The proceedings
% title is capitalised because styles print booktitle verbatim.
@inproceedings{gaston2005agenta,
  author    = {Gaston, Matthew E. and DesJardins, Marie},
  title     = {Agent-Organized Networks for Dynamic Team Formation},
  booktitle = {Proceedings of the Fourth International Joint Conference on Autonomous Agents and Multiagent Systems},
  pages     = {230--237},
  year      = {2005}
}
% "Agents" framework preprint. Author list corrected to the paper's leading
% authors and abbreviated with "and others"; verify the full list against
% arXiv:2309.07870 if a complete list is required.
@misc{zhou2023agents,
  author        = {Zhou, Wangchunshu and Jiang, Yuchen Eleanor and Li, Long and Wu, Jialong and others},
  title         = {Agents: An Open-source Framework for Large Language Model based Autonomous Agents},
  year          = {2023},
  eprint        = {2309.07870},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2309.07870}
}
% OpenAI API documentation page on function calling (web resource).
@misc{openai_func_calling,
  author       = {{OpenAI}},
  title        = {Function calling},
  howpublished = {OpenAI API Documentation},
  year         = {2023},
  url          = {https://platform.openai.com/docs/guides/function-calling},
  note         = {Accessed: 2025-05-01}
}
% Voyager embodied LLM agent (arXiv preprint).
@misc{wang2023voyager,
  author        = {Guanzhi Wang and Yuqi Xie and Yunfan Jiang and Ajay Mandlekar and Chaowei Xiao and Yuke Zhu and Linxi Fan and Anima Anandkumar},
  title         = {{Voyager}: An Open-Ended Embodied Agent with Large Language Models},
  year          = {2023},
  eprint        = {2305.16291},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2305.16291}
}
% Russell and Norvig textbook, third edition. Classic BibTeX styles expect
% the edition as a capitalised ordinal word and append "edition" themselves.
@book{russell2010artificial,
  author    = {Russell, Stuart J. and Norvig, Peter},
  title     = {Artificial Intelligence: A Modern Approach},
  edition   = {Third},
  year      = {2010},
  publisher = {Prentice Hall Press},
  address   = {Upper Saddle River, NJ, USA}
}
% Shoham's agent-oriented programming article in Artificial Intelligence.
@article{shoham1994agent,
  author    = {Yoav Shoham},
  title     = {Agent-oriented programming},
  journal   = {Artificial Intelligence},
  volume    = {60},
  number    = {1},
  pages     = {51--92},
  publisher = {Elsevier},
  year      = {1993}
}
% Survey of LLM-based autonomous agents (arXiv preprint).
@misc{wang2023survey,
  author        = {Lei Wang and Chen Ma and Xueyang Feng and Zeyu Zhang and Hao Yang and Jingsen Zhang and Zhiyuan Chen and Jiakai Tang and Xu Chen and Yankai Lin and Wayne Xin Zhao and Zhewei Wei and Ji-Rong Wen},
  title         = {A Survey on Large Language Model based Autonomous Agents},
  year          = {2023},
  eprint        = {2308.11432},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI}
}
% Survey on LLM-based agents (arXiv preprint). The trailing part of the
% author list has been replaced by "and others"; verify against
% arXiv:2309.07864 if the complete list is required.
@misc{xi2023rise,
  author        = {Zhiheng Xi and Wenxiang Chen and Xin Guo and Wei He and Yiwen Ding and Boyang Hong and Ming Zhang and Junzhe Wang and Senjie Jin and Enyu Zhou and Rui Zheng and Xiaoran Fan and Xiao Wang and Limao Xiong and others},
  title         = {The Rise and Potential of Large Language Model Based Agents: A Survey},
  year          = {2023},
  eprint        = {2309.07864},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI}
}
% Generative Agents (UIST '23). The page range uses the ASCII double hyphen;
% a Unicode en dash garbles output under classic 8-bit BibTeX.
@inproceedings{park2023generative,
  author    = {Park, Joon Sung and O'Brien, Joseph C. and Cai, Carrie J. and Morris, Meredith Ringel and Liang, Percy and Bernstein, Michael S.},
  title     = {Generative Agents: Interactive Simulacra of Human Behavior},
  booktitle = {The 36th Annual ACM Symposium on User Interface Software and Technology (UIST '23)},
  series    = {UIST '23},
  year      = {2023},
  pages     = {1--22},
  numpages  = {22},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {San Francisco, CA, USA},
  isbn      = {9798400701320},
  doi       = {10.1145/3586183.3606763},
  url       = {https://doi.org/10.1145/3586183.3606763}
}
% Ollama project website (web resource with access date).
@misc{ollama,
  author       = {{Ollama Team}},
  title        = {Ollama},
  year         = {2023},
  howpublished = {\url{https://ollama.com/}},
  note         = {Accessed: 2025-05-01}
}
% Claude 3 model card. The month uses the predefined unquoted macro so
% styles can abbreviate or localise it.
@misc{anthropic2024claude,
  author       = {{Anthropic}},
  title        = {The {Claude 3} Model Family: {Opus, Sonnet, Haiku}},
  year         = {2024},
  month        = mar,
  howpublished = {Model Card},
  url          = {https://www-cdn.anthropic.com/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627/Model_Card_Claude_3.pdf},
  note         = {Accessed: 2025-05-01}
}
% GPT-4 technical report (arXiv). The model name is braced against
% sentence-casing; the corporate author is double-braced so it is treated
% as one indivisible name.
@misc{openai2023gpt4,
  author        = {{OpenAI}},
  title         = {{GPT-4} Technical Report},
  year          = {2023},
  eprint        = {2303.08774},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2303.08774}
}
% LangGraph product page (web resource). Product name braced against
% sentence-casing; access date written in ISO form.
@misc{langgraph,
  author       = {{LangChain}},
  title        = {{LangGraph}: A Framework for Agentic Workflows},
  year         = {2024},
  howpublished = {\url{https://www.langchain.com/langgraph}},
  note         = {Accessed: 2025-05-01}
}
% Edited volume on market-based control (editor given, no author).
@book{clearwater1996market,
  editor    = {Scott H. Clearwater},
  title     = {Market-Based Control: A Paradigm for Distributed Resource Allocation},
  year      = {1996},
  publisher = {World Scientific}
}
% Guest editors' introduction in IEEE Intelligent Systems. Accented letters
% are written as brace-wrapped LaTeX escapes so BibTeX sorts them correctly.
@article{valckenaers2005trends,
  author    = {McFarlane, Duncan and Ma{\v{r}}{\'\i}k, Vladim{\'\i}r and Valckenaers, Paul},
  title     = {Guest Editors' Introduction: Intelligent Control in the Manufacturing Supply Chain},
  journal   = {IEEE Intelligent Systems},
  volume    = {20},
  number    = {1},
  pages     = {24--26},
  year      = {2005},
  publisher = {IEEE}
}
% Survey of multi-agent organizational paradigms. Journal name capitalised
% because styles print it verbatim.
@article{horling2004survey,
  author    = {Horling, Bryan and Lesser, Victor},
  title     = {A Survey of Multi-Agent Organizational Paradigms},
  journal   = {The Knowledge Engineering Review},
  volume    = {19},
  number    = {4},
  pages     = {281--316},
  year      = {2004},
  publisher = {Cambridge University Press}
}
% Agent-organized networks for production and exchange (20th National
% Conference on AI). Proceedings title capitalised; styles print it verbatim.
@inproceedings{gaston2005agentb,
  author    = {Gaston, Matthew E. and DesJardins, Marie},
  title     = {Agent-Organized Networks for Multi-Agent Production and Exchange},
  booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence, Volume 1},
  pages     = {77--82},
  year      = {2005}
}
% Cooperative embodied agents built modularly with LLMs (arXiv preprint).
@misc{zhang2023building,
  author        = {Hongxin Zhang and Weihua Du and Jiaming Shan and Qinhong Zhou and Yilun Du and Joshua B. Tenenbaum and Tianmin Shu and Chuang Gan},
  title         = {Building Cooperative Embodied Agents Modularly with Large Language Models},
  year          = {2023},
  eprint        = {2307.02485},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI}
}
% TALM preprint. The acronym is braced so sentence-casing styles do not
% turn it into "Talm".
@misc{parisi2022talm,
  author        = {Aaron Parisi and Yao Zhao and Noah Fiedel},
  title         = {{TALM}: Tool Augmented Language Models},
  year          = {2022},
  eprint        = {2205.12255},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% CrewAI product website (web resource). The camel-case product name is
% braced so sentence-casing styles do not render it as "Crewai".
@misc{crewai,
  author       = {{CrewAI Inc.}},
  title        = {{CrewAI}},
  year         = {2025},
  howpublished = {\url{https://www.crewai.com/}},
  note         = {Accessed: 2025-05-01}
}
% ChatDev preprint. The system name is restored to its camel-case spelling
% and braced against sentence-casing.
@article{qian2023communicative,
  author  = {Qian, Chen and Liu, Wei and Liu, Hongzhang and Chen, Nuo and Dang, Yufan and Li, Jiahao and Yang, Cheng and Chen, Weize and Su, Yusheng and Cong, Xin and others},
  title   = {{ChatDev}: Communicative Agents for Software Development},
  journal = {arXiv preprint arXiv:2307.07924},
  year    = {2023}
}
% Survey on decision-making for high-level autonomous vehicles (IEEE T-ITS).
@article{wang2023decision,
  author    = {Wang, Yuning and Jiang, Junkai and Li, Shangyi and Li, Ruochen and Xu, Shaobing and Wang, Jianqiang and Li, Keqiang},
  title     = {Decision-making driven by driver intelligence and environment reasoning for high-level autonomous vehicles: a survey},
  journal   = {IEEE Transactions on Intelligent Transportation Systems},
  volume    = {24},
  number    = {10},
  pages     = {10362--10381},
  publisher = {IEEE},
  year      = {2023}
}
% Complex instruction-following benchmark (arXiv preprint).
@misc{wen2024benchmarkingcomplexinstructionfollowingmultiple,
  author        = {Bosi Wen and Pei Ke and Xiaotao Gu and Lindong Wu and Hao Huang and Jinfeng Zhou and Wenchuang Li and Binxin Hu and Wendy Gao and Jiaxin Xu and Yiming Liu and Jie Tang and Hongning Wang and Minlie Huang},
  title         = {Benchmarking Complex Instruction-Following with Multiple Constraints Composition},
  year          = {2024},
  eprint        = {2407.03978},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2407.03978},
}
% Mistral 7B preprint. Recorded as misc because there is no journal; each
% author is a separate "Last, First" name joined by "and".
@misc{jiang2023mistral,
  author        = {Jiang, Albert Q. and Sablayrolles, Alexandre and Mensch, Arthur and Bamford, Chris and Chaplot, Devendra Singh and de las Casas, Diego and Bressand, Florian and Lengyel, Gianna and Lample, Guillaume and Saulnier, Lucile and Lavaud, L{\'e}lio Renard and Lachaux, Marie-Anne and Stock, Pierre and Le Scao, Teven and Lavril, Thibaut and Wang, Thomas and Lacroix, Timoth{\'e}e and El Sayed, William},
  title         = {{Mistral 7B}},
  year          = {2023},
  eprint        = {2310.06825},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2310.06825},
}
% Llama 3 technical report (arXiv). Recorded as misc because there is no
% journal; only the model name is braced so styles can still case the title.
@misc{llama3herd,
  author        = {{Meta Llama Team}},
  title         = {The {Llama} 3 Herd of Models},
  year          = {2024},
  eprint        = {2407.21783},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2407.21783},
  note          = {arXiv:2407.21783}
}
% Gemini 1.5 technical report (arXiv). Recorded as misc because there is no
% journal; only the model name is braced so styles can still case the title.
@misc{gemini1.5_report,
  author        = {{Gemini Team}},
  title         = {{Gemini} 1.5: Unlocking Multimodal Understanding Across Millions of Tokens of Context},
  year          = {2024},
  eprint        = {2403.05530},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2403.05530},
  note          = {arXiv:2403.05530}
}
% Qwen2.5 technical report (arXiv). Recorded as misc because there is no
% journal; only the model name is braced so styles can still case the title.
@misc{qwen2.5_report,
  author        = {{Qwen Team} and Yang, An and others},
  title         = {{Qwen2.5} Technical Report},
  year          = {2024},
  eprint        = {2412.15115},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2412.15115},
  note          = {arXiv:2412.15115}
}
% Qwen3 announcement blog post (web resource). Only the model name is braced
% so bibliography styles remain free to apply their own title casing.
@misc{qwen3_blog,
  author       = {{Qwen Team}},
  title        = {{Qwen3}: Think Deeper, Act Faster},
  year         = {2025},
  howpublished = {\url{https://qwenlm.github.io/blog/qwen3/}}
}
% DeepSeek-R1 technical report (arXiv). Recorded as misc because there is no
% journal. "and others" sits outside the corporate-name braces so BibTeX
% treats it as the et-al token rather than part of the name.
@misc{deepseekr1_report,
  author        = {{DeepSeek-AI} and others},
  title         = {{DeepSeek-R1}: Incentivizing Reasoning Capability in {LLMs} via Reinforcement Learning},
  year          = {2025},
  eprint        = {2501.12948},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2501.12948},
  note          = {arXiv:2501.12948}
}