publications | Soichiro Nishimori

2026

Emergence of Exploration in Policy Gradient Reinforcement Learning via Retrying

Soichiro Nishimori, Paavo Parmas, Sotetsu Koyamada, and 4 more authors

2026

2025

RLC

Recursive Reward Aggregation

Yuting Tang, Yivan Zhang, Johannes Ackermann, and 4 more authors

RLC, 2025

@article{key,
  author  = {Tang, Yuting and
             Zhang, Yivan and
             Ackermann, Johannes and
             Zhang, Yu-jie and
             Nishimori, Soichiro and
             Zhang, Yuting and
             Sugiyama, Masashi},
  title   = {Recursive Reward Aggregation},
  journal = {RLC},
  year    = {2025},
}

RLC

Offline Reinforcement Learning with Domain-Unlabeled Data

Soichiro Nishimori, Xin-Qiang Cai, Johannes Ackermann, and 1 more author

RLC, 2025

Bib

@article{nishimori2025offline,
  author  = {Nishimori, Soichiro and
             Cai, Xin-Qiang and
             Ackermann, Johannes and
             Sugiyama, Masashi},
  title   = {Offline Reinforcement Learning with Domain-Unlabeled Data},
  journal = {RLC},
  year    = {2025}
}

github

Mahjax: A GPU-Accelerated Mahjong Simulator for Reinforcement Learning in JAX

Soichiro Nishimori

github, 2025

Bib

2024

arXiv

A Policy Gradient Primal-Dual Algorithm for Constrained MDPs with Uniform PAC Guarantees

Toshinori Kitamura, Tadashi Kozuno, Masahiro Kato, and 6 more authors

RLC Workshop, 2024

Bib

@comment{Acronyms in the title are brace-protected so sentence-casing styles keep their capitals.}
@article{kitamura2024policy,
  author  = {Kitamura, Toshinori and
             Kozuno, Tadashi and
             Kato, Masahiro and
             Ichihara, Yuki and
             Nishimori, Soichiro and
             Sannai, Akiyoshi and
             Sonoda, Sho and
             Kumagai, Wataru and
             Matsuo, Yutaka},
  title   = {A Policy Gradient Primal-Dual Algorithm for Constrained {MDPs} with Uniform {PAC} Guarantees},
  journal = {RLC Workshop},
  year    = {2024},
}

arXiv

Leveraging Domain-Unlabeled Data in Offline Reinforcement Learning across Two Domains

Soichiro Nishimori, Xin-Qiang Cai, Johannes Ackermann, and 1 more author

arXiv preprint, 2024

Bib

@article{nishimori2024leveraging,
  author  = {Nishimori, Soichiro and
             Cai, Xin-Qiang and
             Ackermann, Johannes and
             Sugiyama, Masashi},
  title   = {Leveraging Domain-Unlabeled Data in Offline Reinforcement Learning across Two Domains},
  journal = {arXiv preprint},
  year    = {2024},
}

RLC

A Batch Sequential Halving Algorithm without Performance Degradation

Sotetsu Koyamada, Soichiro Nishimori, and Shin Ishii

RLC, 2024

Bib

@article{koyamada2024batch,
  author  = {Koyamada, Sotetsu and
             Nishimori, Soichiro and
             Ishii, Shin},
  title   = {A Batch Sequential Halving Algorithm without Performance Degradation},
  journal = {RLC},
  year    = {2024},
}

github

JAX-CORL: A single-file repository for offline reinforcement learning

Soichiro Nishimori

github, 2024

Bib

2023

NeurIPS

Pgx: Hardware-accelerated parallel game simulators for reinforcement learning

Sotetsu Koyamada, Shinri Okano, Soichiro Nishimori, and 4 more authors

NeurIPS, 2023

Bib

@comment{The citation key keeps its 2024 suffix so existing citations still resolve, although the year field is 2023. The journal field holds the full serial name rather than the NeurIPS abbreviation.}
@article{koyamada2024pgx,
  author  = {Koyamada, Sotetsu and
             Okano, Shinri and
             Nishimori, Soichiro and
             Murata, Yu and
             Habara, Keigo and
             Kita, Haruka and
             Ishii, Shin},
  title   = {{Pgx}: Hardware-accelerated parallel game simulators for reinforcement learning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {36},
  year    = {2023},
}

arXiv

End-to-End Policy Gradient Method for POMDPs and Explainable Agents

Soichiro Nishimori, Sotetsu Koyamada, and Shin Ishii

arXiv preprint, 2023

Bib

@comment{The acronym in the title is brace-protected so sentence-casing styles keep its capitals.}
@article{nishimori2023end,
  author  = {Nishimori, Soichiro and
             Koyamada, Sotetsu and
             Ishii, Shin},
  title   = {End-to-End Policy Gradient Method for {POMDPs} and Explainable Agents},
  journal = {arXiv preprint},
  year    = {2023},
}

2022

IEEE

Mjx: A framework for Mahjong AI research

Sotetsu Koyamada, Keigo Habara, Nao Goto, and 3 more authors

In 2022 IEEE Conference on Games (CoG), 2022

Bib

@comment{The venue is stored in booktitle, the field that inproceedings entries use for the proceedings name. Proper nouns and acronyms are brace-protected against style recasing.}
@inproceedings{koyamada2022mjx,
  author       = {Koyamada, Sotetsu and
                  Habara, Keigo and
                  Goto, Nao and
                  Okano, Shinri and
                  Nishimori, Soichiro and
                  Ishii, Shin},
  title        = {{Mjx}: A framework for {Mahjong} {AI} research},
  booktitle    = {2022 {IEEE} Conference on Games ({CoG})},
  pages        = {504--507},
  year         = {2022},
  organization = {IEEE},
}