2026
PDF
BibTeX
@misc{srirag-etal-2026-triagetodischarge,
  title  = {From Triage to Discharge: A Survey of {NLP} Tasks, Methods, and Open Challenges in the Emergency Department},
  author = {Srirag, Dipankar and Joshi, Aditya and Narasimhan, Padmanesan and Kanhere, Salil},
  year   = {2026},
  url    = {https://dipankarsrirag.github.io/dipankar-triagetodischargesurvey-preprint.pdf},
  note   = {Preprint}
}
Copied!PDF
Code
Project
BibTeX
@misc{srirag-etal-2026-triagesim,
  title         = {{TriageSim}: A Conversational Emergency Triage Simulation Framework from Structured Electronic Health Records},
  author        = {Srirag, Dipankar and Nguyen, Quoc Dung and Joshi, Aditya and Narasimhan, Padmanesan and Kanhere, Salil},
  year          = {2026},
  eprint        = {2603.10035},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2603.10035}
}
Copied!2025
PDF
BibTeX
@misc{kong2025cairnsbalancingreadabilityscientific,
  title         = {{CAIRNS}: Balancing Readability and Scientific Accuracy in Climate Adaptation Question Answering},
  author        = {Kong, Liangji and Joshi, Aditya and Karimi, Sarvnaz},
  year          = {2025},
  eprint        = {2512.02251},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2512.02251},
}
Copied!PDF
BibTeX
@misc{srirag2025taxonomydrivencasestudyaustralian,
  title         = {A Taxonomy-Driven Case Study of {Australian} Web Resources Against Technology-Facilitated Abuse},
  author        = {Srirag, Dipankar and Cen, Xiaolin and Masood, Rahat and Joshi, Aditya},
  year          = {2025},
  eprint        = {2512.04104},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CY},
  url           = {https://arxiv.org/abs/2512.04104},
}
Copied!PDF
BibTeX
@misc{gupta2025langlingualpersonalisedexerciseorientedenglish,
  title         = {{LangLingual}: A Personalised, Exercise-oriented {English} Language Learning Tool Leveraging Large Language Models},
  author        = {Gupta, Sammriddh and Singh, Sonit and Joshi, Aditya and Kim, Mira},
  year          = {2025},
  eprint        = {2510.23011},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2510.23011},
}
Copied!PDF
BibTeX
@misc{wyatt2025alternativestokenpredictiontext,
  title         = {Alternatives To Next Token Prediction In Text Generation -- A Survey},
  author        = {Wyatt, Charlie and Joshi, Aditya and Salim, Flora},
  year          = {2025},
  eprint        = {2509.24435},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2509.24435},
}
Copied!PDF
BibTeX
@misc{liyanarachchi2025surveymultimodalmusicemotion,
  title         = {A Survey on Multimodal Music Emotion Recognition},
  author        = {Liyanarachchi, Rashini and Joshi, Aditya and Meijering, Erik},
  year          = {2025},
  eprint        = {2504.18799},
  archivePrefix = {arXiv},
  primaryClass  = {cs.MM},
  url           = {https://arxiv.org/abs/2504.18799},
}
Copied!PDF
Code
BibTeX
@misc{nguyen2025harnessingtesttimeadaptationnlu,
  title         = {Harnessing Test-time Adaptation for {NLU} tasks Involving Dialects of {English}},
  author        = {Nguyen, Duke and Joshi, Aditya and Salim, Flora},
  year          = {2025},
  eprint        = {2503.12858},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2503.12858},
}
Copied!2023
PDF
BibTeX
@misc{hong2023relationextractionnewsarticles,
  title         = {Relation Extraction from News Articles ({RENA}): A Tool for Epidemic Surveillance},
  author        = {Hong, Jaeff and Duong, Dung and Hutchinson, Danielle and Akhtar, Zubair and Chen, Rosalie and Dawson, Rebecca and Joshi, Aditya and Lim, Samsung and MacIntyre, C Raina and Gurdasani, Deepti},
  year          = {2023},
  eprint        = {2311.01472},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2311.01472},
}
Copied!Published Work
2026
AbjadNLP @ EACL 2026
PDF
BibTeX
@inproceedings{singh-2026-medarabs,
title = "{M}ed{A}rabs at {A}bjad{M}ed: {A}rabic Medical Text Classification via Data- and Algorithm-Level Fusion",
author = "Singh, Amrita",
editor = "El-Haj, Mo and
Rayson, Paul and
Jarrar, Mustafa and
Ezeani, Ignatius and
Ezzini, Saad and
Ahmadi, Sina and
Haddad Haddad, Amal and
Amol, Cynthia and
Abdelali, Ahmad and
Abudalfa, Shadi",
booktitle = "Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.abjadnlp-1.12/",
doi = "10.18653/v1/2026.abjadnlp-1.12",
pages = "100--104",
abstract = "In this work, we address the challenges of Arabic medical text classification, focusing on class imbalance and the complexity of the language{'}s morphology. We propose a multiclass classification pipeline based on Data- and Algorithm-Level fusion, which integrates the optimal Back Translation technique for data augmentation with the Class Balanced (CB) loss function to enhance performance. The domain-specific AraBERT model is fine-tuned using this approach, achieving competitive results. On the official test set of the AbjadMed task, our pipeline achieves a Macro-F1 score of 0.4219, and it achieves 0.4068 on the development set."
}
Copied!VarDial @ EACL 2026
PDF
BibTeX
@inproceedings{dilsiz-etal-2026-far,
title = "Far Out: Evaluating Language Models on Slang in {A}ustralian and {I}ndian {E}nglish",
author = "Dilsiz, Deniz Kaya and
Srirag, Dipankar and
Joshi, Aditya",
editor = {Scherrer, Yves and
Aepli, No{\"e}mi and
Blaschke, Verena and
Jauhiainen, Tommi and
Ljube{\v{s}}i{\'c}, Nikola and
Nakov, Preslav and
Tiedemann, J{\"o}rg and
Zampieri, Marcos},
booktitle = "Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.vardial-1.2/",
doi = "10.18653/v1/2026.vardial-1.2",
pages = "18--31",
abstract = "Language models exhibit systematic performance gaps when processing text in non-standard language varieties, yet their ability to comprehend variety-specific slang remains underexplored for several languages. We present a comprehensive evaluation of slang awareness in Indian English (en-IN) and Australian English (en-AU) across seven state-of-the-art language models. We construct two complementary datasets: WEB, containing 377 web-sourced usage examples from Urban Dictionary, and GEN, featuring 1,492 synthetically generated usages of these slang terms, across diverse scenarios. We assess language models on three tasks: target word prediction (TWP), guided target word prediction (TWP*) and target word selection (TWS). Our results reveal four key findings: (1) Higher average model performance TWS versus TWP and TWP*, with average accuracy score increasing from 0.03 to 0.49 respectively (2) Stronger average model performance on WEB versus GEN datasets, with average similarity score increasing by 0.03 and 0.05 across TWP and TWP* tasks respectively (3) en-IN tasks outperform en-AU when averaged across all models and datasets, with TWS demonstrating the largest disparity, increasing average accuracy from 0.44 to 0.54. These findings underscore fundamental asymmetries between generative and discriminative competencies for variety-specific language, particularly in the context of slang expressions despite being in a technologically rich language such as English."
}
Copied!AAAI 2026
PDF
BibTeX
@inproceedings{koushik2026trace,
  title     = {{TRACE}: Textual Relevance Augmentation and Contextual Encoding for Multimodal Hate Detection},
  author    = {Koushik, Girish A and Treharne, Helen and Joshi, Aditya and Kanojia, Diptesh},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {40},
  number    = {45},
  pages     = {38763--38771},
  year      = {2026}
}
Copied!2025
AACL 2025
PDF
BibTeX
@inproceedings{wyatt-etal-2025-missing,
title = "What am {I} missing here?: Evaluating Large Language Models for Masked Sentence Prediction",
author = "Wyatt, Charlie and
Joshi, Aditya and
Salim, Flora D.",
editor = "Inui, Kentaro and
Sakti, Sakriani and
Wang, Haofen and
Wong, Derek F. and
Bhattacharyya, Pushpak and
Banerjee, Biplab and
Ekbal, Asif and
Chakraborty, Tanmoy and
Singh, Dhirendra Pratap",
booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
url = "https://aclanthology.org/2025.ijcnlp-short.24/",
doi = "10.18653/v1/2025.ijcnlp-short.24",
pages = "273--283",
ISBN = "979-8-89176-299-2",
abstract = "Transformer-based models primarily rely on Next Token Prediction (NTP), which predicts the next token in a sequence based on the preceding context. However, NTP{'}s focus on single-token prediction often limits a model{'}s ability to plan ahead or maintain long-range coherence, raising questions about how well LLMs can predict longer contexts, such as full sentences within structured documents. While NTP encourages local fluency, it provides no explicit incentive to ensure global coherence across sentence boundaries{---}an essential skill for reconstructive or discursive tasks. To investigate this, we evaluate three commercial LLMs (GPT-4o, Claude 3.5 Sonnet, and Gemini 2.0 Flash) on Masked Sentence Prediction (MSP) {---} the task of infilling a randomly removed sentence {---} from three domains: ROCStories (narrative), Recipe1M (procedural), and Wikipedia (expository). We assess both fidelity (similarity to the original sentence) and cohesiveness (fit within the surrounding context). Our key finding reveals that commercial LLMs, despite their superlative performance in other tasks, are poor at predicting masked sentences in low-structured domains, highlighting a gap in current model capabilities."
}
Copied!ALTA 2025
🏆 Best Paper Honorable Mention
PDF
BibTeX
@inproceedings{singh-etal-2025-nek,
    title = "{N}ek Minit: Harnessing Pragmatic Metacognitive Prompting for Explainable Sarcasm Detection of {A}ustralian and {I}ndian {E}nglish",
    author = "Singh, Ishmanbir and
      Srirag, Dipankar and
      Joshi, Aditya",
    editor = "Kummerfeld, Jonathan K. and
      Joshi, Aditya and
      Dras, Mark",
    booktitle = "Proceedings of the 23rd Annual Workshop of the Australasian Language Technology Association",
    month = nov,
    year = "2025",
    address = "Sydney, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.alta-main.2/",
    pages = "13--27",
    ISSN = "1834-7037",
    abstract = "Sarcasm is a challenge to sentiment analysis because of the incongruity between stated and implied sentiment. The challenge is exacerbated when the implication may be relevant to a specific country or geographical region. Pragmatic metacognitive prompting (PMP) is a cognition-inspired technique that has been used for pragmatic reasoning. In this paper, we harness PMP for explainable sarcasm detection for Australian and Indian English, alongside a benchmark dataset for standard English. We manually add sarcasm explanations to an existing sarcasm-labeled dataset for Australian and Indian English called BESSTIE, and compare the performance for explainable sarcasm detection for them with FLUTE, a standard English dataset containing sarcasm explanations. Our approach utilising PMP when evaluated on two open-weight LLMs (GEMMA and LLAMA) achieves statistically significant performance improvement across all tasks and datasets when compared with four alternative prompting strategies. We also find that alternative techniques such as agentic prompting mitigate context-related failures by enabling external knowledge retrieval. The focused contribution of our work is utilising PMP in generating sarcasm explanations for varieties of English."
}
Copied!Artificial Intelligence Review
PDF
BibTeX
@article{Singh2025,
  author    = {Singh, Amrita and Joshi, Aditya and Jiang, Jiaojiao and Paik, Hye-young},
  title     = {A survey of classification tasks and approaches for legal contracts},
  journal   = {Artificial Intelligence Review},
  volume    = {58},
  number    = {12},
  year      = {2025},
  month     = oct,
  issn      = {1573-7462},
  doi       = {10.1007/s10462-025-11359-8},
  url       = {http://dx.doi.org/10.1007/s10462-025-11359-8},
  publisher = {Springer Science and Business Media LLC}
}
Copied!Findings of ACL 2025
PDF
Dataset
BibTeX
@inproceedings{srirag-etal-2025-besstie,
title = "{BESSTIE}: A Benchmark for Sentiment and Sarcasm Classification for Varieties of {E}nglish",
author = "Srirag, Dipankar and
Joshi, Aditya and
Painter, Jordan and
Kanojia, Diptesh",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.441/",
doi = "10.18653/v1/2025.findings-acl.441",
pages = "8413--8429",
ISBN = "979-8-89176-256-5",
abstract = "Despite large language models (LLMs) being known to exhibit bias against non-mainstream varieties, there are no known labeled datasets for sentiment analysis of English. To address this gap, we introduce BESSTIE, a benchmark for sentiment and sarcasm classification for three varieties of English: Australian (en-AU), Indian (en-IN), and British (en-UK). Using web-based content from two domains, namely, Google Place reviews and Reddit comments, we collect datasets for these language varieties using two methods: location-based and topic-based filtering. Native speakers of the language varieties manually annotate the datasets with sentiment and sarcasm labels. To assess whether the dataset accurately represents these varieties, we conduct two validation steps: (a) manual annotation of language varieties and (b) automatic language variety prediction. We perform an additional annotation exercise to validate the reliance of the annotated labels. Subsequently, we fine-tune nine large language models (LLMs) (representing a range of encoder/decoder and mono/multilingual models) on these datasets, and evaluate their performance on the two tasks. Our results reveal that the models consistently perform better on inner-circle varieties (i.e., en-AU and en-UK), with significant performance drops for en-IN, particularly in sarcasm detection. We also report challenges in cross-variety generalisation, highlighting the need for language variety-specific datasets such as ours. BESSTIE promises to be a useful evaluative benchmark for future research in equitable LLMs, specifically in terms of language varieties. The BESSTIE dataset is publicly available at: \url{https://huggingface.co/datasets/unswnlporg/BESSTIE}."
}
Copied!WWW 2025
PDF
BibTeX
@inproceedings{10.1145/3701716.3715501,
author = {Lin, Jonathan and Joshi, Aditya and Paik, Hye-young and Doung, Tri Dung and Gurdasani, Deepti},
title = {RACCOON: A Retrieval-Augmented Generation Approach for Location Coordinate Capture from News Articles},
year = {2025},
isbn = {9798400713316},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3701716.3715501},
doi = {10.1145/3701716.3715501},
abstract = {Geocoding involves automatic extraction of location coordinates of incidents reported in news articles, and can be used for epidemic intelligence or disaster management. This paper introduces Retrieval-Augmented Coordinate Capture Of Online News articles (RACCOON), an open-source geocoding approach that extracts geolocations from news articles. RACCOON uses a retrieval-augmented generation (RAG) approach where candidate locations and associated information are retrieved in the form of context from a location database, and a prompt containing the retrieved context, location mentions and news articles is fed to an LLM to generate the location coordinates. Our evaluation on three datasets, two underlying LLMs, three baselines and several ablation tests based on the components of RACCOON demonstrate the utility of RACCOON. To the best of our knowledge, RACCOON is the first RAG-based approach for geocoding using pre-trained LLMs.},
booktitle = {Companion Proceedings of the ACM on Web Conference 2025},
pages = {1123--1127},
numpages = {5},
keywords = {geocoding, large language models, location extraction, news articles, rag, retrieval-augmented generation},
location = {Sydney NSW, Australia},
series = {WWW '25},
internal-note = {check author spelling: "Doung, Tri Dung" is likely "Duong" -- verify against the ACM DL record}
}
Copied!NAACL 2025
PDF
Code
BibTeX
@inproceedings{srirag-etal-2025-predicting,
title = "Predicting the Target Word of Game-playing Conversations using a Low-Rank Dialect Adapter for Decoder Models",
author = "Srirag, Dipankar and
Joshi, Aditya and
Eisenstein, Jacob",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-short.2/",
doi = "10.18653/v1/2025.naacl-short.2",
pages = "8--17",
ISBN = "979-8-89176-190-2",
abstract = "Dialect adapters that improve the performance of LLMs for NLU tasks on certain sociolects/dialects/national varieties ({`}dialects' for the sake of brevity) have been reported for encoder models. In this paper, we extend the idea of dialect adapters to decoder models in our architecture called LoRDD. Using MD-3, a publicly available dataset of word game-playing conversations between dialectal speakers, our task is Target Word Prediction (TWP) from a masked conversation. LoRDD combines task adapters and dialect adapters where the latter employ contrastive learning on pseudo-parallel conversations from MD-3. Our experiments on Indian English and Nigerian English conversations with two models (Mistral and Gemma) demonstrate that LoRDD outperforms four baselines on TWP. Additionally, it significantly reduces the performance gap with American English, narrowing it to 12{\%} and 5.8{\%} for word similarity, and 25{\%} and 4.5{\%} for accuracy, respectively. The focused contribution of LoRDD is in its promise for dialect adaptation of decoder models using TWP, a simplified version of the commonly used next-word prediction task."
}
Copied!ACM Computing Surveys
PDF
BibTeX
@article{10.1145/3712060,
author = {Joshi, Aditya and Dabre, Raj and Kanojia, Diptesh and Li, Zhuang and Zhan, Haolan and Haffari, Gholamreza and Dippold, Doris},
title = {Natural Language Processing for Dialects of a Language: A Survey},
year = {2025},
issue_date = {June 2025},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {57},
number = {6},
issn = {0360-0300},
url = {https://doi.org/10.1145/3712060},
doi = {10.1145/3712060},
abstract = {State-of-the-art natural language processing (NLP) models are trained on massive training corpora, and report a superlative performance on evaluation datasets. This survey delves into an important attribute of these datasets: the dialect of a language. Motivated by the performance degradation of NLP models for dialectal datasets and its implications for the equity of language technologies, we survey past research in NLP for dialects in terms of datasets, and approaches. We describe a wide range of NLP tasks in terms of two categories: natural language understanding (NLU) (for tasks such as dialect classification, sentiment analysis, parsing, and NLU benchmarks) and natural language generation (NLG) (for summarisation, machine translation, and dialogue systems). The survey is also broad in its coverage of languages which include English, Arabic, German, among others. We observe that past work in NLP concerning dialects goes deeper than mere dialect classification, and extends to several NLU and NLG tasks. For these tasks, we describe classical machine learning using statistical models, along with the recent deep learning-based approaches based on pre-trained language models. We expect that this survey will be useful to NLP researchers interested in building equitable language technologies by rethinking LLM benchmarks and model architectures.},
journal = {ACM Comput. Surv.},
month = feb,
articleno = {149},
numpages = {37},
keywords = {NLP, dialects, natural language processing, linguistic diversity, large language models, inclusion}
}
Copied!SUMEval @ COLING 2025
PDF
BibTeX
@inproceedings{srirag-etal-2025-evaluating,
title = "Evaluating Dialect Robustness of Language Models via Conversation Understanding",
author = "Srirag, Dipankar and
Sahoo, Nihar Ranjan and
Joshi, Aditya",
booktitle = "Proceedings of the Second Workshop on Scaling Up Multilingual {\&} Multi-Cultural Evaluation",
month = jan,
year = "2025",
address = "Abu Dhabi",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sumeval-2.3/",
pages = "24--38",
abstract = "With an evergrowing number of LLMs reporting superlative performance for English, their ability to perform equitably for different dialects of English (i.e., dialect robustness) needs to be ascertained. Specifically, we use English language (US English or Indian English) conversations between humans who play the word-guessing game of `taboo{`}. We formulate two evaluative tasks: target word prediction (TWP) (i.e., predict the masked target word in a conversation) and target word selection (TWS) (i.e., select the most likely masked target word in a conversation, from among a set of candidate words). Extending MD3, an existing dialectic dataset of taboo-playing conversations, we introduce M-MD3, a target-word-masked version of MD3 with the en-US and en-IN subsets. We create two subsets: en-MV (where en-US is transformed to include dialectal information) and en-TR (where dialectal information is removed from en-IN). We evaluate three multilingual LLMs{--}one open source (Llama3) and two closed-source (GPT-4/3.5). LLMs perform significantly better for US English than Indian English for both TWP and TWS tasks, for all settings, exhibiting marginalisation against the Indian dialect of English. While GPT-based models perform the best, the comparatively smaller models work more equitably after fine-tuning. Our evaluation methodology exhibits a novel and reproducible way to examine attributes of language models using pre-existing dialogue datasets with language varieties. Dialect being an artifact of one{'}s culture, this paper demonstrates the gap in the performance of multilingual LLMs for communities that do not use a mainstream dialect."
}
Copied!2024
ALTA 2024
PDF
BibTeX
@inproceedings{abdalla-etal-2024-comparison,
title = "Comparison of Multilingual and Bilingual Models for Satirical News Detection of {A}rabic and {E}nglish",
author = "Abdalla, Omar W. and
Joshi, Aditya and
Masood, Rahat and
Kanhere, Salil S.",
editor = "Baldwin, Tim and
Rodr{\'i}guez M{\'e}ndez, Sergio Jos{\'e} and
Kuo, Nicholas",
booktitle = "Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2024",
address = "Canberra, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.alta-1.14/",
pages = "173--178",
abstract = "Satirical news is real news combined with a humorous comment or exaggerated content, and it often mimics the format and style of real news. However, satirical news is often misunderstood as misinformation, especially by individuals from different cultural and social backgrounds. This research addresses the challenge of distinguishing satire from truthful news by leveraging multilingual satire detection methods in English and Arabic. We explore both zero-shot and chain-of-thought (CoT) prompting using two language models, Jais-chat(13B) and LLaMA-2-chat(7B). Our results show that CoT prompting offers a significant advantage for the Jais-chat model over the LLaMA-2-chat model. Specifically, Jais-chat achieved the best performance, with an F1-score of 80{\%} in English when using CoT prompting. These results high- light the importance of structured reasoning in CoT, which enhances contextual understanding and is vital for complex tasks like satire detection."
}
Copied!ALTA 2024
PDF
BibTeX
@inproceedings{chan-etal-2024-hate,
title = "``Is Hate Lost in Translation?'': Evaluation of Multilingual {LGBTQIA}+ Hate Speech Detection",
author = "Chan, Fai Leui and
Nguyen, Duke and
Joshi, Aditya",
editor = "Baldwin, Tim and
Rodr{\'i}guez M{\'e}ndez, Sergio Jos{\'e} and
Kuo, Nicholas",
booktitle = "Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2024",
address = "Canberra, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.alta-1.11/",
pages = "146--152",
abstract = "This paper explores the challenges of detecting LGBTQIA+ hate speech of large language models across multiple languages, including English, Italian, Chinese and (code-mixed) English-Tamil, examining the impact of machine translation and whether the nuances of hate speech are preserved across translation. We examine the hate speech detection ability of zero-shot and fine-tuned GPT. Our findings indicate that: (1) English has the highest performance and the code-mixing scenario of English-Tamil being the lowest, (2) fine-tuning improves performance consistently across languages whilst translation yields mixed results. Through simple experimentation with original text and machine-translated text for hate speech detection along with a qualitative error analysis, this paper sheds light on the socio-cultural nuances and complexities of languages that may not be captured by automatic translation."
}
Copied!TeachingNLP @ ACL 2024
PDF
BibTeX
@inproceedings{joshi-etal-2024-striking-balance,
title = "Striking a Balance between Classical and Deep Learning Approaches in Natural Language Processing Pedagogy",
author = "Joshi, Aditya and
Renzella, Jake and
Bhattacharyya, Pushpak and
Jha, Saurav and
Zhang, Xiangyu",
editor = {Al-azzawi, Sana and
Biester, Laura and
Kov{\'a}cs, Gy{\"o}rgy and
Marasovi{\'c}, Ana and
Mathur, Leena and
Mieskes, Margot and
Weissweiler, Leonie},
booktitle = "Proceedings of the Sixth Workshop on Teaching NLP",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.teachingnlp-1.4/",
pages = "23--32",
abstract = "While deep learning approaches represent the state-of-the-art of natural language processing (NLP) today, classical algorithms and approaches still find a place in NLP textbooks and courses of recent years. This paper discusses the perspectives of conveners of two introductory NLP courses taught in Australia and India, and examines how classical and deep learning approaches can be balanced within the lecture plan and assessments of the courses. We also draw parallels with the objects-first and objects-later debate in CS1 education. We observe that teaching classical approaches adds value to student learning by building an intuitive understanding of NLP problems, potential solutions, and even deep learning models themselves. Despite classical approaches not being state-of-the-art, the paper makes a case for their inclusion in NLP courses today."
}
Copied!CMCL @ ACL 2024
PDF
BibTeX
@inproceedings{shen-etal-2024-bambino,
title = "{BAMBINO}-{LM}: (Bilingual-)Human-Inspired Continual Pre-training of {B}aby{LM}",
author = "Shen, Zhewen and
Joshi, Aditya and
Chen, Ruey-Cheng",
editor = "Kuribayashi, Tatsuki and
Rambelli, Giulia and
Takmaz, Ece and
Wicke, Philipp and
Oseki, Yohei",
booktitle = "Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.cmcl-1.1/",
doi = "10.18653/v1/2024.cmcl-1.1",
pages = "1--7",
abstract = "Children from bilingual backgrounds benefit from interactions with parents and teachers to re-acquire their heritage language. In this paper, we investigate how this insight from behavioral study can be incorporated into the learning of small-scale language models. We introduce BAMBINO-LM, a continual pre-training strategy for BabyLM that uses a novel combination of alternation and PPO-based perplexity reward induced from a parent Italian model. Upon evaluation on zero-shot classification tasks for English and Italian, BAMBINO-LM improves the Italian language capability of a BabyLM baseline. Our ablation analysis demonstrates that employing both the alternation strategy and PPO-based modeling is key to this effectiveness gain. We also show that, as a side effect, the proposed method leads to a similar degradation in L1 effectiveness as human children would have had in an equivalent learning scenario. Through its modeling and findings, BAMBINO-LM makes a focused contribution to the pre-training of small-scale language models by first developing a human-inspired strategy for pre-training and then showing that it results in behaviours similar to that of humans."
}
Copied!MobileHCI
PDF
BibTeX
@inproceedings{10.1145/3640471.3680444,
author = {Deldari, Shohreh and Goudarzi, Mohammad and Joshi, Aditya and Shaghaghi, Arash and Finn, Simon and Salim, Flora D. and Jha, Sanjay},
title = {AuditNet: Conversational AI Security Assistant},
year = {2024},
isbn = {9798400705069},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3640471.3680444},
doi = {10.1145/3640471.3680444},
abstract = {In the age of information overload, professionals across various fields face the challenge of navigating vast amounts of documentation and ever-evolving standards. Ensuring compliance with standards, regulations, and contractual obligations is a critical yet complex task across various professional fields. We propose a versatile conversational AI assistant framework designed to facilitate compliance checking on the go, in diverse domains, including but not limited to network infrastructure, legal contracts, educational standards, environmental regulations, and government policies. By leveraging retrieval-augmented generation using large language models, our framework automates the review, indexing, and retrieval of relevant, context-aware information, streamlining the process of verifying adherence to established guidelines and requirements. This AI assistant not only reduces the manual effort involved in compliance checks but also enhances accuracy and efficiency, supporting professionals in maintaining high standards of practice and ensuring regulatory compliance in their respective fields. We propose and demonstrate AuditNet, the first conversational AI security assistant designed to assist IoT network security experts by providing instant access to security standards, policies, and regulations.},
booktitle = {Adjunct Proceedings of the 26th International Conference on Mobile Human-Computer Interaction},
articleno = {22},
numpages = {4},
keywords = {Prompt Engineering, Question Answering, Retrieval-Augmented Generation},
location = {Melbourne, VIC, Australia},
series = {MobileHCI '24 Adjunct}
}
Copied!ACM Transactions on Intelligent Systems and Technology
PDF
Code
BibTeX
@article{10.1145/3768161,
author = {Nguyen, Duke and Yin, Du and Joshi, Aditya and Salim, Flora},
title = {Spectraformer: A Unified Random Feature Framework for Transformer},
year = {2026},
issue_date = {June 2026},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {17},
number = {3},
issn = {2157-6904},
url = {https://doi.org/10.1145/3768161},
doi = {10.1145/3768161},
abstract = {Linearization of attention using various kernel approximation and kernel learning techniques has shown promise. Past methods used a subset of combinations of component functions and weight matrices within the random feature paradigm. We identify the need for a systematic comparison of different combinations of weight matrices and component functions for attention learning in Transformer. Hence, we introduce Spectraformer, a unified framework for approximating and learning the kernel function in the attention mechanism of the Transformer. Our empirical results demonstrate, for the first time, that a random feature-based approach can achieve performance comparable to top-performing sparse and low-rank methods on the challenging Long-Range Arena benchmark. Thus, we establish a new state-of-the-art for random feature-based efficient Transformers. The framework also produces many variants that offer different advantages in accuracy, training time, and memory consumption. Our code is available at: .},
journal = {ACM Trans. Intell. Syst. Technol.},
month = mar,
articleno = {50},
numpages = {29},
keywords = {transformers, kernel, linearized attention, kernelized attention}
}
Copied!2023
ALTA 2023
PDF
BibTeX
@inproceedings{nguyen-etal-2023-stacking,
  author    = {Nguyen, Duke and Naing, Khaing Myat Noe and Joshi, Aditya},
  editor    = {Lau, Jey Han},
  title     = {Stacking the Odds: Transformer-Based Ensemble for {AI}-Generated Text Detection},
  booktitle = {Proceedings of the 21st Annual Workshop of the Australasian Language Technology Association},
  year      = {2023},
  month     = nov,
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.alta-1.22/},
  pages     = {173--178},
  abstract  = {This paper reports our submission under the team name `SynthDetectives' to the ALTA 2023 Shared Task. We use a stacking ensemble of Transformers for the task of AI-generated text detection. Our approach is novel in terms of its choice of models in that we use accessible and lightweight models in the ensemble. We show that ensembling the models results in an improved accuracy in comparison with using them individually. Our approach achieves an accuracy score of 0.9555 on the official test data provided by the shared task organisers.}
}
Copied!
The AI Ethics Journal
PDF
BibTeX
@article{Joshi2023,
  author        = {Joshi, Aditya and Rawat, Shruta},
  title         = {Evaluation of Large Language Models Using an {Indian} Language {LGBTI+} Lexicon},
  journal       = {AI Ethics Journal},
  year          = {2023},
  month         = nov,
  volume        = {4},
  number        = {1},
  issn          = {2690-1625},
  doi           = {10.47289/aiej20231109},
  publisher     = {AI Robotics Ethics Society},
  internal-note = {review: lowercased ISSN/DOI field names for file consistency; dropped url (deprecated dx.doi.org resolver duplicating doi); braced Indian/LGBTI+ against style recasing. Page range not in the export -- add if available.}
}
Copied!
ACM FAccT 2023
PDF
BibTeX
@inproceedings{10.1145/3593013.3594134,
  author        = {{Organizers of Queer in AI} and Ovalle, Anaelia and Subramonian, Arjun and Singh, Ashwin and Voelcker, Claas and Sutherland, Danica J. and Locatelli, Davide and Breznik, Eva and Klubicka, Filip and Yuan, Hang and J, Hetvi and Zhang, Huan and Shriram, Jaidev and Lehman, Kruno and Soldaini, Luca and Sap, Maarten and Deisenroth, Marc Peter and Pacheco, Maria Leonor and Ryskina, Maria and Mundt, Martin and Agarwal, Milind and Mclean, Nyx and Xu, Pan and Pranav, A and Korpan, Raj and Ray, Ruchira and Mathew, Sarah and Arora, Sarthak and John, St and Anand, Tanvi and Agrawal, Vishakha and Agnew, William and Long, Yanan and Wang, Zijie J. and Talat, Zeerak and Ghosh, Avijit and Dennler, Nathaniel and Noseworthy, Michael and Jha, Sharvani and Baylor, Emi and Joshi, Aditya and Bilenko, Natalia Y. and Mcnamara, Andrew and Gontijo-Lopes, Raphael and Markham, Alex and Dong, Evyn and Kay, Jackie and Saraswat, Manu and Vytla, Nikhil and Stark, Luke},
  title         = {Queer In {AI}: A Case Study in Community-Led Participatory {AI}},
  booktitle     = {Proceedings of the 2023 ACM Conference on Fairness, Accountability, and Transparency},
  series        = {FAccT '23},
  year          = {2023},
  isbn          = {9798400701924},
  publisher     = {Association for Computing Machinery},
  address       = {New York, NY, USA},
  doi           = {10.1145/3593013.3594134},
  pages         = {1882--1895},
  numpages      = {14},
  location      = {Chicago, IL, USA},
  abstract      = {Queerness and queer people face an uncertain future in the face of ever more widely deployed and invasive artificial intelligence (AI). These technologies have caused numerous harms to queer people, including privacy violations, censoring and downranking queer content, exposing queer people and spaces to harassment by making them hypervisible, deadnaming and outing queer people. More broadly, they have violated core tenets of queerness by classifying and controlling queer identities. In response to this, the queer community in AI has organized Queer in AI, a global, decentralized, volunteer-run grassroots organization that employs intersectional and community-led participatory design to build an inclusive and equitable AI future. In this paper, we present Queer in AI as a case study for community-led participatory design in AI. We examine how participatory design and intersectional tenets started and shaped this community's programs over the years. We discuss different challenges that emerged in the process, look at ways this organization has fallen short of operationalizing participatory and intersectional principles, and then assess the organization's impact. Queer in AI provides important lessons and insights for practitioners and theorists of participatory methods broadly through its rejection of hierarchy in favor of decentralization, success at building aid and programs by and for the queer community, and effort to change actors and institutions outside of the queer community. Finally, we theorize how communities like Queer in AI contribute to the participatory design in AI more broadly by fostering cultures of participation in AI, welcoming and empowering marginalized participants, critiquing poor or exploitative participatory practices, and bringing participation to institutions outside of individual research projects. Queer in AI's work serves as a case study of grassroots activism and participatory methods within AI, demonstrating the potential of community-led participatory methods and intersectional praxis, while also providing challenges, case studies, and nuanced insights to researchers developing and using participatory methods.},
  internal-note = {review: ACM export mangled the collective first author ("Queerinai, Organizers Of") -- rebraced as a corporate name; replaced Unicode en-dash in pages with --; replaced curly apostrophes in abstract with ASCII for classic-BibTeX safety; dropped url duplicating doi; braced AI in title against sentence-casing.}
}
Copied!