2024
Ferrer-i-Cancho, R.
The optimal placement of the head in the noun phrase. The case of demonstrative, numeral, adjective and noun Journal Article
In: Journal of Quantitative Linguistics, vol. 32, no. 1, pp. 26-53, 2024.
Abstract | Links | BibTeX | Tags: word order, Zipf's law of abbreviation
@article{Ferrer2023b,
title = {The optimal placement of the head in the noun phrase. The case of demonstrative, numeral, adjective and noun},
author = {R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2402.10311},
doi = {10.1080/09296174.2024.2400847},
year = {2024},
date = {2024-01-01},
journal = {Journal of Quantitative Linguistics},
volume = {32},
number = {1},
pages = {26--53},
abstract = {The word order of a sentence is shaped by multiple principles. The principle of syntactic dependency distance minimization is in conflict with the principle of surprisal minimization (or predictability maximization) in single head syntactic dependency structures: while the former predicts that the head should be placed at the center of the linear arrangement, the latter predicts that the head should be placed at one of the ends (either first or last). A critical question is when surprisal minimization (or predictability maximization) should surpass syntactic dependency distance minimization. In the context of single head structures, it has been predicted that this is more likely to happen when two conditions are met, i.e. (a) fewer words are involved and (b) words are shorter. Here we test the prediction on the noun phrase when it is composed of a demonstrative, a numeral, an adjective and a noun. We find that, across preferred orders in languages, the noun tends to be placed at one of the ends, confirming the theoretical prediction. We also show evidence of anti locality effects: syntactic dependency distances in preferred orders are longer than expected by chance.},
keywords = {word order, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
2023
Petrini, S.; Casas-i-Muñoz, A.; Cluet-i-Martinell, J.; Wang, M.; Bentz, C.; Ferrer-i-Cancho, R.
The optimality of word lengths. Theoretical foundations and an empirical study. Journal Article
In: 2023.
Abstract | Links | BibTeX | Tags: Zipf's law of abbreviation
@article{Petrini2022a,
title = {The optimality of word lengths. Theoretical foundations and an empirical study.},
author = {S. Petrini and A. Casas-i-Muñoz and J. Cluet-i-Martinell and M. Wang and C. Bentz and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2208.10384},
eprint = {2208.10384},
eprinttype = {arXiv},
year = {2023},
date = {2023-01-01},
abstract = {One of the most robust patterns found in human languages is Zipf's law of abbreviation, that is, the tendency of more frequent words to be shorter. Since Zipf's pioneering research, this law has been viewed as a manifestation of compression, i.e. the minimization of the length of forms - a universal principle of natural communication. Although the claim that languages are optimized has become trendy, attempts to measure the degree of optimization of languages have been rather scarce. Here we demonstrate that compression manifests itself in a wide sample of languages without exceptions, and independently of the unit of measurement. It is detectable for both word lengths in characters of written language as well as durations in time in spoken language. Moreover, to measure the degree of optimization, we derive a simple formula for a random baseline and present two scores that are dualy normalized, namely, they are normalized with respect to both the minimum and the random baseline. We analyze the theoretical and statistical advantages and disadvantages of these and other scores. Harnessing the best score, we quantify for the first time the degree of optimality of word lengths in languages. This indicates that languages are optimized to 62 or 67 percent on average (depending on the source) when word lengths are measured in characters, and to 65 percent on average when word lengths are measured in time. In general, spoken word durations are more optimized than written word lengths in characters. Beyond the analyses reported here, our work paves the way to measure the degree of optimality of the vocalizations or gestures of other species, and to compare them against written, spoken, or signed human languages.},
keywords = {Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
Petrini, S.; Casas-i-Muñoz, A.; Cluet-i-Martinell, J.; Wang, M.; Bentz, C.; Ferrer-i-Cancho, R.
Direct and indirect evidence of compression of word lengths. Zipf's law of abbreviation revisited. Journal Article
In: Glottometrics, vol. 54, pp. 58-87, 2023.
Abstract | Links | BibTeX | Tags: Zipf's law of abbreviation
@article{Petrini2022b,
title = {Direct and indirect evidence of compression of word lengths. Zipf's law of abbreviation revisited.},
author = {S. Petrini and A. Casas-i-Muñoz and J. Cluet-i-Martinell and M. Wang and C. Bentz and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2303.10128},
doi = {10.53482/2023_54_407},
year = {2023},
date = {2023-01-01},
journal = {Glottometrics},
volume = {54},
pages = {58--87},
abstract = {Zipf's law of abbreviation, the tendency of more frequent words to be shorter, is one of the most solid candidates for a linguistic universal, in the sense that it has the potential for being exceptionless or with a number of exceptions that is vanishingly small compared to the number of languages on Earth. Since Zipf's pioneering research, this law has been viewed as a manifestation of a universal principle of communication, i.e. the minimization of word lengths, to reduce the effort of communication. Here we revisit the concordance of written language with the law of abbreviation. Crucially, we provide wider evidence that the law holds also in speech (when word length is measured in time), in particular in 46 languages from 14 linguistic families. Agreement with the law of abbreviation provides indirect evidence of compression of languages via the theoretical argument that the law of abbreviation is a prediction of optimal coding. Motivated by the need of direct evidence of compression, we derive a simple formula for a random baseline indicating that word lengths are systematically below chance, across linguistic families and writing systems, and independently of the unit of measurement (length in characters or duration in time). Our work paves the way to measure and compare the degree of optimality of word lengths in languages.},
keywords = {Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
2022
Safryghin, A.; Cross, C.; Fallon, B.; Heesen, R.; Ferrer-i-Cancho, R.; Hobaiter, C.
Variable expression of linguistic laws in ape gesture: a case study from chimpanzee sexual solicitation Journal Article
In: Royal Society Open Science, vol. 9, pp. 220849, 2022.
Abstract | Links | BibTeX | Tags: Menzerath's law, Zipf's law of abbreviation
@article{Safryghin2022a,
title = {Variable expression of linguistic laws in ape gesture: a case study from chimpanzee sexual solicitation},
author = {A. Safryghin and C. Cross and B. Fallon and R. Heesen and R. Ferrer-i-Cancho and C. Hobaiter},
url = {https://www.biorxiv.org/content/10.1101/2021.05.19.444810v3},
doi = {10.1098/rsos.220849},
year = {2022},
date = {2022-01-01},
journal = {Royal Society Open Science},
volume = {9},
pages = {220849},
abstract = {Two language laws have been identified as consistent patterns shaping animal behaviour, both acting on the organizational level of communicative systems. Zipf's law of brevity describes a negative relationship between behavioural length and frequency. Menzerath's law defines a negative correlation between the number of behaviours in a sequence and average length of the behaviour composing it. Both laws have been linked with the information-theoretic principle of compression, which tends to minimize code length. We investigated their presence in a case study of male chimpanzee sexual solicitation gesture. We failed to find evidence supporting Zipf's law of brevity, but solicitation gestures followed Menzerath's law: longer sequences had shorter average gesture duration. Our results extend previous findings suggesting gesturing may be limited by individual energetic constraints. However, such patterns may only emerge in sufficiently large datasets. Chimpanzee gestural repertoires do not appear to manifest a consistent principle of compression previously described in many other close-range systems of communication. Importantly, the same signallers and signals were previously shown to adhere to these laws in subsets of the repertoire when used in play; highlighting that, in addition to selection on the signal repertoire, ape gestural expression appears shaped by factors in the immediate socio-ecological context.},
keywords = {Menzerath's law, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.; Lusseau, D.; McCowan, B.
Parallels of human language in the behavior of bottlenose dolphins Journal Article
In: Linguistic Frontiers, vol. 5, no. 1, pp. 5-11, 2022.
Abstract | Links | BibTeX | Tags: Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation
@article{Ferrer2022a,
title = {Parallels of human language in the behavior of bottlenose dolphins},
author = {R. Ferrer-i-Cancho and D. Lusseau and B. McCowan},
url = {https://arxiv.org/abs/1605.01661},
doi = {10.2478/lf-2022-0002},
year = {2022},
date = {2022-01-01},
journal = {Linguistic Frontiers},
volume = {5},
number = {1},
pages = {5--11},
abstract = {Dolphins exhibit striking similarities with humans. Here we review them with the help of quantitative linguistics and information theory. Various statistical laws of language that are well-known in quantitative linguistics, i.e. Zipf’s law for word frequencies, the law of meaning distribution, the law of abbreviation and Menzerath’s law, have been found in dolphin vocal or gestural behavior. The information theory of these laws suggests that humans and dolphins share cost-cutting principles of organization.},
keywords = {Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
Semple, S.; Ferrer-i-Cancho, R.; Gustison, M.
Linguistic laws in biology Journal Article
In: Trends in Ecology and Evolution, vol. 37, no. 1, pp. 53-66, 2022.
Abstract | Links | BibTeX | Tags: Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation
@article{Semple2021a,
title = {Linguistic laws in biology},
author = {S. Semple and R. Ferrer-i-Cancho and M. Gustison},
url = {https://arxiv.org/abs/2310.07387},
doi = {10.1016/j.tree.2021.08.012},
year = {2022},
date = {2022-01-01},
journal = {Trends in Ecology and Evolution},
volume = {37},
number = {1},
pages = {53--66},
abstract = {Linguistic laws, the common statistical patterns of human language, have been investigated by quantitative linguists for nearly a century. Recently, biologists from a range of disciplines have started to explore the prevalence of these laws beyond language, finding patterns consistent with linguistic laws across multiple levels of biological organisation, from molecular (genomes, genes, and proteins) to organismal (animal behaviour) to ecological (populations and ecosystems). We propose a new conceptual framework for the study of linguistic laws in biology, comprising and integrating distinct levels of analysis, from description to prediction to theory building. Adopting this framework will provide critical new insights into the fundamental rules of organisation underpinning natural systems, unifying linguistic laws and core theory in biology.},
keywords = {Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.; Bentz, C.; Seguin, C.
Optimal coding and the origins of Zipfian laws Journal Article
In: Journal of Quantitative Linguistics, vol. 29, no. 2, pp. 165-194, 2022.
Abstract | Links | BibTeX | Tags: information theory, Zipf's law for word frequencies, Zipf's law of abbreviation
@article{Ferrer2019c,
title = {Optimal coding and the origins of Zipfian laws},
author = {R. Ferrer-i-Cancho and C. Bentz and C. Seguin},
url = {https://arxiv.org/abs/1906.01545},
doi = {10.1080/09296174.2020.1778387},
year = {2022},
date = {2022-01-01},
journal = {Journal of Quantitative Linguistics},
volume = {29},
number = {2},
pages = {165--194},
abstract = {The problem of compression in standard information theory consists of assigning codes as short as possible to numbers. Here we consider the problem of optimal coding – under an arbitrary coding scheme – and show that it predicts Zipf's law of abbreviation, namely a tendency in natural languages for more frequent words to be shorter. We apply this result to investigate optimal coding also under so-called non-singular coding, a scheme where unique segmentation is not warranted but codes stand for a distinct number. Optimal non-singular coding predicts that the length of a word should grow approximately as the logarithm of its frequency rank, which is again consistent with Zipf's law of abbreviation. Optimal non-singular coding in combination with the maximum entropy principle also predicts Zipf's rank-frequency distribution. Furthermore, our findings on optimal non-singular coding challenge common beliefs about random typing. It turns out that random typing is in fact an optimal coding process, in stark contrast with the common assumption that it is detached from cost cutting considerations. Finally, we discuss the implications of optimal coding for the construction of a compact theory of Zipfian laws and other linguistic laws.},
keywords = {information theory, Zipf's law for word frequencies, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
2019
Casas, B.; Hernández-Fernández, A.; Català, N.; Ferrer-i-Cancho, R.; Baixeries, J.
Polysemy and brevity versus frequency in language Journal Article
In: Computer Speech and Language, vol. 58, pp. 19–50, 2019.
Abstract | Links | BibTeX | Tags: Zipf's law of abbreviation, Zipf's meaning-frequency law
@article{Casas2019b,
title = {Polysemy and brevity versus frequency in language},
author = {B. Casas and A. Hernández-Fernández and N. Català and R. Ferrer-i-Cancho and J. Baixeries},
doi = {10.1016/j.csl.2019.03.007},
year = {2019},
date = {2019-01-01},
journal = {Computer Speech and Language},
volume = {58},
pages = {19--50},
abstract = {The pioneering research of G. K. Zipf on the relationship between word frequency and other word features led to the formulation of various linguistic laws. The most popular is Zipf’s law for word frequencies. Here we focus on two laws that have been studied less intensively: the meaning-frequency law, i.e. the tendency of more frequent words to be more polysemous, and the law of abbreviation, i.e. the tendency of more frequent words to be shorter. In a previous work, we tested the robustness of these Zipfian laws for English, roughly measuring word length in number of characters and distinguishing adult from child speech. In the present article, we extend our study to other languages (Dutch and Spanish) and introduce two additional measures of length: syllabic length and phonemic length. Our correlation analysis indicates that both the meaning-frequency law and the law of abbreviation hold overall in all the analyzed languages.},
keywords = {Zipf's law of abbreviation, Zipf's meaning-frequency law},
pubstate = {published},
tppubtype = {article}
}
Heesen, R.; Hobaiter, C.; Ferrer-i-Cancho, R.; Semple, S.
Linguistic laws in chimpanzee gestural communication Journal Article
In: Proceedings of the Royal Society B: Biological Sciences, vol. 286, pp. 20182900, 2019.
Abstract | Links | BibTeX | Tags: Menzerath's law, Zipf's law of abbreviation
@article{Heesen2019a,
  author    = {R. Heesen and C. Hobaiter and R. Ferrer-i-Cancho and S. Semple},
  title     = {Linguistic laws in chimpanzee gestural communication},
  journal   = {Proceedings of the Royal Society B: Biological Sciences},
  volume    = {286},
  pages     = {20182900},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1098/rspb.2018.2900},
  keywords  = {Menzerath's law, Zipf's law of abbreviation},
  abstract  = {Studies testing linguistic laws outside language have provided important insights into the organization of biological systems. For example, patterns consistent with Zipf's law of abbreviation (which predicts a negative relationship between word length and frequency of use) have been found in the vocal and non-vocal behaviour of a range of animals, and patterns consistent with Menzerath's law (according to which longer sequences are made up of shorter constituents) have been found in primate vocal sequences, and in genes, proteins and genomes. Both laws have been linked to compression-the information theoretic principle of minimizing code length. Here, we present the first test of these laws in animal gestural communication. We initially did not find the negative relationship between gesture duration and frequency of use predicted by Zipf's law of abbreviation, but this relationship was seen in specific subsets of the repertoire. Furthermore, a pattern opposite to that predicted was seen in one subset of gestures-whole body signals. We found a negative correlation between number and mean duration of gestures in sequences, in line with Menzerath's law. These results provide the first evidence that compression underpins animal gestural communication, and highlight an important commonality between primate gesturing and language.},
  pubstate  = {published},
  tppubtype = {article}
}
2016
Bentz, C.; Ferrer-i-Cancho, R.
Zipf's law of abbreviation as a language universal Proceedings Article
In: Bentz, Christian; Jäger, Gerhard; Yanovich, Igor (Ed.): Proceedings of the Leiden Workshop on Capturing Phylogenetic Algorithms for Linguistics, University of Tübingen, 2016.
Abstract | Links | BibTeX | Tags: Zipf's law of abbreviation
@inproceedings{Bentz2016a,
  author    = {C. Bentz and R. Ferrer-i-Cancho},
  title     = {Zipf's law of abbreviation as a language universal},
  booktitle = {Proceedings of the Leiden Workshop on Capturing Phylogenetic Algorithms for Linguistics},
  editor    = {Christian Bentz and Gerhard Jäger and Igor Yanovich},
  publisher = {University of Tübingen},
  year      = {2016},
  date      = {2016-01-01},
  doi       = {10.15496/publikation-10057},
  url       = {http://hdl.handle.net/10900/68639},
  keywords  = {Zipf's law of abbreviation},
  abstract  = {Words that are used more frequently tend to be shorter. This statement is known as Zipf’s law of abbreviation. Here we perform the widest investigation of the presence of the law to date. In a sample of 1262 texts and 986 different languages - about 13% of the world’s language diversity - a negative correlation between word frequency and word length is found in all cases. In line with Zipf’s original proposal, we argue that this universal trend is likely to derive from fundamental principles of information processing and transfer.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Hernández-Fernández, A.; Casas, B.; Ferrer-i-Cancho, R.; Baixeries, J.
Testing the robustness of laws of polysemy and brevity versus frequency Proceedings Article
In: Král, P.; Martín-Vide, C. (Ed.): 4th International Conference on Statistical Language and Speech Processing (SLSP 2016). Lecture Notes in Computer Science 9918, pp. 19–29, 2016.
Abstract | Links | BibTeX | Tags: child language, Zipf's law of abbreviation, Zipf's meaning-frequency law
@inproceedings{Hernandez2016a,
title = {Testing the robustness of laws of polysemy and brevity versus frequency},
author = {A. Hernández-Fernández and B. Casas and R. Ferrer-i-Cancho and J. Baixeries},
editor = {P. Král and C. Martín-Vide},
doi = {10.1007/978-3-319-45925-7_2},
year = {2016},
date = {2016-01-01},
booktitle = {4th International Conference on Statistical Language and Speech Processing (SLSP 2016). Lecture Notes in Computer Science 9918},
pages = {19--29},
abstract = {The pioneering research of G.K. Zipf on the relationship between word frequency and other word features led to the formulation of various linguistic laws. Here we focus on a couple of them: the meaning-frequency law, i.e. the tendency of more frequent words to be more polysemous, and the law of abbreviation, i.e. the tendency of more frequent words to be shorter. Here we evaluate the robustness of these laws in contexts where they have not been explored yet to our knowledge. The recovery of the laws again in new conditions provides support for the hypothesis that they originate from abstract mechanisms.},
keywords = {child language, Zipf's law of abbreviation, Zipf's meaning-frequency law},
pubstate = {published},
tppubtype = {inproceedings}
}
2015
Semple, S.; Ferrer-i-Cancho, R.; Bergman, T.; Hsu, M.; Agoramoorthy, G.; Gustison, M.
Linguistic laws in primate vocal communication Proceedings Article
In: Proceedings of the 6th European Federation for Primatology Meeting, XXII Italian Association of Primatology Congress Rome, Italy, August 25-28. Folia Primatologica 86, 357, 2015.
Links | BibTeX | Tags: Menzerath's law, Zipf's law of abbreviation
@inproceedings{Semple2015a,
title = {Linguistic laws in primate vocal communication},
author = {S. Semple and R. Ferrer-i-Cancho and T. Bergman and M. Hsu and G. Agoramoorthy and M. Gustison},
doi = {10.1159/000435825},
year = {2015},
date = {2015-01-01},
booktitle = {Proceedings of the 6th European Federation for Primatology Meeting, XXII Italian Association of Primatology Congress Rome, Italy, August 25-28. Folia Primatologica 86, 357},
keywords = {Menzerath's law, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {inproceedings}
}
Ferrer-i-Cancho, R.; Bentz, C.; Seguin, C.
Compression and the origins of Zipf's law of abbreviation Journal Article
In: 2015.
Links | BibTeX | Tags: information theory, Zipf's law of abbreviation
@article{Ferrer2015a,
title = {Compression and the origins of Zipf's law of abbreviation},
author = {R. Ferrer-i-Cancho and C. Bentz and C. Seguin},
url = {https://arxiv.org/abs/1504.04884},
eprint = {1504.04884},
eprinttype = {arXiv},
year = {2015},
date = {2015-01-01},
keywords = {information theory, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
2013
Ferrer-i-Cancho, R.; Hernández-Fernández, A.; Lusseau, D.; Agoramoorthy, G.; Hsu, M. J.; Semple, S.
Compression as a universal principle of animal behavior Journal Article
In: Cognitive Science, vol. 37, no. 8, pp. 1565-1578, 2013.
Abstract | Links | BibTeX | Tags: information theory, Zipf's law of abbreviation
@article{Ferrer2012d,
title = {Compression as a universal principle of animal behavior},
author = {R. Ferrer-i-Cancho and A. Hernández-Fernández and D. Lusseau and G. Agoramoorthy and M. J. Hsu and S. Semple},
doi = {10.1111/cogs.12061},
year = {2013},
date = {2013-01-01},
journal = {Cognitive Science},
volume = {37},
number = {8},
pages = {1565--1578},
abstract = {A key aim in biology and psychology is to identify fundamental principles underpinning the behavior of animals, including humans. Analyses of human language and the behavior of a range of non‐human animal species have provided evidence for a common pattern underlying diverse behavioral phenomena: Words follow Zipf's law of brevity (the tendency of more frequently used words to be shorter), and conformity to this general pattern has been seen in the behavior of a number of other animals. It has been argued that the presence of this law is a sign of efficient coding in the information theoretic sense. However, no strong direct connection has been demonstrated between the law and compression, the information theoretic principle of minimizing the expected length of a code. Here, we show that minimizing the expected code length implies that the length of a word cannot increase as its frequency increases. Furthermore, we show that the mean code length or duration is significantly small in human language, and also in the behavior of other species in all cases where agreement with the law of brevity has been found. We argue that compression is a general principle of animal behavior that reflects selection for efficiency of coding.},
keywords = {information theory, Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
2009
Ferrer-i-Cancho, R.; Lusseau, D.
Efficient coding in dolphin surface behavioral patterns Journal Article
In: Complexity, vol. 14, no. 5, pp. 23-25, 2009.
Abstract | Links | BibTeX | Tags: Zipf's law of abbreviation
@article{Ferrer2009g,
title = {Efficient coding in dolphin surface behavioral patterns},
author = {R. Ferrer-i-Cancho and D. Lusseau},
doi = {10.1002/cplx.20266},
year = {2009},
date = {2009-01-01},
journal = {Complexity},
volume = {14},
number = {5},
pages = {23--25},
abstract = {We show that the law of brevity, i.e. the tendency of words to shorten as their frequency increases, is also found in dolphin surface behavioral patterns. As far as we know, this is the first evidence of the law in another species, suggesting that coding efficiency is not unique to humans.},
keywords = {Zipf's law of abbreviation},
pubstate = {published},
tppubtype = {article}
}
In case the fancy publication browser above fails, you can also try.