2022
Semple, S.; Ferrer-i-Cancho, R.; Gustison, M.
Linguistic laws in biology Journal Article
In: Trends in Ecology and Evolution, vol. 37, no. 1, pp. 53-66, 2022.
Abstract | Links | BibTeX | Tags: Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation
@article{Semple2021a,
  author    = {S. Semple and R. Ferrer-i-Cancho and M. Gustison},
  title     = {Linguistic laws in biology},
  journal   = {Trends in Ecology and Evolution},
  volume    = {37},
  number    = {1},
  pages     = {53--66},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1016/j.tree.2021.08.012},
  url       = {https://arxiv.org/abs/2310.07387},
  keywords  = {Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation},
  abstract  = {Linguistic laws, the common statistical patterns of human language, have been investigated by quantitative linguists for nearly a century. Recently, biologists from a range of disciplines have started to explore the prevalence of these laws beyond language, finding patterns consistent with linguistic laws across multiple levels of biological organisation, from molecular (genomes, genes, and proteins) to organismal (animal behaviour) to ecological (populations and ecosystems). We propose a new conceptual framework for the study of linguistic laws in biology, comprising and integrating distinct levels of analysis, from description to prediction to theory building. Adopting this framework will provide critical new insights into the fundamental rules of organisation underpinning natural systems, unifying linguistic laws and core theory in biology.},
  pubstate  = {published},
  tppubtype = {article}
}
Ferrer-i-Cancho, R.; Bentz, C.; Seguin, C.
Optimal coding and the origins of Zipfian laws Journal Article
In: Journal of Quantitative Linguistics, vol. 29, no. 2, pp. 165-194, 2022.
Abstract | Links | BibTeX | Tags: information theory, Zipf's law for word frequencies, Zipf's law of abbreviation
@article{Ferrer2019c,
  author    = {R. Ferrer-i-Cancho and C. Bentz and C. Seguin},
  title     = {Optimal coding and the origins of Zipfian laws},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {29},
  number    = {2},
  pages     = {165--194},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1080/09296174.2020.1778387},
  url       = {https://arxiv.org/abs/1906.01545},
  keywords  = {information theory, Zipf's law for word frequencies, Zipf's law of abbreviation},
  abstract  = {The problem of compression in standard information theory consists of assigning codes as short as possible to numbers. Here we consider the problem of optimal coding – under an arbitrary coding scheme – and show that it predicts Zipf's law of abbreviation, namely a tendency in natural languages for more frequent words to be shorter. We apply this result to investigate optimal coding also under so-called non-singular coding, a scheme where unique segmentation is not warranted but codes stand for a distinct number. Optimal non-singular coding predicts that the length of a word should grow approximately as the logarithm of its frequency rank, which is again consistent with Zipf's law of abbreviation. Optimal non-singular coding in combination with the maximum entropy principle also predicts Zipf's rank-frequency distribution. Furthermore, our findings on optimal non-singular coding challenge common beliefs about random typing. It turns out that random typing is in fact an optimal coding process, in stark contrast with the common assumption that it is detached from cost cutting considerations. Finally, we discuss the implications of optimal coding for the construction of a compact theory of Zipfian laws and other linguistic laws.},
  pubstate  = {published},
  tppubtype = {article}
}
Ferrer-i-Cancho, R.; Lusseau, D.; McCowan, B.
Parallels of human language in the behavior of bottlenose dolphins Journal Article
In: Linguistic Frontiers, vol. 5, no. 1, pp. 5-11, 2022.
Abstract | Links | BibTeX | Tags: Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation
@article{Ferrer2022a,
  author    = {R. Ferrer-i-Cancho and D. Lusseau and B. McCowan},
  title     = {Parallels of human language in the behavior of bottlenose dolphins},
  journal   = {Linguistic Frontiers},
  volume    = {5},
  number    = {1},
  pages     = {5--11},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.2478/lf-2022-0002},
  url       = {https://arxiv.org/abs/1605.01661},
  keywords  = {Law of meaning distribution, Meaning-frequency law, Menzerath-Altmann law, theory construction, Zipf's law for word frequencies, Zipf's law of abbreviation},
  abstract  = {Dolphins exhibit striking similarities with humans. Here we review them with the help of quantitative linguistics and information theory. Various statistical laws of language that are well-known in quantitative linguistics, i.e. Zipf’s law for word frequencies, the law of meaning distribution, the law of abbreviation and Menzerath’s law, have been found in dolphin vocal or gestural behavior. The information theory of these laws suggests that humans and dolphins share cost-cutting principles of organization.},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Català, N.; Baixeries, J.; Ferrer-i-Cancho, R.; Padró, L.; Hernández-Fernández, A.
Zipf's laws of meaning in Catalan Journal Article
In: PLOS ONE, vol. 16, no. 12, pp. e0260849, 2021.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies, Zipf's meaning-frequency law
% The doi field holds the bare identifier (no resolver host); styles add the link prefix themselves.
% Third author is spelled as in the other entries of this list (Ferrer-i-Cancho).
@article{Catala2021a,
  author    = {N. Català and J. Baixeries and R. Ferrer-i-Cancho and L. Padró and A. Hernández-Fernández},
  title     = {Zipf's laws of meaning in Catalan},
  journal   = {PLOS ONE},
  volume    = {16},
  number    = {12},
  pages     = {e0260849},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1371/journal.pone.0260849},
  url       = {https://arxiv.org/abs/2107.00042},
  keywords  = {Zipf's law for word frequencies, Zipf's meaning-frequency law},
  abstract  = {In his pioneering research, G. K. Zipf formulated a couple of statistical laws on the relationship between the frequency of a word with its number of meanings: the law of meaning distribution, relating the frequency of a word and its frequency rank, and the meaning-frequency law, relating the frequency of a word with its number of meanings. Although these laws were formulated more than half a century ago, they have been only investigated in a few languages. Here we present the first study of these laws in Catalan. We verify these laws in Catalan via the relationship among their exponents and that of the rank-frequency law. We present a new protocol for the analysis of these Zipfian laws that can be extended to other languages. We report the first evidence of two marked regimes for these laws in written language and speech, paralleling the two regimes in Zipf's rank-frequency law in large multi-author corpora discovered in early 2000s. Finally, the implications of these two regimes will be discussed.},
  pubstate  = {published},
  tppubtype = {article}
}
2020
Corral, A.; Serra, I.; Ferrer-i-Cancho, R.
Distinct flavors of Zipf's law and its maximum likelihood fitting: Rank-size and size-distribution representations Journal Article
In: Physical Review E, vol. 102, pp. 052113, 2020.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies
% Volume 102 is the volume component of the DOI (PhysRevE.102.052113); pages holds the article number.
@article{Corral2019a,
  author    = {A. Corral and I. Serra and R. Ferrer-i-Cancho},
  title     = {Distinct flavors of Zipf's law and its maximum likelihood fitting: Rank-size and size-distribution representations},
  journal   = {Physical Review E},
  volume    = {102},
  pages     = {052113},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.1103/PhysRevE.102.052113},
  url       = {https://arxiv.org/abs/1908.01398},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {In the last years, researchers have realized the difficulties of fitting power-law distributions properly. These difficulties are higher in Zipf's systems, due to the discreteness of the variables and to the existence of two representations for these systems, i.e., two versions about which is the random variable to fit. The discreteness implies that a power law in one of the representations is not a power law in the other, and vice versa. We generate synthetic power laws in both representations and apply a state-of-the-art fitting method (based on maximum-likelihood plus a goodness-of-fit test) for each of the two random variables. It is important to stress that the method does not fit the whole distribution, but the tail, understood as the part of a distribution above a cut-off that separates non-power-law behavior from power-law behavior. We find that, no matter which random variable is power-law distributed, the rank-size representation is not adequate for fitting, whereas the representation in terms of the distribution of sizes leads to the recovery of the simulated exponents, may be with some bias.},
  pubstate  = {published},
  tppubtype = {article}
}
2018
Ferrer-i-Cancho, R.
Optimization models of natural communication Journal Article
In: Journal of Quantitative Linguistics, vol. 25, no. 3, pp. 207-237, 2018.
Abstract | Links | BibTeX | Tags: information theory, Zipf's law for word frequencies
@article{Ferrer2015b,
  author    = {R. Ferrer-i-Cancho},
  title     = {Optimization models of natural communication},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {25},
  number    = {3},
  pages     = {207--237},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1080/09296174.2017.1366095},
  keywords  = {information theory, Zipf's law for word frequencies},
  abstract  = {A family of information theoretic models of communication was introduced more than a decade ago to explain the origins of Zipf’s law for word frequencies. The family is a based on a combination of two information theoretic principles: maximization of mutual information between forms and meanings and minimization of form entropy. The family also sheds light on the origins of three other patterns: the principle of contrast; a related vocabulary learning bias; and the meaning-frequency law. Here two important components of the family, namely the information theoretic principles and the energy function that combines them linearly, are reviewed from the perspective of psycholinguistics, language learning, information theory and synergetic linguistics. The minimization of this linear function is linked to the problem of compression of standard information theory and might be tuned by self-organization.},
  pubstate  = {published},
  tppubtype = {article}
}
2016
Ferrer-i-Cancho, R.
Compression and the origins of Zipf's law for word frequencies Journal Article
In: Complexity, vol. 21, pp. 409-411, 2016.
Abstract | Links | BibTeX | Tags: information theory, Zipf's law for word frequencies
@article{Ferrer2016b,
  author    = {R. Ferrer-i-Cancho},
  title     = {Compression and the origins of Zipf's law for word frequencies},
  journal   = {Complexity},
  volume    = {21},
  pages     = {409--411},
  year      = {2016},
  date      = {2016-01-01},
  doi       = {10.1002/cplx.21820},
  keywords  = {information theory, Zipf's law for word frequencies},
  abstract  = {Here we sketch a new derivation of Zipf's law for word frequencies based on optimal coding. The structure of the derivation is reminiscent of Mandelbrot's random typing model but it has multiple advantages over random typing: (1) it starts from realistic cognitive pressures, (2) it does not require fine tuning of parameters, and (3) it sheds light on the origins of other statistical laws of language and thus can lead to a compact theory of linguistic laws. Our findings suggest that the recurrence of Zipf's law in human languages could originate from pressure for easy and fast communication.},
  pubstate  = {published},
  tppubtype = {article}
}
2015
Corral, A.; Boleda, G.; Ferrer-i-Cancho, R.
Zipf's law for word frequencies: word forms versus lemmas in long texts Journal Article
In: PLoS ONE, vol. 10, no. 7, pp. e0129031, 2015.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies
@article{Corral2015a,
  author    = {A. Corral and G. Boleda and R. Ferrer-i-Cancho},
  title     = {Zipf's law for word frequencies: word forms versus lemmas in long texts},
  journal   = {PLoS ONE},
  volume    = {10},
  number    = {7},
  pages     = {e0129031},
  year      = {2015},
  date      = {2015-01-01},
  doi       = {10.1371/journal.pone.0129031},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {Zipf’s law is a fundamental paradigm in the statistics of written and spoken natural language as well as in other communication systems. We raise the question of the elementary units for which Zipf’s law should hold in the most natural way, studying its validity for plain word forms and for the corresponding lemma forms. We analyze several long literary texts comprising four languages, with different levels of morphological complexity. In all cases Zipf’s law is fulfilled, in the sense that a power-law distribution of word or lemma frequencies is valid for several orders of magnitude. We investigate the extent to which the word-lemma transformation preserves two parameters of Zipf’s law: the exponent and the low-frequency cut-off. We are not able to demonstrate a strict invariance of the tail, as for a few texts both exponents deviate significantly, but we conclude that the exponents are very similar, despite the remarkable transformation that going from words to lemmas represents, considerably affecting all ranges of frequencies. In contrast, the low-frequency cut-offs are less stable, tending to increase substantially after the transformation.},
  pubstate  = {published},
  tppubtype = {article}
}
2013
Baixeries, J.; Elvevåg, B.; Ferrer-i-Cancho, R.
The evolution of the exponent of Zipf's law in language ontogeny Journal Article
In: PLoS ONE, vol. 8, no. 3, pp. e53227, 2013.
Abstract | Links | BibTeX | Tags: child language, vocabulary learning, Zipf's law for word frequencies
@article{Baixeries2012c,
  author    = {J. Baixeries and B. Elvevåg and R. Ferrer-i-Cancho},
  title     = {The evolution of the exponent of Zipf's law in language ontogeny},
  journal   = {PLoS ONE},
  volume    = {8},
  number    = {3},
  pages     = {e53227},
  year      = {2013},
  date      = {2013-01-01},
  doi       = {10.1371/journal.pone.0053227},
  keywords  = {child language, vocabulary learning, Zipf's law for word frequencies},
  abstract  = {It is well-known that word frequencies arrange themselves according to Zipf's law. However, little is known about the dependency of the parameters of the law and the complexity of a communication system. Many models of the evolution of language assume that the exponent of the law remains constant as the complexity of a communication systems increases. Using longitudinal studies of child language, we analysed the word rank distribution for the speech of children and adults participating in conversations. The adults typically included family members (e.g., parents) or the investigators conducting the research. Our analysis of the evolution of Zipf's law yields two main unexpected results. First, in children the exponent of the law tends to decrease over time while this tendency is weaker in adults, thus suggesting this is not a mere mirror effect of adult speech. Second, although the exponent of the law is more stable in adults, their exponents fall below 1 which is the typical value of the exponent assumed in both children and adults. Our analysis also shows a tendency of the mean length of utterances (MLU), a simple estimate of syntactic complexity, to increase as the exponent decreases. The parallel evolution of the exponent and a simple indicator of syntactic complexity (MLU) supports the hypothesis that the exponent of Zipf's law and linguistic complexity are inter-related. The assumption that Zipf's law for word ranks is a power-law with a constant exponent of one in both adults and children needs to be revised.},
  pubstate  = {published},
  tppubtype = {article}
}
2009
Ferrer-i-Cancho, R.; Elvevåg, B.
Random texts do not exhibit the real Zipf's-law-like rank distribution Journal Article
In: PLoS ONE, vol. 5, no. 4, pp. e9411, 2009.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies
% The abstract's parenthetical gives the rank numerals explicitly (rank 1, rank 2).
@article{Ferrer2009b,
  author    = {R. Ferrer-i-Cancho and B. Elvevåg},
  title     = {Random texts do not exhibit the real Zipf's-law-like rank distribution},
  journal   = {PLoS ONE},
  volume    = {5},
  number    = {4},
  pages     = {e9411},
  year      = {2009},
  date      = {2009-01-01},
  doi       = {10.1371/journal.pone.0009411},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {Background Zipf's law states that the relationship between the frequency of a word in a text and its rank (the most frequent word has rank 1, the 2nd most frequent word has rank 2,…) is approximately linear when plotted on a double logarithmic scale. It has been argued that the law is not a relevant or useful property of language because simple random texts - constructed by concatenating random characters including blanks behaving as word delimiters - exhibit a Zipf's law-like word rank distribution. Methodology/Principal Findings In this article, we examine the flaws of such putative good fits of random texts. We demonstrate - by means of three different statistical tests - that ranks derived from random texts and ranks derived from real texts are statistically inconsistent with the parameters employed to argue for such a good fit, even when the parameters are inferred from the target real text. Our findings are valid for both the simplest random texts composed of equally likely characters as well as more elaborate and realistic versions where character probabilities are borrowed from a real text. Conclusions/Significance The good fit of random texts to real Zipf's law-like rank distributions has not yet been established. Therefore, we suggest that Zipf's law might in fact be a fundamental law in natural languages.},
  pubstate  = {published},
  tppubtype = {article}
}
Ferrer-i-Cancho, R.; Gavaldà, R.
The frequency spectrum of finite samples from the intermittent silence process Journal Article
In: Journal of the American Society for Information Science and Technology, vol. 60, no. 4, pp. 837-843, 2009.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies
% Journal title is JASIST's full name ("... American Society for ..."), matching the asi DOI prefix.
@article{Ferrer2009a,
  author    = {R. Ferrer-i-Cancho and R. Gavaldà},
  title     = {The frequency spectrum of finite samples from the intermittent silence process},
  journal   = {Journal of the American Society for Information Science and Technology},
  volume    = {60},
  number    = {4},
  pages     = {837--843},
  year      = {2009},
  date      = {2009-01-01},
  doi       = {10.1002/asi.21033},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {It has been argued that the actual distribution of word frequencies could be reproduced or explained by generating a random sequence of letters and spaces according to the so-called intermittent silence process. The same kind of process could reproduce or explain the counts of other kinds of units from a wide range of disciplines. Taking the linguistic metaphor, we focus on the frequency spectrum, i.e., the number of words with a certain frequency, and the vocabulary size, i.e., the number of different words of text generated by an intermittent silence process. We derive and explain how to calculate accurately and efficiently the expected frequency spectrum and the expected vocabulary size as a function of the text size.},
  pubstate  = {published},
  tppubtype = {article}
}
2008
Ferrer-i-Cancho, R.; Hernández-Fernández, A.
Power laws and the golden number Book Section
In: Kelih, E.; Levickij, V.; Altmann, G. (Ed.): Problems of text analysis, pp. 518-523, Books - XXI, Chernivtsi, 2008.
Abstract | BibTeX | Tags: Zipf's law for word frequencies
% Books - XXI is the publishing house; Chernivtsi is its city, so it belongs in address.
% The second author is written in "Last, First" form so the compound surname is not split,
% and is spelled as in the Catala2021a entry of this list.
@incollection{Ferrer2008d,
  author    = {Ferrer-i-Cancho, R. and Hernández-Fernández, A.},
  title     = {Power laws and the golden number},
  editor    = {E. Kelih and V. Levickij and G. Altmann},
  booktitle = {Problems of text analysis},
  pages     = {518--523},
  publisher = {Books - XXI},
  address   = {Chernivtsi},
  year      = {2008},
  date      = {2008-01-01},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {The distribution of many real discrete random variables (e.g., the frequency of words, the population of cities) can be approximated by a zeta distribution, that is known popularly as Zipf’s law, or power law in physics. Here we revisit the relationship between power law distribution of a magnitude and the corresponding power relationship between the magnitude of a certain element and its rank. We show that the exponents of the two power laws coincide when its value is the famous golden number},
  pubstate  = {published},
  tppubtype = {incollection}
}
2007
Ferrer-i-Cancho, R.
On the universality of Zipf's law for word frequencies Book Section
In: Grzybek, P.; Köhler, R. (Ed.): Exact methods in the study of language and text. To honor Gabriel Altmann, pp. 131-140, Mouton de Gruyter, Berlin, 2007.
Links | BibTeX | Tags: information theory, Zipf's law for word frequencies
% Publisher name is given in full; a bare "Gruyter" is not a publisher name.
% NOTE(review): imprint assumed to be Mouton de Gruyter - confirm against the book's title page.
@incollection{Ferrer2006a,
  author    = {R. Ferrer-i-Cancho},
  title     = {On the universality of Zipf's law for word frequencies},
  editor    = {P. Grzybek and R. Köhler},
  booktitle = {Exact methods in the study of language and text. To honor Gabriel Altmann},
  pages     = {131--140},
  publisher = {Mouton de Gruyter},
  address   = {Berlin},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1515/9783110894219.131},
  keywords  = {information theory, Zipf's law for word frequencies},
  pubstate  = {published},
  tppubtype = {incollection}
}
2005
Ferrer-i-Cancho, R.
Decoding least effort and scaling in signal frequency distributions Journal Article
In: Physica A, vol. 345, pp. 275-284, 2005.
Abstract | Links | BibTeX | Tags: information theory, Zipf's law for word frequencies
% The inline math in the abstract uses proper TeX control sequences (\beta, \approx).
@article{Ferrer2003c,
  author    = {R. Ferrer-i-Cancho},
  title     = {Decoding least effort and scaling in signal frequency distributions},
  journal   = {Physica A},
  volume    = {345},
  pages     = {275--284},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1016/j.physa.2004.06.158},
  keywords  = {information theory, Zipf's law for word frequencies},
  abstract  = {Here, assuming a general communication model where objects map to signals, a power function for the distribution of signal frequencies is derived. The model relies on the satisfaction of the receiver (hearer) communicative needs when the entropy of the number of objects per signal is maximized. Evidence of power distributions in a linguistic context (some of them with exponents clearly different from the typical $\beta \approx 2$ of Zipf's law) is reviewed and expanded. We support the view that Zipf's law reflects some sort of optimization but following a novel realistic approach where signals (e.g. words) are used according to the objects (e.g. meanings) they are linked to. Our results strongly suggest that many systems in nature use non-trivial strategies for easing the interpretation of a signal. Interestingly, constraining just the number of interpretations of signals does not lead to scaling.},
  pubstate  = {published},
  tppubtype = {article}
}
Ferrer-i-Cancho, R.; Riordan, O.; Bollobás, B.
The consequences of Zipf's law for syntax and symbolic reference Journal Article
In: Proceedings of the Royal Society of London B, vol. 272, pp. 561-565, 2005.
Abstract | Links | BibTeX | Tags: network science, Zipf's law for word frequencies
@article{Ferrer2004f,
  author    = {R. Ferrer-i-Cancho and O. Riordan and B. Bollobás},
  title     = {The consequences of Zipf's law for syntax and symbolic reference},
  journal   = {Proceedings of the Royal Society of London B},
  volume    = {272},
  pages     = {561--565},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1098/rspb.2004.2957},
  keywords  = {network science, Zipf's law for word frequencies},
  abstract  = {Although many species possess rudimentary communication systems, humans seem to be unique with regard to making use of syntax and symbolic reference. Recent approaches to the evolution of language formalize why syntax is selectively advantageous compared with isolated signal communication systems, but do not explain how signals naturally combine. Even more recent work has shown that if a communication system maximizes communicative efficiency while minimizing the cost of communication, or if a communication system constrains ambiguity in a non-trivial way while a certain entropy is maximized, signal frequencies will be distributed according to Zipf's law. Here we show that such communication principles give rise not only to signals that have many traits in common with the linking words in real human languages, but also to a rudimentary sort of syntax and symbolic reference.},
  pubstate  = {published},
  tppubtype = {article}
}
Ferrer-i-Cancho, R.; Servedio, Vito D. P.
Can simple models explain Zipf's law for all exponents? Journal Article
In: Glottometrics, vol. 11, pp. 1-8, 2005.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies
@article{Ferrer2005c,
  author    = {R. Ferrer-i-Cancho and Vito D. P. Servedio},
  title     = {Can simple models explain Zipf's law for all exponents?},
  journal   = {Glottometrics},
  volume    = {11},
  pages     = {1--8},
  year      = {2005},
  date      = {2005-01-01},
  url       = {http://hdl.handle.net/2117/176249},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {H. Simon proposed a simple stochastic process for explaining Zipf’s law for word frequencies. Here we introduce two similar generalizations of Simon’s model that cover the same range of exponents as the standard Simon model. The mathematical approach followed minimizes the amount of mathematical background needed for deriving the exponent, compared to previous approaches to the standard Simon’s model. Reviewing what is known from other simple explanations of Zipf’s law, we conclude there is no single radically simple explanation covering the whole range of variation of the exponent of Zipf’s law in humans. The meaningfulness of Zipf’s law for word frequencies remains an open question.},
  pubstate  = {published},
  tppubtype = {article}
}
2003
Ferrer-i-Cancho, R.; Solé, R. V.
Least effort and the origins of scaling in human language Journal Article
In: Proceedings of the National Academy of Sciences USA, vol. 100, pp. 788-791, 2003.
Abstract | Links | BibTeX | Tags: information theory, Zipf's law for word frequencies
@article{Ferrer2002a,
  author    = {R. Ferrer-i-Cancho and R. V. Solé},
  title     = {Least effort and the origins of scaling in human language},
  journal   = {Proceedings of the National Academy of Sciences USA},
  volume    = {100},
  pages     = {788--791},
  year      = {2003},
  date      = {2003-01-01},
  doi       = {10.1073/pnas.0335980100},
  keywords  = {information theory, Zipf's law for word frequencies},
  abstract  = {The emergence of a complex language is one of the fundamental events of human evolution, and several remarkable features suggest the presence of fundamental principles of organization. These principles seem to be common to all languages. The best known is the so-called Zipf's law, which states that the frequency of a word decays as a (universal) power law of its rank. The possible origins of this law have been controversial, and its meaningfulness is still an open question. In this article, the early hypothesis of Zipf of a principle of least effort for explaining the law is shown to be sound. Simultaneous minimization in the effort of both hearer and speaker is formalized with a simple optimization process operating on a binary matrix of signal–object associations. Zipf's law is found in the transition between referentially useless systems and indexical reference systems. Our finding strongly suggests that Zipf's law is a hallmark of symbolic reference and not a meaningless feature. The implications for the evolution of language are discussed. We explain how language evolution can take advantage of a communicative phase transition.},
  pubstate  = {published},
  tppubtype = {article}
}
2002
Ferrer-i-Cancho, R.; Solé, R. V.
Zipf's law and random texts Journal Article
In: Advances in Complex Systems, vol. 5, pp. 1-6, 2002.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies
@article{Ferrer2001c,
  author    = {R. Ferrer-i-Cancho and R. V. Solé},
  title     = {Zipf's law and random texts},
  journal   = {Advances in Complex Systems},
  volume    = {5},
  pages     = {1--6},
  year      = {2002},
  date      = {2002-01-01},
  doi       = {10.1142/S0219525902000468},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {Random-text models have been proposed as an explanation for the power law relationship between word frequency and rank, the so-called Zipf's law. They are generally regarded as null hypotheses rather than models in the strict sense. In this context, recent theories of language emergence and evolution assume this law as a priori information with no need of explanation. Here, random texts and real texts are compared through (a) the so-called lexical spectrum and (b) the distribution of words having the same length. It is shown that real texts fill the lexical spectrum much more efficiently and regardless of the word length, suggesting that the meaningfulness of Zipf's law is high.},
  pubstate  = {published},
  tppubtype = {article}
}
2001
Ferrer-i-Cancho, R.; Solé, R. V.
Two regimes in the frequency of words and the origin of complex lexicons: Zipf's law revisited Journal Article
In: Journal of Quantitative Linguistics, vol. 8, no. 3, pp. 165-173, 2001.
Abstract | Links | BibTeX | Tags: Zipf's law for word frequencies
@article{Ferrer2000a,
  author    = {R. Ferrer-i-Cancho and R. V. Solé},
  title     = {Two regimes in the frequency of words and the origin of complex lexicons: Zipf's law revisited},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {8},
  number    = {3},
  pages     = {165--173},
  year      = {2001},
  date      = {2001-01-01},
  doi       = {10.1076/jqul.8.3.165.4101},
  keywords  = {Zipf's law for word frequencies},
  abstract  = {Zipf’s law states that the frequency of a word is a power function of its rank. The exponent of the power is usually accepted to be close to (-)1. Great deviations between the predicted and real number of different words of a text, disagreements between the predicted and real exponent of the probability density function and statistics on a big corpus, make evident that word frequency as a function of the rank follows two different exponents, ~(-)1 for the first regime and ~(-)2 for the second. The implications of the change in exponents for the metrics of texts and for the origins of complex lexicons are analyzed.},
  pubstate  = {published},
  tppubtype = {article}
}
In case the fancy publication browser above fails, you can also try.