2025
Petrini, Sonia; Ferrer-i-Cancho, Ramon
The distribution of syntactic dependency distances Journal Article
In: Glottometrics, vol. 58, pp. 35-94, 2025.
Abstract | Links | BibTeX | Tags: dependency syntax, network science, word order
@article{Petrini2022c,
title = {The distribution of syntactic dependency distances},
author = {Sonia Petrini and Ramon Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2211.14620},
doi = {10.53482/2025_58_424},
year = {2025},
date = {2025-01-01},
journal = {Glottometrics},
volume = {58},
pages = {35-94},
abstract = {The syntactic structure of a sentence can be represented as a graph, where vertices are words and edges indicate syntactic dependencies between them. In this setting, the distance between two linked words is defined as the difference between their positions. Here we wish to contribute to the characterization of the actual distribution of syntactic dependency distances, which has previously been argued to follow a power-law distribution. Here we propose a new model with two exponential regimes in which the probability decay is allowed to change after a break-point. This transition could mirror the transition from the processing of word chunks to higher-level structures. We find that a two-regime model - where the first regime follows either an exponential or a power-law decay - is the most likely one in all 20 languages we considered, independently of sentence length and annotation style. Moreover, the break-point exhibits low variation across languages and averages values of 4-5 words, suggesting that the amount of words that can be simultaneously processed abstracts from the specific language to a high degree. The probability decay slows down after the breakpoint, consistently with a universal chunk-and-pass mechanism. Finally, we give an account of the relation between the best estimated model and the closeness of syntactic dependencies as function of sentence length, according to a recently introduced optimality score.},
keywords = {dependency syntax, network science, word order},
pubstate = {published},
tppubtype = {article}
}
2024
Alemany-Puig, L.; Esteban, J. L.; Ferrer-i-Cancho, R.
The Maximum Linear Arrangement Problem for trees under projectivity and planarity Journal Article
In: Information Processing Letters, vol. 183, pp. 106400, 2024.
Abstract | Links | BibTeX | Tags: dependency syntax, network science, word order
@article{Alemany2022b,
title = {The Maximum Linear Arrangement Problem for trees under projectivity and planarity},
author = {L. Alemany-Puig and J. L. Esteban and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2206.06924},
doi = {10.1016/j.ipl.2023.106400},
year = {2024},
date = {2024-01-01},
journal = {Information Processing Letters},
volume = {183},
pages = {106400},
abstract = {The Maximum Linear Arrangement problem (MaxLA) consists of finding a mapping π from the n vertices of a graph G to distinct consecutive integers that maximizes the sum of edge lengths. In this setting, vertices are considered to lie on a horizontal line and edges are drawn as semicircles above the line. There exist variants of MaxLA in which the arrangements are constrained. In the planar variant edge crossings are forbidden. In the projective variant for rooted trees arrangements are planar and the root cannot be covered by any edge. Here we present O(n)-time and O(n)-space algorithms that solve Planar and Projective MaxLA for trees. We also prove several properties of maximum projective and planar arrangements.},
keywords = {dependency syntax, network science, word order},
pubstate = {published},
tppubtype = {article}
}
Alemany-Puig, L.; Ferrer-i-Cancho, R.
The expected sum of edge lengths in planar linearizations of trees Journal Article
In: Journal of Language Modelling, vol. 12, no. 1, pp. 1-42, 2024.
Abstract | Links | BibTeX | Tags: dependency syntax, network science, word order
@article{Alemany2022c,
title = {The expected sum of edge lengths in planar linearizations of trees},
author = {L. Alemany-Puig and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2207.05564},
doi = {10.15398/jlm.v12i1.362},
year = {2024},
date = {2024-01-01},
journal = {Journal of Language Modelling},
volume = {12},
number = {1},
pages = {1-42},
abstract = {Dependency graphs have proven to be a very successful model to represent the syntactic structure of sentences of human languages. In these graphs, widely accepted to be trees, vertices are words and arcs connect syntactically-dependent words. The tendency of these dependencies to be short has been demonstrated using random baselines for the sum of the lengths of the edges or its variants. A ubiquitous baseline is the expected sum in projective orderings (wherein edges do not cross and the root word of the sentence is not covered by any edge). It was shown that said expected value can be computed in O(n) time. In this article we focus on planar orderings (where the root word can be covered) and present two main results. First, we show the relationship between the expected sum in planar arrangements and the expected sum in projective arrangements. Second, we also derive a O(n)-time algorithm to calculate the expected value of the sum of edge lengths. These two results stem from another contribution of the present article, namely a characterization of planarity that, given a sentence, yields either the number of planar permutations or an efficient algorithm to generate uniformly random planar permutations of the words. Our research paves the way for replicating past research on dependency distance minimization using random planar linearizations as random baseline.},
keywords = {dependency syntax, network science, word order},
pubstate = {published},
tppubtype = {article}
}
2023
Alemany-Puig, L.; Esteban, J. L.; Ferrer-i-Cancho, R.
On the maximum linear arrangement problem for trees Journal Article
In: 2023.
Abstract | Links | BibTeX | Tags: dependency syntax, network science, word order
@article{Alemany2023a,
title = {On the maximum linear arrangement problem for trees},
author = {L. Alemany-Puig and J. L. Esteban and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2312.04487},
year = {2023},
date = {2023-01-01},
abstract = {Linear arrangements of graphs are a well-known type of graph labeling and are found at the heart of many important computational problems, such as the Minimum Linear Arrangement Problem (minLA). A linear arrangement is usually defined as a permutation of the n vertices of a graph. An intuitive geometric setting is that of vertices lying on consecutive integer positions in the real line, starting at 1; edges are typically drawn as semicircles above the real line. In this paper we study the Maximum Linear Arrangement problem (MaxLA), the maximization variant of minLA and a less studied problem than minLA. We a devise new characterization of maximum arrangements of general graphs, and prove that MaxLA can be solved for cycle graphs in constant time, and for k-linear trees (k≤2) in time O(n). We present a simple algorithm that solves a constrained variant of MaxLA, which we call bipartite MaxLA, in time O(n). This algorithm has two promising characteristics. First, it solves MaxLA for most trees consisting of a few tenths of nodes. Second, it produces a high quality approximation to MaxLA for trees where the algorithm fails to solve MaxLA. Furthermore, we conjecture this algorithm solves MaxLA for at least 50% of all free trees.},
keywords = {dependency syntax, network science, word order},
pubstate = {published},
tppubtype = {article}
}
2022
Alemany-Puig, Lluís; Esteban, Juan L.; Ferrer-i-Cancho, Ramon
The Linear Arrangement Library. A new tool for research on syntactic dependency structures Proceedings Article
In: Proceedings of the Second Workshop on Quantitative Syntax (Quasy, SyntaxFest 2021), pp. 1-16, Association for Computational Linguistics, Sofia, Bulgaria, 2022.
Abstract | Links | BibTeX | Tags: dependency syntax, network science, word order
@inproceedings{Alemany2022a,
title = {The Linear Arrangement Library. A new tool for research on syntactic dependency structures},
author = {Lluís Alemany-Puig and Juan L. Esteban and Ramon Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2112.02512},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the Second Workshop on Quantitative Syntax (Quasy, SyntaxFest 2021)},
pages = {1-16},
publisher = {Association for Computational Linguistics},
address = {Sofia, Bulgaria},
abstract = {The new and growing field of Quantitative Dependency Syntax has emerged at the crossroads between Dependency Syntax and Quantitative Linguistics. One of the main concerns in this field is the statistical patterns of syntactic dependency structures. These structures, grouped in treebanks, are the source for statistical analyses in these and related areas; dozens of scores devised over the years are the tools of a new industry to search for patterns and perform other sorts of analyses. The plethora of such metrics and their increasing complexity require sharing the source code of the programs used to perform such analyses. However, such code is not often shared with the scientific community or is tested following unknown standards. Here we present a new open-source tool, the Linear Arrangement Library (LAL), which caters to the needs of, especially, inexperienced programmers. This tool enables the calculation of these metrics on single syntactic dependency structures, treebanks, and collection of treebanks, grounded on ease of use and yet with great flexibility. LAL has been designed to be efficient, easy to use (while satisfying the needs of all levels of programming expertise), reliable (thanks to thorough testing), and to unite research from different traditions, geographic areas, and research fields.},
keywords = {dependency syntax, network science, word order},
pubstate = {published},
tppubtype = {inproceedings}
}
Alemany-Puig, L.; Ferrer-i-Cancho, R.
Linear-time calculation of the expected sum of edge lengths in random projective linearizations of trees Journal Article
In: Journal of Computational Linguistics, vol. 48, no. 3, pp. 491–516, 2022.
Abstract | Links | BibTeX | Tags: dependency syntax, network science, word order
@article{Alemany2021b,
title = {Linear-time calculation of the expected sum of edge lengths in random projective linearizations of trees},
author = {L. Alemany-Puig and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2107.03277},
doi = {10.1162/coli_a_00442},
year = {2022},
date = {2022-01-01},
journal = {Journal of Computational Linguistics},
volume = {48},
number = {3},
pages = {491–516},
abstract = {The syntactic structure of a sentence is often represented using syntactic dependency trees. The sum of the distances between syntactically related words has been in the limelight for the past decades. Research on dependency distances led to the formulation of the principle of dependency distance minimization whereby words in sentences are ordered so as to minimize that sum. Numerous random baselines have been defined to carry out related quantitative studies on languages. The simplest random baseline is the expected value of the sum in unconstrained random permutations of the words in the sentence, namely when all the shufflings of the words of a sentence are allowed and equally likely. Here we focus on a popular baseline: random projective permutations of the words of the sentence, that is, permutations where the syntactic dependency structure is projective, a formal constraint that sentences satisfy often in languages. Thus far, the expectation of the sum of dependency distances in random projective shufflings of a sentence has been estimated approximately with a Monte Carlo procedure whose cost is of the order of Zn, where n is the number of words of the sentence and Z is the number of samples; the larger Z, the lower the error of the estimation but the larger the time cost. Here we present formulae to compute that expectation without error in time of the order of n. Furthermore, we show that star trees maximize it, and devise a dynamic programming algorithm to retrieve the trees that minimize it.},
keywords = {dependency syntax, network science, word order},
pubstate = {published},
tppubtype = {article}
}
Alemany-Puig, L.; Esteban, J. L.; Ferrer-i-Cancho, R.
Minimum projective linearizations of trees in linear time Journal Article
In: Information Processing Letters, vol. 174, pp. 106204, 2022.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Alemany2021a,
title = {Minimum projective linearizations of trees in linear time},
author = {L. Alemany-Puig and J. L. Esteban and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2102.03277},
doi = {10.1016/j.ipl.2021.106204},
year = {2022},
date = {2022-01-01},
journal = {Information Processing Letters},
volume = {174},
pages = {106204},
abstract = {The minimum linear arrangement problem (MLA) consists of finding a mapping π from vertices of a graph to integers that minimizes the sum of dependency distances. For trees, various algorithms are available to solve the problem in polynomial time; the best known runs in subquadratic time in n=|V|. There exist variants of the MLA in which the arrangements are constrained to certain classes of projectivity. Iordanskii, and later Hochberg and Stallmann (HS), put forward O(n)-time algorithms that solve the problem when arrangements are constrained to be planar. We also consider linear arrangements of rooted trees that are constrained to be projective. Gildea and Temperley (GT) sketched an algorithm for the projectivity constraint which, as they claimed, runs in O(n) but did not provide any justification of its cost. In contrast, Park and Levy claimed that GT's algorithm runs in O(n log d_max) where dmax is the maximum degree but did not provide sufficient detail. Here we correct an error in HS's algorithm for the planar case, show its relationship with the projective case, and derive an algorithm for the projective case that runs undoubtlessly in O(n)-time.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.; Gómez-Rodríguez, C.; Esteban, J. L.; Alemany-Puig, L.
Optimality of syntactic dependency distances Journal Article
In: Physical Review E, vol. 105, pp. 014308, 2022.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2020b,
title = {Optimality of syntactic dependency distances},
author = {R. Ferrer-i-Cancho and C. Gómez-Rodríguez and J. L. Esteban and L. Alemany-Puig},
url = {https://arxiv.org/abs/2007.15342},
doi = {10.1103/PhysRevE.105.014308},
year = {2022},
date = {2022-01-01},
journal = {Physical Review E},
volume = {105},
pages = {014308},
abstract = {It is often stated that human languages, as other biological systems, are shaped by cost-cutting pressures but, to what extent? Attempts to quantify the degree of optimality of languages by means of an optimality score have been scarce and focused mostly on English. Here we recast the problem of the optimality of the word order of a sentence as an optimization problem on a
spatial network where the vertices are words, arcs indicate syntactic dependencies and the space is defined by the linear order of the words in the sentence. We introduce a new score to quantify the cognitive pressure to reduce the distance between linked words in a sentence. The analysis of sentences from 93 languages representing 19 linguistic families reveals that half of languages are optimized to a 70% or more. The score indicates that distances are not significantly reduced in a few languages and confirms two theoretical predictions, i.e. that longer sentences are more optimized and that distances are more likely to be longer than expected by chance in short sentences. We
present a new hierarchical ranking of languages by their degree of optimization. The statistical advantages of the new score call for a reevaluation of the evolution of dependency distance over time in languages as well as the relationship between dependency distance and linguistic competence. Finally, the principles behind the design of the score can be extended to develop more powerful normalizations of topological distances or physical distances in more dimensions.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
spatial network where the vertices are words, arcs indicate syntactic dependencies and the space is defined by the linear order of the words in the sentence. We introduce a new score to quantify the cognitive pressure to reduce the distance between linked words in a sentence. The analysis of sentences from 93 languages representing 19 linguistic families reveals that half of languages are optimized to a 70% or more. The score indicates that distances are not significantly reduced in a few languages and confirms two theoretical predictions, i.e. that longer sentences are more optimized and that distances are more likely to be longer than expected by chance in short sentences. We
present a new hierarchical ranking of languages by their degree of optimization. The statistical advantages of the new score call for a reevaluation of the evolution of dependency distance over time in languages as well as the relationship between dependency distance and linguistic competence. Finally, the principles behind the design of the score can be extended to develop more powerful normalizations of topological distances or physical distances in more dimensions.
Gómez-Rodríguez, C.; Christiansen, M. H.; Ferrer-i-Cancho, R.
Memory limitations are hidden in grammar Journal Article
In: Glottometrics, vol. 52, pp. 39 – 64, 2022.
Abstract | Links | BibTeX | Tags: dependency syntax, network science, word order
@article{Gomez2019a,
title = {Memory limitations are hidden in grammar},
author = {C. Gómez-Rodríguez and M. H. Christiansen and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/1908.06629},
doi = {10.53482/2022_52_397},
year = {2022},
date = {2022-01-01},
journal = {Glottometrics},
volume = {52},
pages = {39 – 64},
abstract = {The ability to produce and understand an unlimited number of different sentences is a hallmark of human language. Linguists have sought to define the essence of this generative capacity using formal grammars that describe the syntactic dependencies between constituents, independent of the computational limitations of the human brain. Here, we evaluate this independence assumption by sampling sentences uniformly from the space of possible syntactic structures. We find that the average dependency distance between syntactically related words, a proxy for memory limitations, is less than expected by chance in a collection of state-of-the-art classes of dependency grammars. Our findings indicate that memory limitations have permeated grammatical descriptions, suggesting that it may be impossible to build a parsimonious theory of human linguistic productivity independent of non-linguistic cognitive constraints.},
keywords = {dependency syntax, network science, word order},
pubstate = {published},
tppubtype = {article}
}
2021
Ferrer-i-Cancho, R.; Gómez-Rodríguez, C.; Esteban, J. L.
Bounds of the variation of the sum of edge lengths in linear arrangements of trees Journal Article
In: Journal of Statistical Mechanics, pp. 023403, 2021.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2020a,
title = {Bounds of the variation of the sum of edge lengths in linear arrangements of trees},
author = {R. Ferrer-i-Cancho and C. Gómez-Rodríguez and J. L. Esteban},
url = {https://arxiv.org/abs/2006.14069},
doi = {10.1088/1742-5468/abd4d7},
year = {2021},
date = {2021-01-01},
journal = {Journal of Statistical Mechanics},
pages = {023403},
abstract = {A fundamental problem in network science is the normalization of the topological or physical distance between vertices, that requires understanding the range of variation of the unnormalized distances. Here we investigate the limits of the variation of the physical distance in linear arrangements of the vertices of trees. In particular, we investigate various problems on the sum of edge lengths in trees of a fixed size: the minimum and the maximum value of the sum for specific trees, the minimum and the maximum in classes of trees (bistar trees and caterpillar trees) and finally the minimum and the maximum for any tree. We establish some foundations for research on optimality scores for spatial networks in one dimension.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2020
Alemany-Puig, L.; Mora, M.; Ferrer-i-Cancho, R.
Reappraising the distribution of the number of edge crossings of graphs on a sphere Journal Article
In: Journal of Statistical Mechanics, pp. 083401, 2020.
Abstract | Links | BibTeX | Tags: network science
@article{Alemany2018b,
title = {Reappraising the distribution of the number of edge crossings of graphs on a sphere},
author = {L. Alemany-Puig and M. Mora and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2003.03353},
doi = {10.1088/1742-5468/aba0ab},
year = {2020},
date = {2020-01-01},
journal = {Journal of Statistical Mechanics},
pages = {083401},
abstract = {Many real transportation and mobility networks have their vertices placed on the surface of the Earth. In such embeddings, the edges laid on that surface may cross. In his pioneering research, Moon analyzed the distribution of the number of crossings on complete graphs and complete bipartite graphs whose vertices are located uniformly at random on the surface of a sphere assuming that vertex placements are independent from each other. Here we revise his derivation of that variance in the light of recent theoretical developments on the variance of crossings and computer simulations. We show that Moon’s formulae are inaccurate in predicting the true variance and provide exact formulae.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
Alemany-Puig, L.; Ferrer-i-Cancho, R.
Edge crossings in random linear arrangements Journal Article
In: Journal of Statistical Mechanics, no. 2, pp. 023403, 2020.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Alemany2018a,
title = {Edge crossings in random linear arrangements},
author = {L. Alemany-Puig and R. Ferrer-i-Cancho},
doi = {10.1088/1742-5468/ab6845},
year = {2020},
date = {2020-01-01},
journal = {Journal of Statistical Mechanics},
number = {2},
pages = {023403},
abstract = {In spatial networks vertices are arranged in some space and edges may cross. When arranging vertices in a 1D lattice edges may cross when drawn above the vertex sequence as it happens in linguistic and biological networks. Here we investigate the general problem of the distribution of edge crossings in random arrangements of the vertices. We generalize the existing formula for the expectation of this number in random linear arrangements of trees to any network and derive an expression for the variance of the number of crossings in an arbitrary layout relying on a novel characterization of the algebraic structure of that variance in an arbitrary space. We provide compact formulae for the expectation and the variance in complete graphs, complete bipartite graphs, cycle graphs, one-regular graphs and various kinds of trees (star trees, quasi-star trees and linear trees). In these networks, the scaling of expectation and variance as a function of network size is asymptotically power-law-like in random linear arrangements. Our work paves the way for further research and applications in one-dimension or investigating the distribution of the number of crossings in lattices of higher dimension or other embeddings.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Alemany-Puig, L.; Ferrer-i-Cancho, R.
Fast calculation of the variance of edge crossings in random linear arrangements Journal Article
In: pp. under review, 2020.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Alemany2019b,
title = {Fast calculation of the variance of edge crossings in random linear arrangements},
author = {L. Alemany-Puig and R. Ferrer-i-Cancho},
url = {https://arxiv.org/abs/2003.03258},
year = {2020},
date = {2020-01-01},
pages = {under review},
abstract = {The interest in spatial networks where vertices are embedded in a one-dimensional space is growing. Remarkable examples of these networks are syntactic dependency trees and RNA structures. In this setup, the vertices of the network are arranged linearly and then edges may cross when drawn above the sequence of vertices. Recently, two aspects of the distribution of the number of crossings in uniformly random linear arrangements have been investigated: the expectation and the variance. While the computation of the expectation is straightforward, that of the variance is not. Here we present fast algorithms to calculate that variance in arbitrary graphs and forests. As for the latter, the algorithm calculates variance in linear time with respect to the number of vertices. This paves the way for many applications that rely on an exact but fast calculation of that variance. These algorithms are based on novel arithmetic expressions for the calculation of the variance that we develop from previous theoretical work.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
2019
Ferrer-i-Cancho, R
SyntaxFest 2019 Invited talk - Dependency distance minimization: facts, theory and predictions Proceedings Article
In: Proceedings of the First Workshop on Quantitative Syntax (Quasy, SyntaxFest 2019), pp. 1–1, Association for Computational Linguistics, Paris, France, 2019.
Links | BibTeX | Tags: network science, word order
@inproceedings{Ferrer2019f,
title = {SyntaxFest 2019 Invited talk - Dependency distance minimization: facts, theory and predictions},
author = {R Ferrer-i-Cancho},
url = {https://www.aclweb.org/anthology/W19-7901},
doi = {10.18653/v1/W19-7901},
year = {2019},
date = {2019-08-01},
booktitle = {Proceedings of the First Workshop on Quantitative Syntax (Quasy, SyntaxFest 2019)},
pages = {1–1},
publisher = {Association for Computational Linguistics},
address = {Paris, France},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {inproceedings}
}
Ferrer-i-Cancho, R.
The sum of edge lengths in random linear arrangements Journal Article
In: Journal of Statistical Mechanics, pp. 053401, 2019.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2018a,
title = {The sum of edge lengths in random linear arrangements},
author = {R. Ferrer-i-Cancho},
doi = {10.1088/1742-5468/ab11e2},
year = {2019},
date = {2019-01-01},
journal = {Journal of Statistical Mechanics},
pages = {053401},
abstract = {Spatial networks are networks where nodes are located in a space equipped with a metric. Typically, the space is two-dimensional and until recently and traditionally, the metric that was usually considered was the Euclidean distance. In spatial networks, the cost of a link depends on the edge length, i.e. the distance between the nodes that define the edge. Hypothesizing that there is pressure to reduce the length of the edges of a network requires a null model, e.g. a random layout of the vertices of the network. Here we investigate the properties of the distribution of the sum of edge lengths in random linear arrangement of vertices, that has many applications in different fields. A random linear arrangement consists of an ordering of the elements of the nodes of a network being all possible orderings equally likely. The distance between two vertices is one plus the number of intermediate vertices in the ordering. Compact formulae for the 1st and 2nd moments about zero as well as the variance of the sum of edge lengths are obtained for arbitrary graphs and trees. We also analyze the evolution of that variance in Erdős–Rényi graphs and its scaling in uniformly random trees. Various developments and applications for future research are suggested.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2018
Ferrer-i-Cancho, R.; Gómez-Rodríguez, C.; Esteban, J. L.
Are crossing dependencies really scarce? Journal Article
In: Physica A, vol. 493, pp. 311-329, 2018.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2017a,
title = {Are crossing dependencies really scarce?},
author = {R. Ferrer-i-Cancho and C. Gómez-Rodríguez and J. L. Esteban},
doi = {10.1016/j.physa.2017.10.048},
year = {2018},
date = {2018-01-01},
journal = {Physica A},
volume = {493},
pages = {311-329},
abstract = {The syntactic structure of a sentence can be modelled as a tree, where vertices correspond to words and edges indicate syntactic dependencies. It has been claimed recurrently that the number of edge crossings in real sentences is small. However, a baseline or null hypothesis has been lacking. Here we quantify the amount of crossings of real sentences and compare it to the predictions of a series of baselines. We conclude that crossings are really scarce in real sentences. Their scarcity is unexpected by the hubiness of the trees. Indeed, real sentences are close to linear trees, where the potential number of crossings is maximized.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.; Vitevitch, M.
The origins of Zipf's meaning-frequency law Journal Article
In: Journal of the American Association for Information Science and Technology, vol. 69, no. 11, pp. 1369–1379, 2018.
Abstract | Links | BibTeX | Tags: information theory, network science, Zipf's meaning-frequency law
@article{Ferrer2017b,
title = {The origins of Zipf's meaning-frequency law},
author = {R. Ferrer-i-Cancho and M. Vitevitch},
doi = {10.1002/jasist.24057},
year = {2018},
date = {2018-01-01},
journal = {Journal of the American Association for Information Science and Technology},
volume = {69},
number = {11},
pages = {1369–1379},
abstract = {In his pioneering research, G.K. Zipf observed that more frequent words tend to have more meanings, and showed that the number of meanings of a word grows as the square root of its frequency. He derived this relationship from two assumptions: that words follow Zipf's law for word frequencies (a power law dependency between frequency and rank) and Zipf's law of meaning distribution (a power law dependency between number of meanings and rank). Here we show that a single assumption on the joint probability of a word and a meaning suffices to infer Zipf's meaning‐frequency law or relaxed versions. Interestingly, this assumption can be justified as the outcome of a biased random walk in the process of mental exploration.},
keywords = {information theory, network science, Zipf's meaning-frequency law},
pubstate = {published},
tppubtype = {article}
}
2017
Ferrer-i-Cancho, R.
A commentary on ``The now-or-never bottleneck: a fundamental constraint on language'', by Christiansen and Chater (2016) Journal Article
In: Glottometrics, vol. 38, pp. 107-111, 2017.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2017d,
title = {A commentary on ``The now-or-never bottleneck: a fundamental constraint on language'', by Christiansen and Chater (2016)},
author = {R. Ferrer-i-Cancho},
url = {http://hdl.handle.net/2117/107857},
year = {2017},
date = {2017-01-01},
journal = {Glottometrics},
volume = {38},
pages = {107-111},
abstract = {In a recent article, Christiansen and Chater (2016) present a fundamental constraint on language, i.e. a now-or-never bottleneck that arises from our fleeting memory, and explore its implications, e.g., chunk-and-pass processing, outlining a framework that promises to unify different areas of research. Here we explore additional support for this constraint and suggest further connections from quantitative linguistics and information theory.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
Gómez-Rodríguez, C.; Ferrer-i-Cancho, R.
Scarcity of crossing dependencies: a direct outcome of a specific constraint? Journal Article
In: Physical Review E, vol. 96, pp. 062304, 2017.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Gomez2016a,
title = {Scarcity of crossing dependencies: a direct outcome of a specific constraint?},
author = {C. Gómez-Rodríguez and R. Ferrer-i-Cancho},
doi = {10.1103/PhysRevE.96.062304},
year = {2017},
date = {2017-01-01},
journal = {Physical Review E},
volume = {96},
pages = {062304},
abstract = {The structure of a sentence can be represented as a network where vertices are words and edges indicate syntactic dependencies. Interestingly, crossing syntactic dependencies have been observed to be infrequent in human languages. This leads to the question of whether the scarcity of crossings in languages arises from an independent and specific constraint on crossings. We provide statistical evidence suggesting that this is not the case, as the proportion of dependency crossings of sentences from a wide range of languages can be accurately estimated by a simple predictor based on a null hypothesis on the local probability that two dependencies cross given their lengths. The relative error of this predictor never exceeds 5% on average, whereas the error of a baseline predictor assuming a random ordering of the words of a sentence is at least six times greater. Our results suggest that the low frequency of crossings in natural languages is neither originated by hidden knowledge of language nor by the undesirability of crossings per se, but as a mere side effect of the principle of dependency length minimization.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.
Random crossings in dependency trees Journal Article
In: Glottometrics, vol. 37, pp. 1-12, 2017.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2013d,
title = {Random crossings in dependency trees},
author = {R. Ferrer-i-Cancho},
url = {http://hdl.handle.net/2117/106079},
year = {2017},
date = {2017-01-01},
journal = {Glottometrics},
volume = {37},
pages = {1-12},
abstract = {It has been hypothesized that the rather small number of crossings in real syntactic dependency trees is a side-effect of pressure for dependency length minimization. Here we answer a related important research question: what would be the expected number of crossings if the natural order of a sentence was lost and replaced by a random ordering? We show that this number depends only on the number of vertices of the dependency tree (the sentence length) and the second moment about zero of vertex degrees. The expected number of crossings is minimum for a star tree (crossings are impossible) and maximum for a linear tree (the number of crossings is of the order of the square of the sequence length).},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
2016
Ferrer-i-Cancho, R.
Non-crossing dependencies: least effort, not grammar Book Section
In: Mehler, A.; Lücking, A.; Banisch, S.; Blanchard, P.; Job, B. (Ed.): Towards a theoretical framework for analyzing complex linguistic networks, pp. 203-234, Springer, Berlin, 2016.
Abstract | Links | BibTeX | Tags: network science, word order
@incollection{Ferrer2016d,
title = {Non-crossing dependencies: least effort, not grammar},
author = {R. Ferrer-i-Cancho},
editor = {A. Mehler and A. Lücking and S. Banisch and P. Blanchard and B. Job},
doi = {10.1007/978-3-662-47238-5_10},
year = {2016},
date = {2016-01-01},
booktitle = {Towards a theoretical framework for analyzing complex linguistic networks},
pages = {203-234},
publisher = {Springer},
address = {Berlin},
abstract = {The use of null hypotheses (in a statistical sense) is common in hard sciences but not in theoretical linguistics. Here the null hypothesis that the low frequency of syntactic dependency crossings is expected by an arbitrary ordering of words is rejected. It is shown that this would require star dependency structures, which are both unrealistic and too restrictive. The hypothesis of the limited resources of the human brain is revisited. Stronger null hypotheses taking into account actual dependency lengths for the likelihood of crossings are presented. Those hypotheses suggests that crossings are likely to reduce when dependencies are shortened. A hypothesis based on pressure to reduce dependency lengths is more parsimonious than a principle of minimization of crossings or a grammatical ban that is totally dissociated from the general and non-linguistic principle of economy.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {incollection}
}
Esteban, J. L.; Ferrer-i-Cancho, R.; Gómez-Rodríguez, C.
The scaling of the minimum sum of edge lengths in uniformly random trees Journal Article
In: Journal of Statistical Mechanics, pp. 063401, 2016.
Abstract | Links | BibTeX | Tags: network science
@article{Esteban2016a,
title = {The scaling of the minimum sum of edge lengths in uniformly random trees},
author = {J. L. Esteban and R. Ferrer-i-Cancho and C. Gómez-Rodríguez},
doi = {10.1088/1742-5468/2016/06/063401},
year = {2016},
date = {2016-01-01},
journal = {Journal of Statistical Mechanics},
pages = {063401},
abstract = {The minimum linear arrangement problem on a network consists of finding the minimum sum of edge lengths that can be achieved when the vertices are arranged linearly. Although there are algorithms to solve this problem on trees in polynomial time, they have remained theoretical and have not been implemented in practical contexts to our knowledge. Here we use one of those algorithms to investigate the growth of this sum as a function of the size of the tree in uniformly random trees. We show that this sum is bounded above by its value in a star tree. We also show that the mean edge length grows logarithmically in optimal linear arrangements, in stark contrast to the linear growth that is expected on optimal arrangements of star trees or on random linear arrangements.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.
The meaning-frequency law in Zipfian optimization models of communication Journal Article
In: Glottometrics, vol. 35, pp. 28-37, 2016.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2014d,
title = {The meaning-frequency law in Zipfian optimization models of communication},
author = {R. Ferrer-i-Cancho},
url = {http://hdl.handle.net/2117/95871},
year = {2016},
date = {2016-01-01},
journal = {Glottometrics},
volume = {35},
pages = {28-37},
abstract = {According to Zipf's meaning-frequency law, words that are more frequent tend to have more meanings. Here it is shown that a linear dependency between the frequency of a form and its number of meanings is found in a family of models of Zipf's law for word frequencies. This is evidence for a weak version of the meaning-frequency law. Interestingly, that weak law (a) is not an inevitable of property of the assumptions of the family and (b) is found at least in the narrow regime where those models exhibit Zipf's law for word frequencies.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2015
Ferrer-i-Cancho, R.
The placement of the head that minimizes online memory. A complex systems approach Journal Article
In: Language Dynamics and Change, vol. 5, no. 1, pp. 114-137, 2015.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2013e,
title = {The placement of the head that minimizes online memory. A complex systems approach},
author = {R. Ferrer-i-Cancho},
doi = {10.1163/22105832-00501007},
year = {2015},
date = {2015-01-01},
journal = {Language Dynamics and Change},
volume = {5},
number = {1},
pages = {114-137},
abstract = {It is well known that the length of a syntactic dependency determines its online memory cost. Thus, the problem of the placement of a head and its dependents (complements or modifiers) that minimizes online memory is equivalent to the problem of the minimum linear arrangement of a star tree. However, how that length is translated into cognitive cost is not known. This study shows that the online memory cost is minimized when the head is placed at the center, regardless of the function that transforms length into cost, provided only that this function is strictly monotonically increasing. Online memory defines a quasi-convex adaptive landscape with a single central minimum if the number of elements is odd and two central minima if that number is even. We discuss various aspects of the dynamics of word order of subject (S), verb (V) and object (O) from a complex systems perspective and suggest that word orders tend to evolve by swapping adjacent constituents from an initial or early SOV configuration that is attracted towards a central word order by online memory minimization. We also suggest that the stability of SVO is due to at least two factors, the quasi-convex shape of the adaptive landscape in the online memory dimension and online memory adaptations that avoid regression to SOV. Although OVS is also optimal for placing the verb at the center, its low frequency is explained by its long distance to the seminal SOV in the permutation space.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.; Gómez-Rodríguez, C.
Crossings as a side effect of dependency lengths Journal Article
In: Complexity, vol. 21, pp. 320-328, 2015.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2015c,
title = {Crossings as a side effect of dependency lengths},
author = {R. Ferrer-i-Cancho and C. Gómez-Rodríguez},
doi = {10.1002/cplx.21810},
year = {2015},
date = {2015-01-01},
journal = {Complexity},
volume = {21},
pages = {320-328},
abstract = {The syntactic structure of sentences exhibits a striking regularity: dependencies tend to not cross when drawn above the sentence. We investigate two competing explanations. The traditional hypothesis is that this trend arises from an independent principle of syntax that reduces crossings practically to zero. An alternative to this view is the hypothesis that crossings are a side effect of dependency lengths, that is, sentences with shorter dependency lengths should tend to have fewer crossings. We are able to reject the traditional view in the majority of languages considered. The alternative hypothesis can lead to a more parsimonious theory of language.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
2014
Ferrer-i-Cancho, R.
A stronger null hypothesis for crossing dependencies Journal Article
In: Europhysics Letters, vol. 108, no. 5, pp. 58003, 2014.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2014c,
title = {A stronger null hypothesis for crossing dependencies},
author = {R. Ferrer-i-Cancho},
doi = {10.1209/0295-5075/108/58003},
year = {2014},
date = {2014-01-01},
journal = {Europhysics Letters},
volume = {108},
number = {5},
pages = {58003},
abstract = {The syntactic structure of a sentence can be modeled as a tree where vertices are words and edges indicate syntactic dependencies between words. It is well known that those edges normally do not cross when drawn over the sentence. Here a new null hypothesis for the number of edge crossings of a sentence is presented. That null hypothesis takes into account the length of the pair of edges that may cross and predicts the relative number of crossings in random trees with a small error, suggesting that a ban of crossings or a principle of minimization of crossings are not needed in general to explain the origins of non-crossing dependencies. Our work paves the way for more powerful null hypotheses to investigate the origins of non-crossing dependencies in Nature.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.
Beyond description. Comment on "Approaching human language with complex networks" by Cong & Liu Journal Article
In: Physics of Life Reviews, vol. 11, no. 4, pp. 621-623, 2014.
Links | BibTeX | Tags: network science
@article{Ferrer2014g,
title = {Beyond description. Comment on "Approaching human language with complex networks" by Cong & Liu},
author = {R. Ferrer-i-Cancho},
doi = {10.1016/j.plrev.2014.07.014},
year = {2014},
date = {2014-01-01},
journal = {Physics of Life Reviews},
volume = {11},
number = {4},
pages = {621-623},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.
In: Physics of Life Reviews, vol. 21, pp. 218-220, 2014.
Links | BibTeX | Tags: network science, word order
@article{Ferrer2017c,
title = {Towards a theory of word order. Comment on "Dependency distance: A new perspective on syntactic patterns in natural language" by Haitao Liu et al.},
author = {R. Ferrer-i-Cancho},
doi = {10.1016/j.plrev.2017.06.019},
year = {2014},
date = {2014-01-01},
journal = {Physics of Life Reviews},
volume = {21},
pages = {218-220},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
2013
Ferrer-i-Cancho, R.
Hubiness, length, crossings and their relationships in dependency trees Journal Article
In: Glottometrics, vol. 25, pp. 1-21, 2013.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2013b,
title = {Hubiness, length, crossings and their relationships in dependency trees},
author = {R. Ferrer-i-Cancho},
url = {http://hdl.handle.net/2117/176972},
year = {2013},
date = {2013-01-01},
journal = {Glottometrics},
volume = {25},
pages = {1-21},
abstract = {Here tree dependency structures are studied from three different perspectives: their degree variance (hubiness), the mean dependency length and the number of dependency crossings. Bounds that reveal pairwise dependencies among these three metrics are derived. Hubiness (the variance of degrees) plays a central role: the mean dependency length is bounded below by hubiness while the number of crossings is bounded above by hubiness. Our findings suggest that the online memory cost of a sentence might be determined not just by the ordering of words but also by the hubiness of the underlying structure. The 2nd moment of degree plays a crucial role that is reminiscent of its role in large complex networks.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
Baronchelli, A.; Ferrer-i-Cancho, R.; Pastor-Satorras, R.; Chatter, N.; Christiansen, M. H.
Networks in cognitive science Journal Article
In: Trends in Cognitive Sciences, vol. 17, pp. 348-360, 2013.
Abstract | Links | BibTeX | Tags: network science
@article{Baronchelli2013a,
title = {Networks in cognitive science},
author = {A. Baronchelli and R. Ferrer-i-Cancho and R. Pastor-Satorras and N. Chatter and M. H. Christiansen},
doi = {10.1016/j.tics.2013.04.010},
year = {2013},
date = {2013-01-01},
journal = {Trends in Cognitive Sciences},
volume = {17},
pages = {348-360},
abstract = {Networks of interconnected nodes have long played a key role in Cognitive Science, from artificial neural networks to spreading activation models of semantic memory. Recently, however, a new Network Science has been developed, providing insights into the emergence of global, system-scale properties in contexts as diverse as the Internet, metabolic reactions, and collaborations among scientists. Today, the inclusion of network theory into Cognitive Sciences, and the expansion of complex-systems science, promises to significantly change the way in which the organization and dynamics of cognitive and behavioral processes are understood. In this paper, we review recent contributions of network theory at different levels and domains within the Cognitive Sciences.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2010
Kello, C. T.; Brown, G. D. A.; Ferrer-i-Cancho, R.; Holden, J. G.; Linkenkaer-Hansen, K.; Rhodes, T.; Orden, G. C. Van
Scaling laws in cognitive sciences Journal Article
In: Trends in Cognitive Sciences, vol. 14, no. 5, pp. 223–232, 2010.
Abstract | Links | BibTeX | Tags: network science
@article{Kello2010a,
title = {Scaling laws in cognitive sciences},
author = {C. T. Kello and G. D. A. Brown and R. Ferrer-i-Cancho and J. G. Holden and K. Linkenkaer-Hansen and T. Rhodes and G. C. Van Orden},
doi = {10.1016/j.tics.2010.02.005},
year = {2010},
date = {2010-01-01},
journal = {Trends in Cognitive Sciences},
volume = {14},
number = {5},
pages = {223–232},
abstract = {Scaling laws are ubiquitous in nature, and they pervade neural, behavioral and linguistic activities. A scaling law suggests the existence of processes or patterns that are repeated across scales of analysis. Although the variables that express a scaling law can vary from one type of activity to the next, the recurrence of scaling laws across so many different systems has prompted a search for unifying principles. In biological systems, scaling laws can reflect adaptive processes of various types and are often linked to complex systems poised near critical points. The same is true for perception, memory, language and other cognitive phenomena. Findings of scaling laws in cognitive science are indicative of scaling invariance in cognitive mechanisms and multiplicative interactions among interdependent components of cognition.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2008
Ferrer-i-Cancho, R.
Network theory Book Section
In: Hogan, P. P. Colm (Ed.): The Cambridge encyclopedia of the language sciences, pp. 555-557, Cambridge University Press, 2008.
BibTeX | Tags: network science
@incollection{Ferrer2008c,
title = {Network theory},
author = {R. Ferrer-i-Cancho},
editor = {P. P. Colm Hogan},
year = {2008},
date = {2008-01-01},
booktitle = {The Cambridge encyclopedia of the language sciences},
pages = {555-557},
publisher = {Cambridge University Press},
keywords = {network science},
pubstate = {published},
tppubtype = {incollection}
}
2007
Ferrer-i-Cancho, R.; Mehler, A.; Pustylnikov, O.; Díaz-Guilera, A.
Correlations in the organization of large-scale syntactic dependency networks Proceedings Article
In: Proceedings of the workshop TextGraphs-2: Graph-based Methods for Natural Language Processing at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL-HLT 2007), Rochester, New York, pp. 65-72, 2007.
Abstract | Links | BibTeX | Tags: network science
@inproceedings{Ferrer2007b,
title = {Correlations in the organization of large-scale syntactic dependency networks},
author = {R. Ferrer-i-Cancho and A. Mehler and O. Pustylnikov and A. Díaz-Guilera},
url = {https://www.aclweb.org/anthology/W07-0210},
year = {2007},
date = {2007-01-01},
booktitle = {Proceedings of the workshop TextGraphs-2: Graph-based Methods for Natural Language Processing at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL-HLT 2007), Rochester, New York},
pages = {65-72},
abstract = {We study the correlations in the connectivity patterns of large scale syntactic dependency networks. These networks are
induced from treebanks: their vertices denote word forms which occur as nuclei of dependency trees.
Their edges connect pairs of vertices if at least two instance nuclei of these vertices are linked in the dependency structure of a sentence.
We examine the syntactic dependency networks of seven languages. In all these cases, we consistently obtain three findings.
Firstly, clustering, i.e., the probability that two vertices which are linked to
a common vertex are linked on their part, is much higher than expected by chance.
Secondly, the mean clustering of vertices decreases with their degree - this finding suggests the presence of a hierarchical
network organization. Thirdly, the mean degree of the nearest neighbors of a vertex x tends to decrease as the degree of
x grows - this finding indicates disassortative mixing in the sense that links tend to connect vertices of dissimilar degrees.
Our results indicate the existence of common patterns in the large scale organization of syntactic dependency networks.},
keywords = {network science},
pubstate = {published},
tppubtype = {inproceedings}
}
induced from treebanks: their vertices denote word forms which occur as nuclei of dependency trees.
Their edges connect pairs of vertices if at least two instance nuclei of these vertices are linked in the dependency structure of a sentence.
We examine the syntactic dependency networks of seven languages. In all these cases, we consistently obtain three findings.
Firstly, clustering, i.e., the probability that two vertices which are linked to
a common vertex are linked on their part, is much higher than expected by chance.
Secondly, the mean clustering of vertices decreases with their degree - this finding suggests the presence of a hierarchical
network organization. Thirdly, the mean degree of the nearest neighbors of a vertex x tends to decrease as the degree of
x grows - this finding indicates disassortative mixing in the sense that links tend to connect vertices of dissimilar degrees.
Our results indicate the existence of common patterns in the large scale organization of syntactic dependency networks.
Ferrer-i-Cancho, R.; Capocci, A.; Caldarelli, G.
Spectral methods cluster words of the same class in a syntactic dependency network Journal Article
In: International Journal of Bifurcation and Chaos, vol. 17, no. 7, pp. 2453-2463, 2007.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2005a,
title = {Spectral methods cluster words of the same class in a syntactic dependency network},
author = {R. Ferrer-i-Cancho and A. Capocci and G. Caldarelli},
doi = {10.1142/S021812740701852X},
year = {2007},
date = {2007-01-01},
journal = {International Journal of Bifurcation and Chaos},
volume = {17},
number = {7},
pages = {2453-2463},
abstract = {We analyze here a particular kind of linguistic network where vertices represent words and edges stand for syntactic relationships between words. The statistical properties of these networks have been recently studied and various features such as the small-world phenomenon and a scale-free distribution of degrees have been found. Our work focuses on four classes of words: verbs, nouns, adverbs and adjectives. Here, we use spectral methods sorting vertices. We show that the ordering clusters words of the same class. For nouns and verbs, the cluster size distribution clearly follows a power-law distribution that cannot be explained by a null hypothesis. Long-range correlations are found between vertices in the ordering provided by the spectral method. The findings support the use of spectral methods for detecting community structure.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2006
Ferrer-i-Cancho, R.
Why do syntactic links not cross? Journal Article
In: Europhysics Letters, vol. 76, no. 6, pp. 1228-1235, 2006.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2006d,
title = {Why do syntactic links not cross?},
author = {R. Ferrer-i-Cancho},
doi = {10.1209/epl/i2006-10406-0},
year = {2006},
date = {2006-01-01},
journal = {Europhysics Letters},
volume = {76},
number = {6},
pages = {1228-1235},
abstract = {Here we study the arrangement of vertices of trees in a 1-dimensional Euclidean space when the Euclidean distance between linked vertices is minimized. We conclude that links are unlikely to cross when drawn over the vertex sequence. This finding suggests that the uncommonness of crossings in the trees specifying the syntactic structure of sentences could be a side-effect of minimizing the Euclidean distance between syntactically related words. As far as we know, nobody has provided a successful explanation of such a surprisingly universal feature of languages that was discovered in the 60s of the past century by Hays and Lecerf. On the one hand, support for the role of distance minimization in avoiding edge crossings comes from statistical studies showing that the Euclidean distance between syntactically linked words of real sentences is minimized or constrained to a small value. On the other hand, that distance is considered a measure of the cost of syntactic relationships in various frameworks. By cost, we mean the amount of computational resources needed by the brain. The absence of crossings in syntactic trees may be universal just because all human brains have limited resources.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.
When language breaks into pieces. A conflict between communication through isolated signals and language Journal Article
In: Biosystems, vol. 84, pp. 242-253, 2006.
Abstract | Links | BibTeX | Tags: information theory, network science
@article{Ferrer2005e,
title = {When language breaks into pieces. A conflict between communication through isolated signals and language},
author = {R. Ferrer-i-Cancho},
doi = {10.1016/j.biosystems.2005.12.001},
year = {2006},
date = {2006-01-01},
journal = {Biosystems},
volume = {84},
pages = {242-253},
abstract = {Here, we study a communication model where signals associate to stimuli. The model assumes that signals follow Zipf’s law and the exponent of the law depends on a balance between maximizing the information transfer and saving the cost of signal use. We study the effect of tuning that balance on the structure of signal–stimulus associations. The model starts from two recent results. First, the exponent grows as the weight of information transfer increases. Second, a rudimentary form of language is obtained when the network of signal–stimulus associations is almost connected. Here, we show the existence of a sudden destruction of language once a critical balance is crossed. The model shows that maximizing the information transfer through isolated signals and language are in conflict. The model proposes a strong reason for not finding large exponents in complex communication systems: language is in danger. Besides, the findings suggest that human words may need to be ambiguous to keep language alive. Interestingly, the model predicts that large exponents should be associated to decreased synaptic density. It is not surprising that the largest exponents correspond to schizophrenic patients since, according to the spirit of Feinberg’s hypothesis, i.e. decreased synaptic density may lead to schizophrenia. Our findings suggest that the exponent of Zipf’s law is intimately related to language and that it could be used to detect anomalous structure and organization of the brain.},
keywords = {information theory, network science},
pubstate = {published},
tppubtype = {article}
}
2005
Ferrer-i-Cancho, R.
The structure of syntactic dependency networks from recent advances in the study of linguistic networks Book Section
In: Levickij, V.; Altmann, G. (Ed.): The problems in quantitative linguistics, pp. 60-75, Ruta, Chernivtsi, 2005.
Abstract | BibTeX | Tags: network science
@incollection{Ferrer2005f,
title = {The structure of syntactic dependency networks from recent advances in the study of linguistic networks},
author = {R. Ferrer-i-Cancho},
editor = {V. Levickij and G. Altmann},
year = {2005},
date = {2005-01-01},
booktitle = {The problems in quantitative linguistics},
pages = {60-75},
publisher = {Ruta},
address = {Chernivtsi},
abstract = {Complex networks have received substantial attention from physics recently. Here we review from a physics perspective the different linguistic networks that have been studied. We focus on syntactic dependency networks and summarize some recent new results that suggest new possible ways of understanding the universal properties of world languages.},
keywords = {network science},
pubstate = {published},
tppubtype = {incollection}
}
Ferrer-i-Cancho, R.; Riordan, O.; Bollobás, B.
The consequences of Zipf's law for syntax and symbolic reference Journal Article
In: Proceedings of the Royal Society of London B, vol. 272, pp. 561-565, 2005.
Abstract | Links | BibTeX | Tags: network science, Zipf's law for word frequencies
@article{Ferrer2004f,
title = {The consequences of Zipf's law for syntax and symbolic reference},
author = {R. Ferrer-i-Cancho and O. Riordan and B. Bollobás},
doi = {10.1098/rspb.2004.2957},
year = {2005},
date = {2005-01-01},
journal = {Proceedings of the Royal Society of London B},
volume = {272},
pages = {561-565},
abstract = {Although many species possess rudimentary communication systems, humans seem to be unique with regard to making use of syntax and symbolic reference. Recent approaches to the evolution of language formalize why syntax is selectively advantageous compared with isolated signal communication systems, but do not explain how signals naturally combine. Even more recent work has shown that if a communication system maximizes communicative efficiency while minimizing the cost of communication, or if a communication system constrains ambiguity in a non-trivial way while a certain entropy is maximized, signal frequencies will be distributed according to Zipf's law. Here we show that such communication principles give rise not only to signals that have many traits in common with the linking words in real human languages, but also to a rudimentary sort of syntax and symbolic reference.},
keywords = {network science, Zipf's law for word frequencies},
pubstate = {published},
tppubtype = {article}
}
2004
Ferrer-i-Cancho, R.
Euclidean distance between syntactically linked words Journal Article
In: Physical Review E, vol. 70, pp. 056135, 2004.
Abstract | Links | BibTeX | Tags: network science, word order
@article{Ferrer2004b,
title = {Euclidean distance between syntactically linked words},
author = {R. Ferrer-i-Cancho},
doi = {10.1103/PhysRevE.70.056135},
year = {2004},
date = {2004-01-01},
journal = {Physical Review E},
volume = {70},
pages = {056135},
abstract = {We study the Euclidean distance between syntactically linked words in sentences. The average distance is significantly small and is a very slowly growing function of sentence length. We consider two nonexcluding hypotheses: (a) the average distance is minimized and (b) the average distance is constrained. Support for (a) comes from the significantly small average distance real sentences achieve. The strength of the minimization hypothesis decreases with the length of the sentence. Support for (b) comes from the very slow growth of the average distance versus sentence length. Furthermore, (b) predicts, under ideal conditions, an exponential distribution of the distance between linked words, a trend that can be identified in real sentences.},
keywords = {network science, word order},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.; Solé, R. V.; Köhler, R.
Patterns in syntactic dependency networks Journal Article
In: Physical Review E, vol. 69, pp. 051915, 2004.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2003f,
title = {Patterns in syntactic dependency networks},
author = {R. Ferrer-i-Cancho and R. V. Solé and R. Köhler},
doi = {10.1103/PhysRevE.69.051915},
year = {2004},
date = {2004-01-01},
journal = {Physical Review E},
volume = {69},
pages = {051915},
abstract = {Many languages are spoken on Earth. Despite their diversity, many robust language universals are known to exist. All languages share syntax, i.e., the ability of combining words for forming sentences. The origin of such traits is an issue of open debate. By using recent developments from the statistical physics of complex networks, we show that different syntactic dependency networks (from Czech, German, and Romanian) share many nontrivial statistical patterns such as the small world phenomenon, scaling in the distribution of degrees, and disassortative mixing. Such previously unreported features of syntax organization are not a trivial consequence of the structure of sentences, but an emergent trait at the global scale.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2003
Ferrer-i-Cancho, R.
Language: universals, principles and origins PhD Thesis
Universitat Politècnica de Catalunya, 2003.
Abstract | BibTeX | Tags: information theory, network science
@phdthesis{Ferrer2003b,
title = {Language: universals, principles and origins},
author = {R. Ferrer-i-Cancho},
year = {2003},
date = {2003-01-01},
address = {Barcelona},
school = {Universitat Politècnica de Catalunya},
abstract = {Here, old and new linguistic universals, i.e. properties obeyed by all languages on Earth are investigated. Basic principles of language predicting linguistic universals are also investigated. More precisely, two principles of reference, i.e. coding least effort and decoding least effort, a reformulation of G. K. Zipf's speaker and hearer least effort principles. Such referential principles predict Zipf's law, a universal of word frequencies, at the maximum tension between coding and decoding needs. Although trivial processes have been proposed for explaining Zipf's law in non-linguistic contexts, Zipf's law meaningfulness for human language is supported here. Minimizing the Euclidean distance between syntactically related words in sentences is a principle predicting projectivity, a universal stating that arcs between syntactically linked words in sentences generally do not cross. Besides, such a physical distance minimization successfully predicts (a) an exponential distribution for the distribution of the distance between syntactically related words and (b) subject-verb-object (SVO) order superiority in the actual use of world languages. Previously unreported non-trivial features of real syntactic dependency networks are presented here, i.e. scale-free degree distributions, small-world phenomenon, disassortative mixing and hierarchical organization. Instead of a universal grammar, a single universality class is proposed for world languages.
Syntax and symbolic reference are unified under a single topological property, ie. connectedness in the network of signal-object associations of a communication system. Assuming Zipf's law, not only connectedness follows, but the above properties of real syntactic networks. Therefore, (a) referential principles are the principles of syntax and symbolic reference, (b) syntax is a by product of simple communication principles and (c) the above properties of syntactic dependency networks must be universal if Zipf's law is universal, which is the case. The transition to language is shown to be of the kind of a continuous phase transition in physics. Thereafter, the transition to human language could not have been gradual. The reduced network morphospace resulting from a combination of a network distance minimization principle and link density minimization principle is presented as an alternative hypothesis and a promising prospect for linguistic networks subject to fast communication pressures.
The present thesis is unique among theories about the origins of language, in the sense that (a) it explains how words or signals naturally glue in order to form complex messages, (b) it validates its predictions with real data, (c) unifies syntax and symbolic reference and (d) uses ingredients already present in the animal communication systems, in a way no other approximations do. The framework presented is radical shift in the research of linguistic universals and its origins through the physics of critical phenomena. The principles presented here are not principles of human language, but principles of complex communication. Therefore, the such principles suggest new prospects for other information transmission systems in nature.},
keywords = {information theory, network science},
pubstate = {published},
tppubtype = {phdthesis}
}
Syntax and symbolic reference are unified under a single topological property, ie. connectedness in the network of signal-object associations of a communication system. Assuming Zipf's law, not only connectedness follows, but the above properties of real syntactic networks. Therefore, (a) referential principles are the principles of syntax and symbolic reference, (b) syntax is a by product of simple communication principles and (c) the above properties of syntactic dependency networks must be universal if Zipf's law is universal, which is the case. The transition to language is shown to be of the kind of a continuous phase transition in physics. Thereafter, the transition to human language could not have been gradual. The reduced network morphospace resulting from a combination of a network distance minimization principle and link density minimization principle is presented as an alternative hypothesis and a promising prospect for linguistic networks subject to fast communication pressures.
The present thesis is unique among theories about the origins of language, in the sense that (a) it explains how words or signals naturally glue in order to form complex messages, (b) it validates its predictions with real data, (c) unifies syntax and symbolic reference and (d) uses ingredients already present in the animal communication systems, in a way no other approximations do. The framework presented is radical shift in the research of linguistic universals and its origins through the physics of critical phenomena. The principles presented here are not principles of human language, but principles of complex communication. Therefore, the such principles suggest new prospects for other information transmission systems in nature.
Ferrer-i-Cancho, R.; Solé, R. V.
Optimization in complex networks Book Section
In: Pastor-Satorras, R.; Rubí, J. M.; Díaz-Guilera, A. (Ed.): Statistical Mechanics of complex networks, vol. 625, pp. 114-125, Springer, Berlin, 2003.
Abstract | Links | BibTeX | Tags: network science
@incollection{Ferrer2003a,
title = {Optimization in complex networks},
author = {R. Ferrer-i-Cancho and R. V. Solé},
editor = {R. Pastor-Satorras and J. M. Rubí and A. Díaz-Guilera},
doi = {10.1007/b12331},
year = {2003},
date = {2003-01-01},
booktitle = {Statistical Mechanics of complex networks},
volume = {625},
pages = {114-125},
publisher = {Springer},
address = {Berlin},
series = {Lecture Notes in Physics},
abstract = {Many complex systems can be described in terms of networks of interacting units. Recent studies have shown that a wide class of both natural and artificial nets display a surprisingly widespread feature: the presence of highly heterogeneous distributions of links, providing an extraordinary source of robustness against perturbations. Although most theories concerning the origin of these topologies use growing graphs, here we show that a simple optimization process can also account for the observed regularities displayed by most complex nets. Using an evolutionary algorithm involving minimization of link density and average distance, four major types of networks are encountered: (a) sparse exponential-like networks, (b) sparse scale-free networks, (c) star networks and (d) highly dense networks, apparently defining three major phases. These constraints provide a new explanation for scaling of exponent about -3. The evolutionary consequences of these results are outlined.},
keywords = {network science},
pubstate = {published},
tppubtype = {incollection}
}
2002
Solé, R. V.; Ferrer-i-Cancho, R.; Montoya, J. M.; Valverde, S.
Selection, tinkering and emergence in complex networks Journal Article
In: Complexity, vol. 8, pp. 20-33, 2002.
Links | BibTeX | Tags: network science
@article{Sole2000a,
title = {Selection, tinkering and emergence in complex networks},
author = {R. V. Solé and R. Ferrer-i-Cancho and J. M. Montoya and S. Valverde},
doi = {10.1006/jtbi.1999.0901},
year = {2002},
date = {2002-01-01},
journal = {Complexity},
volume = {8},
pages = {20-33},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
Valverde, S.; Ferrer-i-Cancho, R.; Solé, R. V.
Scale free networks from optimal design Journal Article
In: Europhysics Letters, vol. 60, no. 4, pp. 512-517, 2002.
Abstract | Links | BibTeX | Tags: network science
@article{Valverde2002,
title = {Scale free networks from optimal design},
author = {S. Valverde and R. Ferrer-i-Cancho and R. V. Solé},
doi = {10.1209%2Fepl%2Fi2002-00248-2},
year = {2002},
date = {2002-01-01},
journal = {Europhysics Letters},
volume = {60},
number = {4},
pages = {512-517},
abstract = {A large number of complex networks, both natural and artificial, share the presence of highly heterogeneous, scale-free degree distributions. A few mechanisms for the emergence of such patterns have been suggested, optimization not being one of them. In this letter we present the first evidence for the emergence of scaling (and the presence of small-world behavior) in software architecture graphs from a well-defined local optimization process. Although the rules that define the strategies involved in software engineering should lead to a tree-like structure, the final net is scale-free, perhaps reflecting the presence of conflicting constraints unavoidable in a multidimensional optimization process. The consequences for other complex networks are outlined.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
2001
Ferrer-i-Cancho, R.; Janssen, C.; Solé, R. V.
Topology of technology graphs: small world patterns in electronic circuits Journal Article
In: Physical Review E, vol. 64, pp. 046119, 2001.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2001e,
title = {Topology of technology graphs: small world patterns in electronic circuits},
author = {R. Ferrer-i-Cancho and C. Janssen and R. V. Solé},
doi = {10.1103/PhysRevE.64.046119},
year = {2001},
date = {2001-01-01},
journal = {Physical Review E},
volume = {64},
pages = {046119},
abstract = {Recent theoretical studies and extensive data analyses have revealed a common feature displayed by biological, social, and technological networks: the presence of small world patterns. Here we analyze this problem by using several graphs obtained from one of the most common technological systems: electronic circuits. It is shown that both analogic and digital circuits exhibit small world behavior. We conjecture that the small world pattern arises from the compact design in which many elements share a small, close physical neighborhood plus the fact that the system must define a single connected component (which requires shortcuts connecting different integrated clusters). The degree distributions displayed are consistent with a conjecture concerning the sharp cutoffs associated to the presence of costly connections [Amaral et al., Proc. Natl. Acad. Sci. USA 97, 11 149 (2000)], thus providing a limit case for the classes of universality of small world patterns from real, artificial networks. The consequences for circuit design are outlined.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
Ferrer-i-Cancho, R.; Solé, R. V.
The small-world of human Language Journal Article
In: Proceedings of the Royal Society of London B, vol. 268, pp. 2261-2266, 2001.
Abstract | Links | BibTeX | Tags: network science
@article{Ferrer2001a,
title = {The small-world of human Language},
author = {R. Ferrer-i-Cancho and R. V. Solé},
doi = {10.1098/rspb.2001.1800},
year = {2001},
date = {2001-01-01},
journal = {Proceedings of the Royal Society of London B},
volume = {268},
pages = {2261-2266},
abstract = {Words in human language interact in sentences in non–random ways, and allow humans to construct an astronomic variety of sentences from a limited number of discrete units. This construction process is extremely fast and robust. The co–occurrence of words in sentences reflects language organization in a subtle manner that can be described in terms of a graph of word interactions. Here, we show that such graphs display two important features recently found in a disparate number of complex systems. (i) The so called small–world effect. In particular, the average distance between two words, d (i.e. the average minimum number of links to be crossed from an arbitrary word to another), is shown to be d≈ 2–3, even though the human brain can store many thousands. (ii) A scale–free distribution of degrees. The known pronounced effects of disconnecting the most connected vertices in such networks can be identified in some language disorders. These observations indicate some unexpected features of language organization that might reflect the evolutionary and social history of lexicons and the origins of their flexibility and combinatorial nature.},
keywords = {network science},
pubstate = {published},
tppubtype = {article}
}
In case the fancy publication browser above fails, you can also try.