<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE document PUBLIC "-//CNX//DTD CNXML 0.5 plus MathML//EN" "http://cnx.rice.edu/cnxml/0.5/DTD/cnxml_mathml.dtd">
<document xmlns="http://cnx.rice.edu/cnxml" xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="m11175">
  <name xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Speech Perception</name>
  <metadata xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
  <md:version xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">2.9</md:version>
  <md:created xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">2003/05/21</md:created>
  <md:revised xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">2008/06/18 17:22:52.796 GMT-5</md:revised>
  <md:authorlist xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
      <md:author xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="dmlane">
      <md:firstname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">David</md:firstname>
      
      <md:surname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Lane</md:surname>
      <md:email xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">lane@rice.edu</md:email>
    </md:author>
  </md:authorlist>

  <md:maintainerlist xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
    <md:maintainer xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="dmlane">
      <md:firstname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">David</md:firstname>
      
      <md:surname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Lane</md:surname>
      <md:email xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">lane@rice.edu</md:email>
    </md:maintainer>
    <md:maintainer xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="kclarks">
      <md:firstname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Kyle</md:firstname>
      <md:othername xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Evan</md:othername>
      <md:surname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Clarkson</md:surname>
      <md:email xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">kclarks@gmail.com</md:email>
    </md:maintainer>
    <md:maintainer xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="jsilv">
      <md:firstname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Jeffrey</md:firstname>
      <md:othername xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">M</md:othername>
      <md:surname xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Silverman</md:surname>
      <md:email xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">JSilverman@astro.berkeley.edu</md:email>
    </md:maintainer>
  </md:maintainerlist>
  
  <md:keywordlist xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
    <md:keyword xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">neuropsychology</md:keyword>
    <md:keyword xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">perception</md:keyword>
  </md:keywordlist>

  <md:abstract xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/"/>
</metadata>

  <content xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
    <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para1">For most of us, listening to speech is an
      effortless task. Generally speaking, speech perception proceeds
      through a series of stages in which acoustic cues are extracted
      and stored in sensory memory and then mapped onto linguistic
      information. When air from the lungs is pushed into the larynx
      across the vocal cords and into the mouth and nose, different types
      of sounds are produced. The different qualities of the sounds are
      represented in formants, which can be pictured on a graph that has
      time on the x-axis and the pressure under which the air is pushed
      on the y-axis. Perception of the sound will vary as the frequency
      with which the air vibrates across time varies. Because vocal
      tracts vary somewhat between people (just as shoe size or height
      do), one person's vocal cords may be shorter than another's, or the
      roof of someone's mouth may be higher than another's, and the end
      result is that there are individual differences in how various
      sounds are produced. You probably know someone whose voice is
      slightly lower in pitch than yours or higher in pitch. Pitch is the
      psychological correlate of the physical acoustic cue of
      frequency. The more frequently the vibrations of air occur for a
      particular sound, the higher in pitch it will be perceived. Less
      frequent vibrations are perceived as being lower in pitch. When
      language is the sound being processed, the formants are mapped
      onto <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">phonemes</term>, which are the smallest units of sound in a
      language. For example, in English the phonemes in the word "glad"
      are /g/, /l/, /æ/, and /d/.
    </para>

    <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para2">The nature of speech, however, has provided
      researchers of language with a number of puzzles, some of which
      have been researched for more than forty years.
    </para>
    
    <note xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/"> To demonstrate one of these problems, click <link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/Showmethemoney.html">here</link>. The
	waveform you see shows speech as a function of amplitude,
	which is measured in decibels (dB), and frequency of the sound
	waves, measured in hertz (Hz). As the cursor passes over the
	waveform, you may notice various sections that correspond to
	the words and individual sounds you hear; for example, you can
	detect where the word "show" begins and where the word "money"
	ends. After a bit of experimentation, however, you notice that
	it is difficult to pinpoint precisely where one phoneme ends
	and another begins. Try to find the "th" sound in the word
	"the", for example; and where can the "uh" sound in "the" be
	located? Often the acoustic features of one sound will spread
	themselves across those of another sound, leading to the
	problem of linearity; that is, if phonemes were produced one
	at a time, or linearly, there should be a single corresponding
	section in the waveform for each phoneme. As
	"the" shows, however, speech is not linear.  </note>

    <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para4">Another problem that investigators have studied
      is the problem of invariance. <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Invariance</term> refers to
      a particular phoneme having one and only one waveform
      representation; that is, the phoneme /i/ (the "ee" sound in
      "me") should have the identical amplitude and frequency as the
      same phoneme in "money". As you can see again, that is not the
      case; the two differ. The plosives, or stop consonants, /b/,
      /d/, /g/, /k/, provide particular problems for the invariance
      assumption.
    </para>

    <note xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
      To download free sound-processing software to record your own
      sentences now, in order to see the problems of linearity and
      invariance in your own speech, click <link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://www.blackdiamondsound.com/download.htm">here</link>.
    </note>

    <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para6">The problems of linearity and invariance are
      brought about by <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">co-articulation</term>, the influence of
      the <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">articulation</term> (pronunciation) of one phoneme on
      that of another phoneme. Because phonemes cannot always be
      isolated in a spectrogram and can vary from one context to
      another depending on neighboring phonemes, speakers' rate of
      speech, and loudness, perceptually identifying one phoneme among
      a stream of others, the process of segmentation, also seems like
      a daunting task. Theories and models of speech perception have
      to be able to account for how segmentation occurs in order to
      provide an adequate account of speech perception. We will
      discuss some accounts of speech perception below.
    </para>

    <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para7">Some clues as to how identifying phonemes occurs
      arise from investigation into the ability to perceive
      <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">voiced consonants</term>, or consonants in which the vocal
      cords vibrate. To understand the concept of voicing, say the
      phoneme, /p/, followed by the phoneme, /b/, while touching your
      throat. You will feel the vibration of your vocal cords during
      /b/ but not during /p/. Both of these phonemes are
      <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">bilabial</term>; that is, they are produced by pressing
      the lips together, and are released with a puff of air. Since
      the discriminating difference between these two phonemes
      relevant to English is in their voicing, the ability to
      adequately perceive voicing is crucial for an adept listener;
      for example, as the rate of speech increases, listeners are able
      to shift their criterion of what constitutes a voiceless
      phoneme. The criterion shift allows them to accept phonemes that
      are pronounced with shorter voice onset time (VOT) durations. Although shifting
      criteria during the perception of phonemes may be one process
      that allows accurate identification of phonemes despite changing
      conditions, what supports the criterion shifts is still a matter
      of investigation. These skills effortlessly become highly
      automatic and are probably acquired and fine-tuned during early
      childhood, a topic we talk about in <cnxn xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" target="infant_list" strength="9">infant speech perception</cnxn>.
      
      <list xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="infant_list">
	<item xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/"><link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/infantIntro_h.html">Infant
	language study: Introduction</link></item> 

	<item xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/"><link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/infantsucking_h.html">Infant
	language study: High Amplitude Sucking Method</link></item>
	
	<item xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/"><link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/infantHeadturn_h.html">Infant
	language study: Head Turn Method</link></item>

	<item xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/"><link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/infantlooking_h.html">Infant
	language study: Preferential Looking Method</link></item>

      </list>
	    (Video clips courtesy of the late Peter W. Jusczyk and
	the Johns Hopkins University).
    </para>
   
    <section xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="special">
      <name xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Is speech special?</name> <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para8">In visual
	perception, people discriminate among colors based on the
	frequency of light waves. Low frequencies are
	perceived as red and high frequencies are perceived as violet.
      </para>

      <figure xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="fig1"><media xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" type="image/jpeg" src="spectrum.jpg"/></figure>
      
      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para8b">
	As we move from low to high frequencies, we perceive a
	continuum of colors from red to violet. Notice that as we move
	from red to orange, we pass through a middle ground that we
	call "red orange."  Speech sounds lie on a physical continuum
	as well. For example, an important dimension in speech
	perception is <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">voice onset time</term>. This refers to
	the time between the beginning of the pronunciation of the
	word and the onset of the vibration of the vocal cords. For
	example, when you say "ba" your vocal cords vibrate right
	from the start. When you say "pa" your vocal cords do not
	vibrate until after a short delay. To see this for yourself,
	put one of your fingers on your vocal cords and say "ba" and
	then "pa."</para>
      
      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para9">The only difference between the sound "ba" and
	the sound "pa" is that the voice onset time for "ba" is
	shorter than the voice onset time for "pa". An important
	difference between speech perception and visual perception is
	that we do not hear speech sounds as falling halfway between a
	"ba" and a "pa." We hear a sound one way or the other. This
	means that a range of voice onset times is perceived as "ba"
	and a different range of voice onset times is perceived as
	"pa". This phenomenon is called <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">categorical
	perception</term> and is very helpful for understanding
	speech.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para10">The sounds "ba" and "pa" differ on the
	continuous dimension of voice onset time. The sounds "ga" and
	"da" also differ on a continuous dimension. However, the
	continuous dimension for these stimuli is more complex than the
	dimension of voice onset time (it is called the second formant
	but that is a little beyond the scope of this text). What is
	important here is that there is a continuum of sounds from "da"
	to "ga." The following demonstration uses computer generated
	speech sounds. Ten sounds were generated in equal steps from
	"da" to "ga." The experiment uses sounds numbered 1, 4, 7, and
	10. Sounds 1 and 4 are both heard as "da" whereas sounds 7 and
	10 are heard as "ga." In the task, subjects are presented with
	a randomly-ordered series of sound pairs and asked, for each
	pair, to judge whether the sounds are the same or
	different. Since sounds 1 and 4 are both heard as "da" it should
	be very hard to tell them apart. Therefore, subjects usually
	judge these sounds as identical. By contrast, Sound 4 is heard
	as "da" while Sound 7 is heard as "ga." Since Sound 4 and Sound
	7 are on opposite sides of the "categorical boundary" it is
	easier to hear the difference between these sounds than the
	difference between Sounds 1 and 4. <emphasis xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">This occurs even
	  though the physical difference between Sounds 1 and 4 is the
	  same as the difference between Sounds 4 and 7.</emphasis> By
	similar logic, the difference between Sounds 7 and 10 should be
	hard to hear.
      </para>
      
      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para11">The results from one subject in this
	demonstration experiment are shown below and can be interpreted
	as follows: When the comparison was between Sounds 1 and 4, the
	subject judged them to be different once and the same 4
	times. When the comparison was between Sounds 4 and 7 (which
	cross the border), the subject correctly judged them to be
	different 5/5 times. Finally, in comparing Sounds 7 and 10, the
	subject always judged the sounds to be the same. Thus, the only
	time this subject heard a difference between sounds that were
	three steps apart was for Sounds 4 and 7.
      </para>

      <table xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="discrimination">
	<tgroup xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" cols="3">
	  <thead xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
	    <row xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">Sound Pair</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">Judged different</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">Judged same</entry>
	    </row>
	  </thead>
	  <tbody xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
	    <row xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">1 vs. 4</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">1</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">4</entry>
	    </row>
	    <row xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">4 vs. 7</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">5</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">0</entry>
	    </row> 
	    <row xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">7 vs. 10</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">0</entry>
	      <entry xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" align="center">5</entry>
	    </row>
	  </tbody>
	</tgroup>
      </table>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para11b">
	Not all results are as clear cut as those shown above. Many
	people need more time to become familiar with the task than is
	possible in this demonstration. In any case, you should get a
	sense of how this kind of experiment works.

	<note xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/"><link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/dagaSound/catDis.html">Try
	this categorical discrimination task yourself.</link>
	</note>

	The hypothesis that speech is perceptually special has arisen
	from this phenomenon of categorical perception. Listeners can
	differentiate between /p/ and /b/; however, performance in
	distinguishing between different types of /p/ sounds is
	difficult and, for some, impossible. This pattern is
	consistent with the pragmatic demands of language; there is a
	meaning distinction between /p/ and /b/, while the distinction
	between two variations of /p/ carries no meaning. (There are
	languages in which two different /p/ sounds are used, and, in
	such cases, perception would be categorical).
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para12">The first experiment to demonstrate
	categorical perception was conducted by <cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Liberman1957">Liberman, Harris, Hoffman and Griffith
	(1957)</cite>, and in it they presented consonant-vowel
	syllables along a continuum. The consonants were stop
	consonants, or plosives, /b/, /d/, and /g/, followed by /a/;
	for example, /ba/. When asked to say whether two syllables
	were the same or different, the participants reported various
	forms of /ba/ to be the same, whereas /ba/ and /da/ were
	easily discriminated.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para13">Another categorical perception task presents
	two syllables followed by a probe syllable, and participants
	have to say which of the first two syllables the probe
	matches. If the first two sounds are from two different
	categories - for example, /da/ and /ga/ - participants
	accurately match the probe syllable. If the first two
	syllables are taken from the same category, however,
	participants cannot differentiate them well enough to do the
	matching task, and their performance is at chance.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para14">Does the categorical perception of speech mean
	that speech is perceived via a specialized speech processor?
	<cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Kewley">Kewley-Port and Luce (1984)</cite> did not
	find categorical perception in some non-speech stimuli,
	indicating that there may be something special about speech.
      </para>
      
      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para15">For there to be a specialized speech
	processor, categorical perception should occur during the
	perception of all phonemes. However, <cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Fry">Fry,
	Abramson, Eimas, and Liberman (1962)</cite>, failed to find
	categorical perception with a vowel continuum. So, there are
	vowels and consonants that do not behave the same in that
	respect. Additionally, chinchillas have been shown to
	categorically perceive speech, despite their obvious lack of a
	speech-processing mechanism (<cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Kuhl">Kuhl,
	1987</cite>).
      </para>
    </section>
    
    <section xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="perceived">
      <name xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">How is speech perceived?</name>
      
      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para16">
	One theory of how speech is perceived is the <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">Motor
	Theory of speech perception</term> (<cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Liberman1967">Liberman, Cooper, Shankweiler, &amp;
	Studdert-Kennedy, 1967</cite>).  The motor theory postulates
	that speech is perceived by reference to how it is produced;
	that is, when perceiving speech, listeners access their own
	knowledge of how phonemes are articulated. Articulatory
	gestures such as rounding or pressing the lips together are
	units of perception that directly provide the listener with
	phonetic information. The motor theory can account for the
	invariance problem; that is, the ways that phonemes are
	produced and perceived have more in common than the ways they
	are acoustically represented and perceived.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para17">
	What would be the evidence that listeners use articulatory
	features when perceiving speech? Here, an accidental discovery
	made by two film technicians led to one of the most robust and
	widely discussed findings in language processing. A
	researcher, Harry McGurk, was interested in whether auditory
	or visual modalities are differentially dominant during
	infants' perceptual development. To find out, he asked his
	technician to create a film to test which modality captured
	infants' attention. In this film, an actor pronounced the
	syllable "ga" while an auditory "ba" was dubbed over the
	tape. Would babies pay attention to the "ga" or the "ba"? The
	process of making the film, however, led to a surprising
	finding about adults. The technician (and others) did not
	perceive either a "ga" or a "ba". Rather, the technician
	perceived a "da".
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para18">
	In an experiment that formally tested this observation, <cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#McGurk">McGurk and MacDonald (1976)</cite> showed
	research participants a video of a person saying a syllable
	that began with a consonant formed in the back of the mouth at
	the velum—that is, a velar consonant, "ga"—while playing an
	auditory tape of a consonant which is formed in the front of
	the mouth at the two lips; that is, a bilabial, "ba". When
	viewers were asked what they heard, like the film technician,
	they replied "da". Perceiving a "da" was the result of
	combining articulatory information from both visually and
	auditorily presented stimuli.

	<note xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">You can experience the McGurk effect by <link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/McGurk1.html">clicking
	here</link>.</note>

	(To return to the question Harry McGurk originally
	asked about infants, neither modality seems to have dominance;
	infants as young as 5 months old take in the visual and
	auditory information about words in the same way as adults:
	both influence perception).
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para19">
	Although the McGurk effect has been interpreted as evidence that listeners
	perceive phonetic gestures, an alternative explanation based on memory has
	also been raised. Because perceivers have ample experience with
	both hearing and seeing people speak, they may have built memories of
	these events that have subsequently become associated with the
	phoneme's mental representation, so that when the phoneme is
	perceived, memories based on the visual information are recalled
	(<cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Massaro">Massaro, 1987</cite>).
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para20">
	To test this possibility, <cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Fowler">Fowler and Dekle
	(1991)</cite> introduced research participants to one of two
	experimental conditions. In one, the participants were
	presented with either a printed ba or printed ga syllable,
	while listening to a syllable from the auditory /ba/-/ga/
	continuum. In the other, the printed syllables were replaced
	with their haptic presentations; that is, participants were
	able to feel how the syllables were being produced. Since
	there are no previously made associations to how syllables
	feel when a speaker produces them, by the memory account there
	should be no McGurk effect. The experimenters found no effect
	of the printed syllables on the auditory ones, as expected,
	and they found that the feel of how a syllable is produced
	affected the perception of the auditory syllables, indicating
	that articulatory gestures are indeed perceived by listeners.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para21">
	The TRACE model of speech perception, TRACE 1, developed by
	Jay McClelland and Jeff Elman (<cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#McClelland">1986</cite>; <cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Elman">Elman &amp;
	McClelland, 1988</cite>), depicts speech perception as a process in which
	speech units are arranged into levels and interact with each
	other. There are three levels: features, phonemes, and
	words. The levels are composed of processing units, or nodes;
	for example, within the feature level, there are individual
	nodes that detect voicing.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para22">
	Nodes that are consistent with each other share excitatory
	activation; for example, to perceive a /k/ in "cake", the /k/
	phoneme and corresponding featural units share excitatory
	connections. Nodes that are inconsistent with each other share
	inhibitory links; such nodes lie within the same level. In this
	example, /k/ would have an inhibitory connection with the
	vowel sound in "cake", /eI/.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para23">
	To perceive speech, the featural nodes are activated
	initially, followed in time by the phoneme and then word
	nodes. Thus, activation is bottom-up. Activation can also
	spread top-down, however, and TRACE can model top-down effects
	such as the fact that context can influence the perception of
	individual phonemes.
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para24">
	Perception of speech can be influenced by contextual
	information, indicating that perception is not strictly
	bottom-up but can receive feedback from semantic levels of
	knowledge. In 1970, <cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Warren">Warren and
	Warren</cite> took simple sentences, such as "It was found
	that the wheel was on the axle", removed the /w/ sound from
	"wheel", and replaced it with a cough. They found that
	listeners were unable to detect that the phoneme was
	missing. They found the same effect with the following
	sentences as well:

      <quote xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" type="block">
	It was found that the *eel was on the shoe.
      </quote>
      <quote xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" type="block">
	It was found that the *eel was on the orange.
      </quote>
      <quote xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" type="block">
	It was found that the *eel was on the table.
      </quote>
      
	Listeners perceived heel, peel, and meal, respectively. Because
	the perception of the word with the missing phoneme depends on
	the last word of the sentence, their finding indicates that
	perception is highly interactive.  

	<note xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" type="Gating Task">A task developed to show the effect
	  of context on spoken word recognition is Gating (<cite xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="#Grosjean">Grosjean, 1980</cite>). In this task,
	  participants are presented with fragments of a word of
	  gradually increasing duration (such as in 50-msec increments);
	  for example, t - tr - tre - tres - tresp - trespa. Upon
	  hearing each fragment, the participant makes a guess at what
	  the whole word might be. (<link xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" src="http://psych.rice.edu/mmtbn/language/sPerception/gate/index.html">Have
	  a go at this gating task yourself</link>.) The point at
	  which the person guesses the whole word is called the
	  <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">isolation point</term>. Gating shows the effect of
	  context on spoken word recognition: there is a time
	  difference between identifying a word in isolation and
	  identifying it in a sentence. The time to identify a word in
	  context is about a fifth of a second, whereas it takes a
	  third of a second in isolation. It is thought that the
	  grammar and meaning of the preceding part of the sentence
	  limit the range of possibilities for the gated word, such
	  that it can be identified sooner in a sentence than on its
	  own. The point at which there is only one possible candidate
	  is called the <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">uniqueness point</term>. The uniqueness
	  point and the isolation point need not correspond: on the
	  one hand, the word may be recognized before there is one
	  remaining candidate, if the context is helpful
	  (<foreign xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">i.e.</foreign>, strongly biasing); on the other
	  hand, there may be a delay in isolating the word. There is a
	  third point, called the <term xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/">recognition point</term>. This
	  is the point at which the person is confident in his/her
	  identification of the gated word.
	</note>
      </para>

      <para xmlns:md="http://cnx.rice.edu/mdml/0.4" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:bib="http://bibtexml.sf.net/" id="para25">The guesses people make on this task indicate
	that the perceptual identity of the word is also important to
	spoken word recognition, even before the context has its
	effect. In other words, people's early guesses resemble the
	perceptual aspects of the word and not the contextually signaled
	candidate.
      </para>

    </section>
  </content>
  <bib:file>
    <bib:entry id="Liberman1957">
      <bib:article>
	<bib:author>Liberman, A. M., Harris, K. S., Hoffman, H. S., &amp;
	Griffith, B. C.</bib:author> 
	<bib:title>The discrimination of speech sounds within and
	across phoneme boundaries</bib:title>
	<bib:journal>Journal of Experimental Psychology</bib:journal>
	<bib:year>1957</bib:year>
	<bib:volume>54</bib:volume>
	<bib:pages>358-368</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="Kewley">
      <bib:article>
	<bib:author>Kewley-Port, D., &amp; Luce, P. A.</bib:author>
	<bib:title>Time-varying features of initial stop consonants in
	auditory running spectra: A first report</bib:title>
	<bib:journal>Perception and Psychophysics</bib:journal>
	<bib:year>1984</bib:year>
	<bib:volume>35</bib:volume>
	<bib:pages>353-360</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="Fry">
      <bib:article>
	<bib:author>Fry, D. B., Abramson, A. S., Eimas, P. D., &amp;
	Liberman, A. M.</bib:author>
	<bib:title>The identification and discrimination of synthetic
	vowels</bib:title>
	<bib:journal>Language and Speech</bib:journal>
	<bib:year>1962</bib:year>
	<bib:volume>5</bib:volume>
	<bib:pages>171-189</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="Kuhl">
      <bib:incollection>
	<bib:author>Kuhl, P.K.</bib:author>
	<bib:title>The special mechanisms debate in speech research:
	Categorization tests on animals and infants</bib:title>
	<bib:booktitle>Categorical perception: The groundwork of
	cognition</bib:booktitle>
	<bib:publisher>Cambridge University Press</bib:publisher>
	<bib:year>1987</bib:year>
	<bib:editor>S. Harnad</bib:editor>
	<bib:pages>355-386</bib:pages>
	<bib:address>Cambridge</bib:address>
      </bib:incollection>
    </bib:entry>
    <bib:entry id="Liberman1967">
      <bib:article>
	<bib:author>Liberman, A. M., Cooper, F. S., Shankweiler,
	D. P., &amp; Studdert-Kennedy, M.</bib:author>
	<bib:title>Perception of the speech code</bib:title>
	<bib:journal>Psychological Review</bib:journal>
	<bib:year>1967</bib:year>
	<bib:volume>74</bib:volume>
	<bib:pages>431-461</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="McGurk">
      <bib:article>
	<bib:author>McGurk, H., &amp; MacDonald, J.</bib:author>
	<bib:title>Hearing lips and seeing voices</bib:title>
	<bib:journal>Nature</bib:journal>
	<bib:year>1976</bib:year>
	<bib:volume>264</bib:volume>
	<bib:pages>746-748</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="Fowler">
      <bib:article>
	<bib:author>Fowler, C. A., &amp; Dekle, D. J.</bib:author>
	<bib:title>Listening with eye and hand: Cross-modal
	contributions to speech perception</bib:title>
	<bib:journal>Journal of Experimental Psychology: Human Perception
	and Performance</bib:journal>
	<bib:year>1991</bib:year>
	<bib:volume>17</bib:volume>
	<bib:pages>816-828</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="McClelland">
      <bib:article>
	<bib:author>McClelland, J. L., &amp; Elman, J. L.</bib:author>
	<bib:title>The TRACE model of speech perception</bib:title>
	<bib:journal>Cognitive Psychology</bib:journal>
	<bib:year>1986</bib:year>
	<bib:volume>18</bib:volume>
	<bib:pages>1-86</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="Elman">
      <bib:article>
	<bib:author>Elman, J. L., &amp; McClelland, J. L.</bib:author>
	<bib:title>Cognitive penetration of the mechanisms of
	perception: Compensation for coarticulation of lexically
	restored phonemes</bib:title>
	<bib:journal>Journal of Memory and Language</bib:journal>
	<bib:year>1988</bib:year>
	<bib:volume>27</bib:volume>
	<bib:pages>143-165</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="Warren">
      <bib:article>
	<bib:author>Warren, R. M., &amp; Warren, R. P.</bib:author>
	<bib:title>Auditory illusions and confusions</bib:title>
	<bib:journal>Scientific American</bib:journal>
	<bib:year>1970</bib:year>
	<bib:volume>223</bib:volume>
	<bib:pages>30-36</bib:pages>
      </bib:article>
    </bib:entry>
    <bib:entry id="Grosjean">
      <bib:article>
	<bib:author>Grosjean, F.</bib:author>
	<bib:title>Spoken word recognition processes and the gating
	paradigm</bib:title>
	<bib:journal>Perception and Psychophysics</bib:journal>
	<bib:year>1980</bib:year>
	<bib:volume>28</bib:volume>
	<bib:pages>267-283</bib:pages>
      </bib:article>
    </bib:entry>
  </bib:file>
</document>
