%PDF-1.4
%
1 0 obj
<<
/Type /Page
/Parent 11 0 R
/Resources << /ColorSpace << /CS0 41 0 R >> /ExtGState << /GS0 10 0 R /GS1 40 0 R >>
/Font << /TT0 5 0 R /TT1 6 0 R /TT2 7 0 R /TT3 8 0 R /TT4 9 0 R /T1_0 4 0 R
/T1_1 42 0 R >>
/ProcSet [ /PDF /Text ] >>
/Contents 44 0 R
>>
endobj
4 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 1
/LastChar 1
/Widths [ 1028 ]
/Encoding 27 0 R
/BaseFont /KPNAGO+CMSY9
/FontDescriptor 19 0 R
>>
endobj
5 0 obj
<<
/Type /Font
/Subtype /TrueType
/FirstChar 45
/LastChar 121
/Widths [ 333 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 722 0 722 0 0 611 0 0 389
0 0 667 0 722 778 611 0 722 556 667 0 0 1000 0 0 0 0 0 0 0 0 0 500
556 444 556 444 333 500 556 278 0 0 278 833 556 500 556 0 444 389
333 556 500 722 500 500 ]
/Encoding /WinAnsiEncoding
/BaseFont /Times-Bold
/FontDescriptor 21 0 R
>>
endobj
6 0 obj
<<
/Type /Font
/Subtype /TrueType
/FirstChar 38
/LastChar 150
/Widths [ 778 0 333 333 0 564 250 333 250 0 500 500 500 500 500 500 500 500
500 500 278 278 0 0 0 0 0 722 667 667 722 611 556 722 722 333 389
722 611 889 722 722 556 722 667 556 611 722 0 944 722 0 0 0 0 0
0 0 0 444 500 444 500 444 333 500 500 278 278 500 278 778 500 500
500 500 333 389 278 500 500 722 500 500 444 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 333 444 444 0 500 ]
/Encoding /WinAnsiEncoding
/BaseFont /Times-Roman
/FontDescriptor 22 0 R
>>
endobj
7 0 obj
<<
/Type /Font
/Subtype /TrueType
/FirstChar 46
/LastChar 152
/Widths [ 600 600 0 0 0 0 0 0 0 0 0 0 600 0 0 0 0 0 600 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 600 0 600 600 600 0
0 600 600 0 0 0 600 600 600 600 0 0 600 600 600 0 600 600 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 600 ]
/Encoding /WinAnsiEncoding
/BaseFont /Courier
/FontDescriptor 23 0 R
>>
endobj
8 0 obj
<<
/Type /Font
/Subtype /TrueType
/FirstChar 44
/LastChar 122
/Widths [ 250 333 0 0 500 500 500 0 0 0 0 0 0 0 333 0 0 0 0 0 0 611 0 667 722
611 611 722 0 333 0 0 556 833 667 722 611 0 611 500 556 0 0 0 0
0 0 0 0 0 0 0 0 500 500 444 500 444 278 500 500 278 0 444 278 722
500 500 500 0 389 389 278 500 444 667 444 444 389 ]
/Encoding /WinAnsiEncoding
/BaseFont /Times-Italic
/FontDescriptor 24 0 R
>>
endobj
9 0 obj
<<
/Type /Font
/Subtype /TrueType
/FirstChar 222
/LastChar 222
/Widths [ 556 ]
/Encoding /MacRomanEncoding
/BaseFont /Times-Roman
/FontDescriptor 25 0 R
>>
endobj
10 0 obj
<<
/Type /ExtGState
/SA false
/SM 0.02
/OP false
/op false
/OPM 1
/BG2 /Default
/UCR2 /Default
/HT /Default
/TR2 /Default
>>
endobj
11 0 obj
<<
/Type /Pages
/Kids [ 1 0 R 12 0 R ]
/Count 2
/MediaBox [ 0 0 612 792 ]
>>
endobj
12 0 obj
<<
/Type /Page
/Parent 11 0 R
/Resources << /ColorSpace << /CS0 41 0 R >> /ExtGState << /GS0 10 0 R /GS1 40 0 R >>
/Font << /TT0 6 0 R /TT1 8 0 R /TT2 9 0 R /TT3 5 0 R /T1_0 15 0 R /TT4 16 0 R
/T1_1 42 0 R >>
/ProcSet [ /PDF /Text ] >>
/Contents 46 0 R
>>
endobj
15 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 1
/LastChar 1
/Widths [ 406 ]
/Encoding 28 0 R
/BaseFont /KPNCAF+CMMI10
/FontDescriptor 17 0 R
>>
endobj
16 0 obj
<<
/Type /Font
/Subtype /TrueType
/FirstChar 222
/LastChar 222
/Widths [ 500 ]
/Encoding /MacRomanEncoding
/BaseFont /Times-Italic
/FontDescriptor 26 0 R
>>
endobj
17 0 obj
<<
/Type /FontDescriptor
/Ascent 0
/CapHeight 0
/Descent 0
/Flags 68
/FontBBox [ -32 -250 1048 750 ]
/FontName /KPNCAF+CMMI10
/ItalicAngle -14.03999
/StemV 72
/StemH 31
/CharSet (/epsilon1)
/FontFile3 18 0 R
>>
endobj
18 0 obj
<< /Length 298 /Subtype /Type1C >>
stream
KPNCAF+CMMI10 $Jk.1 NTepsilon1Copyright (C) 1997 American Mathematical Society. All Rights ReservedCMMI10 gMUV怡k#vz6~zh'-<̳wlfzfQY㮓u?aCy
endstream
endobj
19 0 obj
<<
/Type /FontDescriptor
/Ascent 0
/CapHeight 0
/Descent 0
/Flags 68
/FontBBox [ -30 -958 1146 777 ]
/FontName /KPNAGO+CMSY9
/ItalicAngle -14.035
/StemV 87
/StemH 43
/CharSet (/circlecopyrt)
/FontFile3 20 0 R
>>
endobj
20 0 obj
<< /Length 290 /Subtype /Type1C >>
stream
KPNAGO+CMSY9 %J_mRz14
RWcirclecopyrtCopyright (C) 1997 American Mathematical Society. All Rights ReservedCMSY9 a lĶ϶^ccaeaedc\;xNR}PPuxNR}PPuu?Cy
endstream
endobj
21 0 obj
<<
/Type /FontDescriptor
/Ascent 750
/CapHeight 676
/Descent -250
/Flags 262178
/FontBBox [ -168 -218 1000 935 ]
/FontName /Times-Bold
/ItalicAngle 0
/StemV 133
/XHeight 461
/StemH 139
>>
endobj
22 0 obj
<<
/Type /FontDescriptor
/Ascent 750
/CapHeight 662
/Descent -250
/Flags 34
/FontBBox [ -168 -218 1000 898 ]
/FontName /Times-Roman
/ItalicAngle 0
/StemV 84
/XHeight 450
/StemH 84
>>
endobj
23 0 obj
<<
/Type /FontDescriptor
/Ascent 753
/CapHeight 562
/Descent -246
/Flags 35
/FontBBox [ -28 -250 628 805 ]
/FontName /Courier
/ItalicAngle 0
/StemV 51
/XHeight 426
/StemH 51
>>
endobj
24 0 obj
<<
/Type /FontDescriptor
/Ascent 750
/CapHeight 653
/Descent -250
/Flags 98
/FontBBox [ -169 -217 1010 883 ]
/FontName /Times-Italic
/ItalicAngle -15
/StemV 76
/XHeight 441
/StemH 76
>>
endobj
25 0 obj
<<
/Type /FontDescriptor
/Ascent 750
/CapHeight 662
/Descent -250
/Flags 34
/FontBBox [ -168 -218 1000 898 ]
/FontName /Times-Roman
/ItalicAngle 0
/StemV 84
/XHeight 450
/StemH 84
>>
endobj
26 0 obj
<<
/Type /FontDescriptor
/Ascent 750
/CapHeight 653
/Descent -250
/Flags 98
/FontBBox [ -169 -217 1010 883 ]
/FontName /Times-Italic
/ItalicAngle -15
/StemV 76
/XHeight 441
/StemH 76
>>
endobj
27 0 obj
<<
/Type /Encoding
/Differences [ 1 /circlecopyrt ]
>>
endobj
28 0 obj
<<
/Type /Encoding
/Differences [ 1 /epsilon1 ]
>>
endobj
29 0 obj
<<
/S /D
>>
endobj
30 0 obj
<<
/Nums [ 0 29 0 R ]
>>
endobj
32 0 obj
<<
/FICL:Enfocus 33 0 R
/Metadata 48 0 R
/Pages 11 0 R
/Type /Catalog
/PageLabels 30 0 R
>>
endobj
33 0 obj
<<
/PitStop 34 0 R
>>
endobj
34 0 obj
<<
/CC 35 0 R
>>
endobj
35 0 obj
<<
>>
endobj
38 0 obj
<<
/CreationDate (D:20050329174016-06'00')
/Subject (Doctoral Consortium)
/Author (Shimon Whiteson)
/Keywords (reinforcement learning; evolutionary computation)
/Producer (PSNormalizer.framework)
/ModDate (D:20050524002155-07'00')
/Title (Improving Reinforcement Learning Function Approximators via Neuroevoluti\
on)
>>
endobj
40 0 obj
<<
/Type /ExtGState
/SA false
/OP false
/op false
/OPM 0
/BG2 /Default
/UCR2 /Default
/TR2 /Default
/HT /Default
/CA 1
/ca 1
/SMask /None
/AIS false
/BM /Normal
/TK true
>>
endobj
41 0 obj
/DeviceGray
endobj
42 0 obj
<<
/Subtype /Type1
/Type /Font
/BaseFont /Times-Roman
/Encoding 43 0 R
>>
endobj
43 0 obj
<<
/Differences [ 1 /dotaccent /fi /fl /fraction /hungarumlaut /Lslash /lslash /ogonek
/ring 11 /breve /minus 14 /Zcaron /zcaron /caron /dotlessi 30 /grave
/quotesingle 130 /quotesinglbase /florin /quotedblbase /ellipsis
/dagger /daggerdbl /circumflex /perthousand /Scaron /guilsinglleft
/OE 147 /quotedblleft /quotedblright /bullet /endash /emdash /tilde
/trademark /scaron /guilsinglright /oe 159 /Ydieresis 164 /currency
166 /brokenbar 168 /dieresis /copyright /ordfeminine 172 /logicalnot
/hyphen /registered /macron /degree /plusminus /twosuperior /threesuperior
/acute /mu 183 /periodcentered /cedilla /onesuperior /ordmasculine
188 /onequarter /onehalf /threequarters 192 /Agrave /Aacute /Acircumflex
/Atilde /Adieresis /Aring /AE /Ccedilla /Egrave /Eacute /Ecircumflex
/Edieresis /Igrave /Iacute /Icircumflex /Idieresis /Eth /Ntilde
/Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash
/Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn /germandbls
/agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla
/egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex
/idieresis /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis
/divide /oslash /ugrave /uacute /ucircumflex /udieresis /yacute
/thorn /ydieresis ]
/Type /Encoding
>>
endobj
44 0 obj
<< /Length 45 0 R >>
stream
0 g
1 i
/RelativeColorimetric ri
/GS0 gs
BT
/TT0 1 Tf
14.3462 0 0 14.3462 58.977 689.6371 Tm
[(Impr)18(o)10(ving)-250(Reinf)25(or)18(cement)-250(Lear)15(ning)-250(Function)-250(A)25(ppr)18(oximators)-250(via)-250(Neur)18(oe)15(v)10(olution)]TJ
11.9551 0 0 11.9551 260.658 668.3521 Tm
[(Shimon)-249(Whiteson)]TJ
/TT1 1 Tf
9.9626 0 0 9.9626 237.24 657.399 Tm
[(Department)-252(of)-250(Computer)-251(Sciences)]TJ
1.0479 -1.1003 Td
[(Uni)25(v)15(ersity)-251(of)-250(T)70(e)15(xas)-251(at)-250(Austin)]TJ
1.1039 -1.1003 Td
[(Austin,)-250(TX)-250(78712)-252(USA)]TJ
/TT2 1 Tf
-1.2503 -1.0994 Td
(shimon@cs.utexas.edu)Tj
-3.5999 -1.1003 Td
(http://www.cs.utexas.edu/\230shimon)Tj
/TT0 1 Tf
-5.5847 -2.9694 Td
(Abstract)Tj
/TT1 1 Tf
8.9664 0 0 8.9664 63.963 566.2471 Tm
[(Reinforcement)-213(learning)-212(problems)-212(are)-213(commonly)-213(tackled)-213(with)]TJ
/TT3 1 Tf
0 -1.1112 TD
[(tempor)15(al)-380(dif)18(fer)37(ence)-381(methods)]TJ
/TT1 1 Tf
11.6335 0 Td
[(,)-412(which)-381(use)-379(dynamic)-381(program-)]TJ
-11.6335 -1.1112 Td
[(ming)-252(and)-253(statistical)-251(sampling)-252(to)-252(estimate)-253(the)-252(long-term)-252(v)25(alue)]TJ
T*
[(of)-258(taking)-258(each)-259(action)-259(in)-257(each)-259(state.)-333(In)-258(most)-257(problems)-258(of)-258(real-)]TJ
T*
[(w)10(orld)-364(interest,)-393(learning)-365(this)-364(v)25(alue)-365(function)-364(requires)-364(a)]TJ
/TT3 1 Tf
22.322 0 Td
(func-)Tj
-22.322 -1.1111 Td
[(tion)-255(appr)45(oximator)]TJ
/TT1 1 Tf
7.2654 0 Td
[(,)-257(which)-256(represents)-255(the)-255(mapping)-256(from)-255(state-)]TJ
-7.2654 -1.1112 Td
[(action)-344(pairs)-342(to)-344(v)25(alues)-343(via)-343(a)-344(concise,)-367(parameterized)-345(function)]TJ
T*
[(and)-350(uses)-349(supervised)-349(learning)-350(methods)-350(to)-349(set)-350(its)-348(parameters.)]TJ
T*
[(Function)-244(approximators)-243(mak)10(e)-244(it)-243(possible)-243(to)-243(use)-244(temporal)-243(dif-)]TJ
T*
[(ference)-316(methods)-315(on)-315(lar)18(ge)-315(problems)-315(b)20(ut,)-331(in)-315(practice,)-332(the)-315(fea-)]TJ
T*
[(sibility)-317(of)-316(doing)-318(so)-317(depends)-317(on)-318(the)-317(ability)-318(of)-317(the)-317(human)-318(de-)]TJ
T*
[(signer)-398(to)-398(select)-398(an)-398(appropriate)-399(representation)-398(for)-398(the)-398(v)25(alue)]TJ
T*
[(function.)-889(My)-443(thesis)-443(presents)-443(a)-443(ne)25(w)-445(approach)-444(to)-444(function)]TJ
T*
[(approximation)-321(that)-319(automates)-321(some)-319(of)-319(these)-320(dif)]TJ
/TT4 1 Tf
19.3902 0 Td
(\336)Tj
/TT1 1 Tf
0.5562 0 Td
[(cult)-320(design)]TJ
-19.9464 -1.1101 Td
[(choices)-360(by)-360(coupling)-360(temporal)-360(dif)25(ference)-360(methods)-359(with)-360(pol-)]TJ
T*
[(ic)15(y)-252(search)-253(methods)-252(such)-252(as)-252(e)25(v)20(olutionary)-253(computation.)-317(It)-252(also)]TJ
T*
[(presents)-256(a)-257(particular)-257(implementation)-258(which)-257(combines)-257(NEA)111(T)74(,)]TJ
T*
[(a)-269(neuroe)25(v)20(olutionary)-270(polic)15(y)-269(search)-268(method,)-274(and)-269(Q-learning,)-274(a)]TJ
T*
[(popular)-326(temporal)-327(dif)25(ference)-326(method,)-346(to)-325(yield)-327(a)-326(ne)25(w)-326(method)]TJ
T*
[(called)-278(NEA)111(T+Q)-278(that)-278(automatically)-278(learns)-278(ef)25(fecti)25(v)15(e)-278(represen-)]TJ
T*
[(tations)-343(for)-343(neural)-344(netw)10(ork)-344(function)-343(approximators.)-591(Empiri-)]TJ
T*
[(cal)-371(results)-370(in)-371(a)-371(serv)15(er)-371(job)-371(scheduling)-371(task)-371(demonstrate)-372(that)]TJ
T*
[(NEA)111(T+Q)-395(can)-395(outperform)-394(both)-394(NEA)111(T)-395(and)-395(Q-learning)-395(with)]TJ
T*
[(manually)-251(designed)-251(neural)-251(netw)10(orks.)]TJ
/TT0 1 Tf
11.9551 0 0 11.9551 131.031 308.172 Tm
[(Thesis)-249(Ov)10(er)10(view)]TJ
/TT1 1 Tf
9.9626 0 0 9.9626 54 293.574 Tm
[(In)-305(man)15(y)-308(machine)-307(learning)-307(problems,)-321(an)-306(agent)-307(must)-305(learn)-307(a)]TJ
/TT3 1 Tf
0 -1.1003 TD
(policy)Tj
/TT1 1 Tf
2.6629 0 Td
[(for)-218(selecting)-219(actions)-219(based)-219(on)-219(its)]TJ
/TT3 1 Tf
13.0861 0 Td
(state)Tj
/TT1 1 Tf
1.8887 0 Td
[(,)-224(which)-219(consists)]TJ
-17.6377 -1.0994 Td
[(of)-399(its)-398(current)-400(kno)25(wledge)-403(about)-401(the)-400(w)10(orld.)]TJ
/TT3 1 Tf
18.1482 0 Td
[(Reinfor)37(cement)]TJ
-18.1482 -1.1003 Td
(learning)Tj
/TT1 1 Tf
3.6064 0 Td
[(problems)-218(are)-217(the)-218(subset)-217(of)-217(these)-217(tasks)-217(for)-216(which)-218(the)]TJ
-3.6064 -1.1003 Td
[(agent)-336(must)-335(learn)-335(a)-336(polic)15(y)-336(without)-336(e)25(v)15(er)-336(seeing)-336(e)15(xamples)-337(of)]TJ
0 -1.0994 TD
[(correct)-289(beha)20(vior)55(.)-425(Instead,)-297(it)-288(recei)25(v)15(es)-289(only)-289(positi)25(v)15(e)-289(and)-290(ne)15(g-)]TJ
0 -1.1003 TD
[(ati)25(v)15(e)-274(feedback)-275(for)-272(the)-273(actions)-273(it)-272(tries.)-376(Since)-275(man)15(y)-274(practical,)]TJ
T*
[(real)-346(w)10(orld)-347(problems)-347(\(such)-346(as)-346(robot)-347(control,)-370(g)5(ame)-348(playing,)]TJ
0 -1.0994 TD
[(and)-391(system)-390(optimization\))-391(f)10(all)-390(in)-390(this)-389(cate)15(gory)65(,)-427(de)25(v)15(eloping)]TJ
0 -1.1003 TD
[(ef)25(fecti)25(v)15(e)-273(reinforcement)-274(learning)-272(algorithms)-272(is)-271(critical)-271(to)-271(the)]TJ
T*
[(progress)-250(of)-250(arti)]TJ
/TT4 1 Tf
6.0542 0 Td
(\336)Tj
/TT1 1 Tf
0.5562 0 Td
[(cial)-250(intelligence.)]TJ
-5.6103 -1.122 Td
[(The)-216(most)-214(common)-217(approach)-217(to)-214(reinforcement)-217(learning)-215(re-)]TJ
-1 -1.0994 Td
[(lies)-268(on)]TJ
/TT3 1 Tf
2.9262 0 Td
[(tempor)15(al)-270(dif)18(fer)37(ence)-271(methods)]TJ
/TT1 1 Tf
11.6827 0 Td
[(\(Sutton)-269(&)-269(Barto)-269(1998\),)]TJ
-14.6088 -1.1003 Td
[(which)-399(use)-397(dynamic)-400(programming)-399(and)-399(statistical)-397(sampling)]TJ
0 -1.0994 TD
[(to)-318(estimate)-318(the)-318(long-term)-318(v)25(alue)-320(of)-317(taking)-319(each)-319(possible)-318(ac-)]TJ
0 -1.1003 TD
[(tion)-408(in)-408(each)-410(possible)-409(state.)-784(Once)-410(this)-407(v)25(alue)-410(function)-409(has)]TJ
T*
[(been)-411(learned,)-450(an)-410(ef)25(fecti)25(v)15(e)-412(polic)15(y)-410(can)-411(be)-410(tri)25(vially)-409(deri)25(v)15(ed.)]TJ
ET
54 109.011 59.778 -0.405 re
f
BT
/TT1 1 Tf
8.9664 0 0 8.9664 54 99.9031 Tm
[(Cop)10(yright)]TJ
4.7758 0.0301 Td
(c)Tj
/T1_0 1 Tf
-0.2921 -0.0301 Td
(\001)Tj
/TT1 1 Tf
1.4655 0 Td
[(2005,)-486(American)-439(Association)-439(for)-438(Arti)]TJ
/TT4 1 Tf
15.4916 0 Td
(\336)Tj
/TT1 1 Tf
0.5562 0 Td
[(cial)-438(Intelli-)]TJ
-21.9971 -1.1112 Td
[(gence)-251(\(www)65(.aaai.or)18(g\).)-312(All)-250(rights)-250(reserv)15(ed.)]TJ
9.9626 0 0 9.9626 319.5 583.9771 Tm
[(F)15(or)-347(small)-345(problems,)-370(the)-346(v)25(alue)-348(function)-347(can)-347(be)-347(represented)]TJ
T*
[(in)-261(a)-261(table,)-264(with)-261(one)-262(entry)-262(for)-260(each)-263(possible)-261(state-action)-262(pair)55(.)]TJ
0 -1.0994 TD
[(Ho)25(we)25(v)15(er)40(,)-302(for)-290(most)-289(problems)-291(of)-290(real-w)10(orld)-290(interest,)-299(the)-291(tab-)]TJ
0 -1.1003 TD
[(ular)-368(approach)-370(is)-367(infeasible)-369(because)-369(the)-369(agent)-369(w)10(ould)-369(ne)25(v)15(er)]TJ
T*
[(ha)20(v)15(e)-349(a)-348(chance)-350(to)-348(visit)-346(e)25(v)15(ery)-350(state)-347(much)-349(less)-347(learn)-348(the)-348(cor)20(-)]TJ
0 -1.0994 TD
[(rect)-255(v)25(alue)-257(for)-254(each)-257(state-action)-256(pair)55(.)-323(In)-255(such)-255(cases,)-257(temporal)]TJ
0 -1.1003 TD
[(dif)25(ference)-289(methods)-289(are)-289(coupled)-290(with)-288(a)]TJ
/TT3 1 Tf
15.7571 0 Td
[(function)-289(appr)45(oxima-)]TJ
-15.7571 -1.1003 Td
(tor)Tj
/TT1 1 Tf
1.4714 0 Td
[(which)-283(represents)-283(the)-282(mapping)-284(from)-282(state-action)-283(pairs)-282(to)]TJ
-1.4714 -1.0994 Td
[(v)25(alues)-285(via)-285(a)-284(more)-285(concise,)-294(parameterized)-285(function)-285(and)-285(uses)]TJ
T*
[(supervised)-210(learning)-211(methods)-210(to)-210(set)-209(its)-209(parameters.)-298(Man)15(y)-211(dif-)]TJ
T*
[(ferent)-378(methods)-379(of)-378(function)-379(approximation)-380(ha)20(v)15(e)-380(been)-379(used)]TJ
0 -1.0994 TD
[(successfully)65(,)-315(including)-304(CMA)40(Cs,)-315(radial)-303(basis)-302(functions,)-316(and)]TJ
0 -1.1003 TD
[(neural)-251(netw)10(orks)-251(\(Sutton)-250(&)-250(Barto)-251(1998\).)]TJ
1 -1.1292 Td
[(Function)-281(approximators)-280(mak)10(e)-280(it)-278(possible)-279(to)-279(use)-279(temporal)]TJ
-1 -1.1003 Td
[(dif)25(ference)-292(methods)-291(on)-292(problems)-291(with)-290(lar)18(ge)-291(state)-290(and)-292(action)]TJ
T*
[(spaces.)-380(Ho)25(we)25(v)15(er)40(,)-282(in)-273(practice,)-281(the)-274(feasibility)-273(of)-274(doing)-275(so)-273(de-)]TJ
0 -1.0994 TD
[(pends)-286(lar)18(gely)-285(on)-286(the)-285(ability)-285(of)-285(the)-285(human)-287(designer)-286(to)-285(select)]TJ
0 -1.1003 TD
[(an)-279(appropriate)-279(representation)-280(for)-277(the)-279(function)-279(approximator)]TJ
T*
[(\(e.g.)-302(the)-224(topology)-226(and)-225(initial)-224(weights)-224(of)-224(the)-224(neural)-225(netw)10(ork\).)]TJ
0 -1.0994 TD
[(Unfortunate)-349(design)-348(choices)-349(can)-349(result)-347(in)-347(estimates)-348(that)-347(di-)]TJ
0 -1.1003 TD
[(v)15(er)18(ge)-328(wildly)-327(from)-326(the)-327(optimal)-328(v)25(alue)-328(function)-328(\(Baird)-326(1995\))]TJ
T*
[(and)-251(agents)-251(that)-250(perform)-251(e)15(xtremely)-251(poorly)65(.)]TJ
1 -1.1292 Td
[(The)-266(primary)-265(contrib)20(ution)-266(of)-264(this)-264(thesis)-264(is)-263(a)-265(ne)25(w)-266(approach)]TJ
-1 -1.1003 Td
[(to)-460(doing)-462(function)-461(approximation)-462(that)-461(automates)-461(some)-461(of)]TJ
0 -1.0994 TD
[(these)-333(dif)]TJ
/TT4 1 Tf
3.4736 0 Td
(\336)Tj
/TT1 1 Tf
0.5562 0 Td
[(cult)-333(design)-333(choices.)-558(It)-332(does)-333(so)-332(by)-333(coupling)-334(tem-)]TJ
-4.0297 -1.1003 Td
[(poral)-348(dif)25(ference)-349(methods)-348(with)-347(polic)15(y)-349(search)-348(methods)-349(such)]TJ
0 -1.1003 TD
[(as)-258(e)25(v)20(olutionary)-260(computation.)-335(In)-258(particular)40(,)-260(I)-257(use)-258(NeuroEv)20(o-)]TJ
0 -1.0994 TD
[(lution)-363(of)-362(Augmenting)-364(T)80(opologies)-365(\(NEA)111(T\))-363(\(Stanle)15(y)-363(&)-363(Mi-)]TJ
0 -1.1003 TD
[(ikkulainen)-242(2002\),)-243(a)-240(method)-242(that)-240(uses)-240(e)25(v)20(olutionary)-242(computa-)]TJ
T*
[(tion)-361(to)-361(learn)-361(both)-361(the)-362(topology)-362(and)-362(weights)-361(of)-361(neural)-362(net-)]TJ
0 -1.0994 TD
[(w)10(orks,)-463(in)-422(conjunction)-424(with)-421(Q-learning)-422(\(W)80(atkins)-422(1989\),)-465(a)]TJ
0 -1.1003 TD
[(popular)-275(temporal)-275(dif)25(ference)-275(method.)-382(The)-275(resulting)-273(method,)]TJ
T*
[(called)-207(NEA)111(T+Q,)-207(uses)-206(NEA)111(T)-207(to)-206(learn)-206(the)-206(topology)-208(and)-207(initial)]TJ
0 -1.0994 TD
[(weights)-227(of)-227(netw)10(orks)-228(which)-228(are)-227(then)-228(updated,)-234(via)-227(backpropa-)]TJ
0 -1.1003 TD
[(g)5(ation)-277(\(Bishop)-277(1995\),)-284(to)25(w)10(ards)-276(the)-276(v)25(alue)-278(estimates)-275(pro)15(vided)]TJ
T*
[(by)-251(Q-learning.)]TJ
1 -1.1292 Td
[(In)-251(addition)-253(to)-252(automatically)-253(learning)-253(appropriate)-254(netw)10(ork)]TJ
-1 -1.1003 Td
[(topologies)-242(and)-241(initial)-240(weights)-240(for)-240(Q-learning,)-243(NEA)111(T+Q)-242(also)]TJ
0 -1.0994 TD
[(mak)10(es)-255(it)-254(possible)-255(to)-254(tak)10(e)-256(adv)25(antage)-257(of)-254(the)-255(Baldwin)-256(Ef)25(fect,)-256(a)]TJ
0 -1.1003 TD
[(phenomenon)-342(whereby)-340(populations)-340(whose)-339(indi)25(viduals)-340(learn)]TJ
T*
[(during)-397(their)-397(lifetime)-396(adapt)-398(more)-397(quickly)-398(than)-397(populations)]TJ
0 -1.0994 TD
[(whose)-463(indi)25(viduals)-462(remain)-463(static)-462(\(Baldwin)-462(1896\).)-947(In)-462(the)]TJ
0 -1.1003 TD
[(Baldwin)-284(Ef)25(fect,)-291(which)-284(has)-284(been)-284(demonstrated)-285(in)-283(e)25(v)20(olution-)]TJ
T*
[(ary)-455(computation)-457(\(Ackle)15(y)-456(&)-455(Littman)-454(1991;)-456(Boers,)-507(Borst,)]TJ
0 -1.0994 TD
[(&)-505(Sprinkhuizen-K)15(uyper)-509(1995\),)-570(e)25(v)20(olution)-507(proceeds)-506(more)]TJ
0 -1.1003 TD
[(quickly)-361(because)-363(an)-360(indi)25(vidual)-362(does)-360(not)-361(ha)20(v)15(e)-361(to)-360(be)-361(e)15(xactly)]TJ
ET
q
0 0 612 792 re
W n
/GS1 gs
BT
/T1_1 9 Tf
1 0 0 1 237.006 48 Tm
(AAAI-05 Doctoral Consortium / 1666)Tj
ET
Q
endstream
endobj
45 0 obj
12257
endobj
46 0 obj
<< /Length 47 0 R >>
stream
0 g
1 i
/RelativeColorimetric ri
/GS0 gs
BT
/TT0 1 Tf
9.9626 0 0 9.9626 54 727.9771 Tm
[(right)-281(at)-282(birth;)-296(it)-280(need)-283(only)-283(be)-282(in)-281(the)-282(right)-281(neighborhood)-285(and)]TJ
0 -1.1003 TD
[(learning)-331(will)-330(adjust)-331(it)-330(accordingly)65(.)-554(By)-331(combining)-333(learning)]TJ
/TT1 1 Tf
0 -1.0994 TD
[(acr)45(oss)]TJ
/TT2 1 Tf
2.8887 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(tness)-322(e)25(v)25(aluations)-324(with)-322(learning)]TJ
/TT1 1 Tf
12.8487 0 Td
(within)Tj
/TT2 1 Tf
2.8221 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(tness)-322(e)25(v)25(al-)]TJ
-19.6718 -1.1003 Td
[(uations,)-286(NEA)111(T+Q)-281(has)-279(the)-280(potential)-280(to)-279(reap)-280(the)-279(Baldwin)-280(Ef-)]TJ
0 -1.1003 TD
(fect.)Tj
1 -1.0994 Td
[(This)-277(thesis)-277(presents)-278(empirical)-278(results)-276(from)-277(the)-278(domain)-279(of)]TJ
-1 -1.1003 Td
[(serv)15(er)-426(job)-426(scheduling,)-472(a)-426(challenging)-428(reinforcement)-428(learn-)]TJ
T*
[(ing)-407(task)-407(from)-407(the)-408(b)20(ur)18(geoning)]TJ
/TT2 1 Tf
12.6108 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(eld)-408(of)]TJ
/TT1 1 Tf
2.8704 0 Td
[(autonomic)-410(comput-)]TJ
-16.0374 -1.0994 Td
(ing)Tj
/TT0 1 Tf
1.7792 0 Td
[(\(K)25(ephart)-501(&)-501(Chess)-500(2003\).)-1063(My)-501(e)15(xperiments)-502(demon-)]TJ
-1.7792 -1.1003 Td
[(strate)-416(that)-416(NEA)111(T+Q,)-416(by)-416(automatically)-417(disco)15(v)15(ering)-418(appro-)]TJ
T*
[(priate)-334(topologies)-336(and)-335(initial)-333(weights,)-355(can)-335(dramatically)-336(out-)]TJ
0 -1.0994 TD
[(perform)-255(a)-255(Q-learning)-255(approach)-257(that)-255(uses)-254(manually)-257(designed)]TJ
0 -1.1003 TD
[(neural)-341(netw)10(orks.)-578(These)-341(e)15(xperiments)-341(also)-340(demonstrate)-341(that)]TJ
0 -1.0994 TD
[(when)-306(NEA)111(T)-306(is)-304(used)-306(by)-306(itself)-303(\(i.e.)-475(to)-304(learn)-306(policies)-305(directly)]TJ
0 -1.1003 TD
[(without)-317(estimating)-317(v)25(alue)-319(functions\),)-333(it)-316(does)-318(not)-317(perform)-317(as)]TJ
T*
[(well)-367(as)-367(NEA)111(T+Q,)-369(which)-368(harnesses)-368(the)-367(po)25(wer)-369(of)-367(temporal)]TJ
0 -1.0994 TD
[(dif)25(ference)-252(methods.)]TJ
/TT3 1 Tf
11.9551 0 0 11.9551 108.765 530.562 Tm
[(Additional)-250(Contrib)20(utions)]TJ
/TT0 1 Tf
9.9626 0 0 9.9626 54 517.548 Tm
[(In)-259(addition)-261(to)-260(the)-260(no)15(v)15(el)-262(method)-261(described)-261(abo)15(v)15(e)-263(for)-259(synthe-)]TJ
T*
[(sizing)-341(temporal)-341(dif)25(ference)-341(and)-341(polic)15(y)-341(search)-341(methods,)-363(this)]TJ
0 -1.1003 TD
[(thesis)-192(presents)-193(the)-194(follo)25(wing)-194(additional)-194(contrib)20(utions:)-283(1\))-193(tw)10(o)]TJ
T*
[(no)15(v)15(el)-298(enhancements)-300(to)-296(polic)15(y)-298(search)-297(methods)-297(that)-297(impro)15(v)15(e)]TJ
0 -1.0994 TD
[(their)-331(on-line)-332(performance)-332(by)-332(borro)25(wing)-332(e)15(xploratory)-333(mech-)]TJ
0 -1.1003 TD
[(anisms)-448(from)-448(temporal)-449(dif)25(ference)-450(methods,)-499(and)-449(2\))-448(a)-448(ne)25(w)]TJ
T*
[(method)-357(called)-357(FS-NEA)111(T)-357(that)-355(uses)-356(neuroe)25(v)20(olution)-359(to)-355(auto-)]TJ
0 -1.0994 TD
[(mate)-261(the)-262(task)-260(of)-261(feature)-261(selection)-262(in)-260(reinforcement)-263(learning.)]TJ
0 -1.1003 TD
[(The)-249(follo)25(wing)-249(subsections)-248(pro)15(vide)-250(a)-248(brief)-247(o)15(v)15(ervie)25(w)-251(of)-247(these)]TJ
T*
[(contrib)20(utions.)]TJ
/TT3 1 Tf
10.9091 0 0 10.9091 54 398.838 Tm
[(On-Line)-250(P)20(olicy)-250(Sear)18(ch)]TJ
/TT0 1 Tf
9.9626 0 0 9.9626 54 385.833 Tm
[(Reinforcement)-576(learning)-575(agents)-575(are)-574(typically)-575(trained)-574(us-)]TJ
T*
[(ing)-312(polic)15(y)-313(search)-312(methods)-313(or)-312(temporal)-313(dif)25(ference)-313(methods.)]TJ
T*
[(While)-457(there)-457(is)-456(much)-458(debate)-459(about)-458(the)-458(ef)]TJ
/TT2 1 Tf
17.3382 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(cac)15(y)-460(of)-456(polic)15(y)]TJ
-17.8944 -1.0994 Td
[(search)-438(methods,)-485(there)-437(are)-437(some)-438(problems)-438(on)-437(which)-438(the)15(y)]TJ
T*
[(ha)20(v)15(e)-275(achie)25(v)15(ed)-277(the)-275(best)-274(results)-273(to)-275(date)-275(\(Bagnell)-275(&)-274(Schneider)]TJ
T*
[(2001;)-218(K)35(ohl)-218(&)-216(Stone)-218(2004\).)-300(Ho)25(we)25(v)15(er)40(,)-226(polic)15(y)-218(search)-218(methods)]TJ
0 -1.0994 TD
[(do)-231(not)-231(f)10(are)-230(well)-231(in)]TJ
/TT1 1 Tf
7.4756 0 Td
(on-line)Tj
/TT0 1 Tf
3.0638 0 Td
[(scenarios,)-235(in)-231(which)-231(there)-231(are)-231(real-)]TJ
-10.5394 -1.1003 Td
[(w)10(orld)-354(consequences)-357(for)-354(the)-354(agent\222)55(s)-354(beha)20(vior)-356(during)-355(learn-)]TJ
0 -1.1003 TD
[(ing.)-453(The)15(y)-299(do)-298(not)-298(e)15(xcel)-300(in)-297(such)-298(scenarios)-298(because)-300(the)15(y)-299(lack)]TJ
0 -1.0994 TD
[(mechanisms)-242(for)-241(balancing)-243(the)-241(need)-243(to)-241(search)-242(for)-240(better)-241(poli-)]TJ
0 -1.1003 TD
[(cies)-333(\(e)15(xploration\))-334(with)-333(the)-333(need)-334(to)-333(accrue)-334(maximal)-334(re)25(w)10(ard)]TJ
T*
[(\(e)15(xploitation\).)-319(This)-253(thesis)-252(presents)-253(tw)10(o)-253(no)15(v)15(el)-255(enhancements)]TJ
0 -1.0994 TD
[(to)-313(polic)15(y)-315(search)-314(methods)-315(that)-314(impro)15(v)15(e)-315(their)-313(on-line)-314(perfor)20(-)]TJ
0 -1.1003 TD
[(mance)-314(by)-313(borro)25(wing)-313(e)15(xploratory)-314(mechanisms)-314(from)-312(tempo-)]TJ
0 -1.0994 TD
[(ral)-375(dif)25(ference)-377(methods.)-687(The)]TJ
/TT2 1 Tf
12.0899 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(rst)-375(modi)]TJ
/TT2 1 Tf
3.4306 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(cation,)-409(which)-377(can)]TJ
-16.6327 -1.1003 Td
[(be)-281(applied)-281(to)-280(an)15(y)-282(polic)15(y)-281(search)-281(method,)-289(borro)25(ws)-280(the)-281(notion)]TJ
0 -1.1003 TD
(of)Tj
/T1_0 1 Tf
1.0968 0 Td
(\001)Tj
/TT0 1 Tf
0.4056 0 Td
[(-greedy)-265(e)15(xploration,)-269(resulting)-264(in)]TJ
/T1_0 1 Tf
13.1814 0 Td
(\001)Tj
/TT1 1 Tf
0.4056 0 Td
[(-gr)37(eedy)-266(policy)-265(sear)37(c)15(h)]TJ
/TT0 1 Tf
8.6039 0 Td
(.)Tj
-23.6933 -1.0994 Td
[(This)-233(algorithm)-234(switches)-233(probabilistically)-234(between)-235(searching)]TJ
T*
[(for)-337(better)-339(policies)-338(and)-339(re-e)25(v)25(aluating)-341(the)-338(best)-338(kno)25(wn)-340(polic)15(y)]TJ
T*
[(to)-275(g)5(arner)-276(maximal)-276(re)25(w)10(ard.)-385(The)-276(second)-276(modi)]TJ
/TT2 1 Tf
17.9682 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(cation,)-284(which)]TJ
-18.5243 -1.0994 Td
[(requires)-280(a)-280(population-based)-284(polic)15(y)-281(search)-281(method)-281(such)-281(as)-280(a)]TJ
T*
[(genetic)-372(algorithm)-371(\(Goldber)18(g)-372(1989\),)-402(borro)25(ws)-371(the)-371(notion)-372(of)]TJ
T*
[(softmax)-255(selection,)-257(resulting)-255(in)]TJ
/TT1 1 Tf
12.3231 0 Td
[(softmax)-256(policy)-256(sear)37(c)15(h)]TJ
/TT0 1 Tf
8.6791 0 Td
[(.)-324(It)-254(dis-)]TJ
-21.0023 -1.0994 Td
[(trib)20(utes)-346(e)25(v)25(aluations)-349(in)-347(proportion)-348(to)-347(each)-348(indi)25(vidual\222)55(s)-348(esti-)]TJ
T*
(mated)Tj
/TT2 1 Tf
2.7108 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(tness,)-271(thereby)-267(focusing)-267(on)-267(the)-267(most)-266(promising)-267(indi-)]TJ
-3.2669 -1.1003 Td
[(viduals)-316(and)-316(increasing)-317(the)-316(a)20(v)15(erage)-317(re)25(w)10(ard)-317(accrued.)-508(I)-314(com-)]TJ
0 -1.0994 TD
[(pare)-380(the)-380(resulting)-379(methods)-380(in)-380(tw)10(o)-379(domains:)-570(ele)25(v)25(ator)-381(con-)]TJ
0 -1.1003 TD
[(trol)-257(and)-258(auto)-257(racing.)-331(The)-258(results)-256(demonstrate)-259(that)-257(these)-257(ne)25(w)]TJ
26.6496 64.043 Td
[(techniques)-247(signi)]TJ
/TT2 1 Tf
6.4687 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(cantly)-247(impro)15(v)15(e)-247(the)-247(on-line)-247(performance)-247(of)]TJ
-7.0249 -1.1003 Td
[(NEA)111(T)74(,)-251(a)-250(neuroe)25(v)20(olutionary)-254(polic)15(y)-252(search)-251(method.)]TJ
/TT3 1 Tf
10.9091 0 0 10.9091 319.5 696.657 Tm
[(A)50(utomatic)-250(F)25(eatur)18(e)-249(Selection)-250(via)-250(Neur)18(oe)15(v)10(olution)]TJ
/TT0 1 Tf
9.9626 0 0 9.9626 319.5 683.337 Tm
[(Feature)-323(selection)-323(is)-321(the)-323(process)-322(of)]TJ
/TT2 1 Tf
14.2085 0 Td
(\336)Tj
/TT0 1 Tf
0.5561 0 Td
[(nding)-324(the)-322(set)-322(of)-322(inputs)]TJ
-14.7646 -1.1003 Td
[(to)-268(a)-269(machine)-271(learning)-269(algorithm)-269(that)-269(will)-268(yield)-269(the)-268(best)-269(per)20(-)]TJ
T*
[(formance.)-308(De)25(v)15(eloping)-242(a)-240(w)10(ay)-239(to)-239(solv)15(e)-240(this)-239(problem)-240(automat-)]TJ
0 -1.0994 TD
[(ically)-281(w)10(ould)-282(mak)10(e)-282(current)-282(machine)-282(learning)-282(methods)-282(much)]TJ
0 -1.1003 TD
[(more)-312(useful.)-491(Pre)25(vious)-313(ef)25(forts)-310(to)-311(automate)-313(feature)-312(selection)]TJ
T*
[(rely)-222(on)-223(e)15(xpensi)25(v)15(e)-226(meta-learning)-225(or)-222(are)-222(applicable)-225(only)-223(when)]TJ
0 -1.0994 TD
[(labeled)-193(training)-192(data)-192(is)-191(a)20(v)25(ailable.)-292(This)-192(thesis)-191(presents)-192(a)-192(no)15(v)15(el)]TJ
0 -1.1003 TD
[(method)-284(called)-283(FS-NEA)111(T)-284(which)-283(e)15(xtends)-284(the)-283(NEA)111(T)-283(\(Stanle)15(y)]TJ
0 -1.0994 TD
[(&)-303(Miikkulainen)-305(2002\))-304(neuroe)25(v)20(olution)-306(method)-304(to)-303(automati-)]TJ
0 -1.1003 TD
[(cally)-230(determine)-231(an)-230(appropriate)-231(set)-229(of)-229(inputs)-229(for)-229(the)-230(netw)10(orks)]TJ
T*
[(it)-306(e)25(v)20(olv)15(es.)-483(By)-308(learning)-308(the)-307(netw)10(ork\222)55(s)-308(inputs,)-322(topology)65(,)-323(and)]TJ
0 -1.0994 TD
[(weights)-266(simultaneously)65(,)-269(FS-NEA)111(T)-266(addresses)-266(the)-265(feature)-266(se-)]TJ
0 -1.1003 TD
[(lection)-247(problem)-247(without)-247(relying)-247(on)-247(meta-learning)-247(or)-246(labeled)]TJ
T*
[(data.)-339(Initial)-258(e)15(xperiments)-261(in)-260(an)-260(autonomous)-262(car)-259(racing)-260(simu-)]TJ
0 -1.0994 TD
[(lation)-245(demonstrate)-246(that)-245(FS-NEA)111(T)-246(can)-246(learn)-246(better)-245(and)-246(f)10(aster)]TJ
0 -1.1003 TD
[(than)-295(re)15(gular)-295(NEA)111(T)74(.)-294(In)-294(addition,)-306(the)-295(netw)10(orks)-294(it)-294(e)25(v)20(olv)15(es)-296(are)]TJ
T*
[(smaller)-286(and)-287(require)-286(fe)25(wer)-287(inputs.)-418(Furthermore,)-296(FS-NEA)111(T\222)55(s)]TJ
0 -1.0994 TD
[(performance)-432(remains)-430(rob)20(ust)-430(e)25(v)15(en)-432(as)-429(the)-430(feature)-431(selection)]TJ
0 -1.1003 TD
[(task)-250(it)-249(f)10(aces)-251(is)-249(made)-251(increasingly)-251(dif)]TJ
/TT2 1 Tf
14.5179 0 Td
(\336)Tj
/TT0 1 Tf
0.5561 0 Td
(cult.)Tj
/TT3 1 Tf
11.9551 0 0 11.9551 410.976 469.389 Tm
[(Refer)18(ences)]TJ
/TT0 1 Tf
9.9626 0 0 9.9626 324.477 456.0691 Tm
[(Ackle)15(y)65(,)-343(D.,)-341(and)-324(Littman,)-341(M.)-593(1991.)-594(Interactions)-324(between)]TJ
T*
[(learning)-253(and)-253(e)25(v)20(olution.)]TJ
/TT1 1 Tf
9.5742 0 Td
(Arti)Tj
/TT4 1 Tf
1.5557 0 Td
(\336)Tj
/TT1 1 Tf
0.5 0 Td
[(cial)-251(Life)-252(II,)-250(SFI)-252(Studies)-252(in)-251(the)]TJ
-11.6298 -1.0994 Td
[(Sciences)-252(of)-250(Comple)20(xity)]TJ
/TT0 1 Tf
9.5089 0 Td
(10:487\226509.)Tj
-9.5089 -1.3415 Td
[(Bagnell,)-425(J.,)-422(and)-389(Schneider)40(,)-425(J.)-801(2001.)-803(Autonomous)-390(heli-)]TJ
0 -1.0994 TD
[(copter)-338(control)-336(using)-337(reinforcement)-338(learning)-337(polic)15(y)-338(search)]TJ
0 -1.1003 TD
[(methods.)-577(In)]TJ
/TT1 1 Tf
5.366 0 Td
[(Pr)45(oceedings)-321(of)-317(the)-318(International)-319(Confer)37(ence)]TJ
-5.366 -1.1003 Td
[(on)-251(Robotics)-251(and)-251(A)20(utomation)-252(2001)]TJ
/TT0 1 Tf
13.6516 0 Td
[(.)-361(IEEE.)]TJ
-13.6516 -1.3406 Td
[(Baird,)-375(L.)-350(C.)-678(1995.)-680(Residual)-350(algorithms:)-509(Reinforcement)]TJ
T*
[(learning)-349(with)-347(function)-348(approximation.)-672(In)]TJ
/TT1 1 Tf
17.3642 0 Td
[(Pr)45(oceedings)-350(of)]TJ
-17.3642 -1.0994 Td
[(the)-373(T)74(welfth)-373(International)-374(Confer)37(ence)-375(on)-373(Mac)15(hine)-375(Learn-)]TJ
T*
(ing)Tj
/TT0 1 Tf
1.2778 0 Td
[(,)-251(30\22637.)-361(Mor)18(g)5(an)-252(Kaufmann.)]TJ
-1.2778 -1.3406 Td
[(Baldwin,)-429(J.)-391(M.)-813(1896.)-815(A)-392(ne)25(w)-393(f)10(actor)-392(in)-392(e)25(v)20(olution.)]TJ
/TT1 1 Tf
21.9446 0 Td
(The)Tj
-21.9446 -1.1003 Td
[(American)-252(Natur)15(alist)]TJ
/TT0 1 Tf
8.4313 0 Td
(30:441\226451.)Tj
-8.4313 -1.3406 Td
[(Bishop,)-289(C.)-280(M.)-455(1995.)]TJ
/TT1 1 Tf
8.8726 0 Td
[(Neur)15(al)-281(Networks)-281(for)-280(P)80(attern)-281(Reco)10(g-)]TJ
-8.8726 -1.1003 Td
(nition)Tj
/TT0 1 Tf
2.3335 0 Td
(.)Tj
-2.3335 -1.3406 Td
[(Boers,)-352(E.;)-370(Borst,)-351(M.;)-370(and)-332(Sprinkhuizen-K)15(uyper)40(,)-356(I.)-618(1995.)]TJ
T*
[(Ev)20(olving)-396(Arti)]TJ
/TT2 1 Tf
5.6531 0 Td
(\336)Tj
/TT0 1 Tf
0.5562 0 Td
[(cial)-394(Neural)-394(Netw)10(orks)-395(using)-394(the)-394(\223Baldwin)]TJ
-6.2093 -1.1003 Td
[(Ef)25(fect\224.)-361(T)70(echnical)-252(Report)-251(TR)-250(95-14.)]TJ
0 -1.3406 TD
[(Goldber)18(g,)-277(D.)-271(E.)-425(1989.)]TJ
/TT1 1 Tf
9.4929 0 Td
[(Genetic)-272(Algorithms)-271(in)-270(Sear)37(c)15(h,)-278(Op-)]TJ
-9.4929 -1.1003 Td
[(timization)-250(and)-251(Mac)15(hine)-253(Learning)]TJ
/TT0 1 Tf
13.405 0 Td
(.)Tj
-13.405 -1.3406 Td
[(K)25(ephart,)-428(J.)-390(O.,)-425(and)-393(Chess,)-426(D.)-390(M.)-811(2003.)-813(The)-392(vision)-391(of)]TJ
0 -1.1003 TD
[(autonomic)-253(computing.)]TJ
/TT1 1 Tf
9.3637 0 Td
(Computer)Tj
/TT0 1 Tf
4.2511 0 Td
(36\(1\):41\22650.)Tj
-13.6149 -1.3406 Td
[(K)35(ohl,)-360(N.,)-359(and)-339(Stone,)-360(P)111(.)-641(2004.)-641(Machine)-340(learning)-339(for)-338(f)10(ast)]TJ
T*
[(quadrupedal)-242(locomotion.)-335(In)]TJ
/TT1 1 Tf
11.3963 0 Td
[(The)-238(Nineteenth)-240(National)-239(Con-)]TJ
-11.3963 -1.0994 Td
[(fer)37(ence)-252(on)-251(Arti)]TJ
/TT4 1 Tf
5.9642 0 Td
(\336)Tj
/TT1 1 Tf
0.5 0 Td
[(cial)-250(Intellig)10(ence)]TJ
/TT0 1 Tf
6.4595 0 Td
[(,)-251(611\226616.)]TJ
-12.9237 -1.3415 Td
[(Stanle)15(y)65(,)-269(K.)-264(O.,)-267(and)-265(Miikkulainen,)-269(R.)-405(2002.)-406(Ev)20(olving)-266(neu-)]TJ
0 -1.0994 TD
[(ral)-233(netw)10(orks)-235(through)-235(augmenting)-236(topologies.)]TJ
/TT1 1 Tf
18.2231 0 Td
(Evolutionary)Tj
-18.2231 -1.1003 Td
(Computation)Tj
/TT0 1 Tf
5.4756 0 Td
(10\(2\):99\226127.)Tj
-5.4756 -1.3406 Td
[(Sutton,)-215(R.)-204(S.,)-213(and)-205(Barto,)-214(A.)-204(G.)-260(1998.)]TJ
/TT1 1 Tf
14.7227 0 Td
[(Reinfor)37(cement)-207(Learn-)]TJ
-14.7227 -1.1003 Td
[(ing:)-311(An)-251(Intr)45(oduction)]TJ
/TT0 1 Tf
8.2379 0 Td
[(.)-361(Cambridge,)-251(MA:)-250(MIT)-250(Press.)]TJ
-8.2379 -1.3406 Td
[(W)80(atkins,)-241(C.)-332(1989.)]TJ
/TT1 1 Tf
7.577 0 Td
[(Learning)-239(fr)45(om)-238(Delayed)-239(Re)15(war)37(ds)]TJ
/TT0 1 Tf
13.0065 0 Td
[(.)-334(Ph.D.)]TJ
-20.5835 -1.1003 Td
[(Dissertation,)-250(King\222)55(s)-250(Colle)15(ge,)-251(Cambridge.)]TJ
ET
q
0 0 612 792 re
W n
/GS1 gs
BT
/T1_1 9 Tf
1 0 0 1 237.006 48 Tm
(AAAI-05 Doctoral Consortium / 1667)Tj
ET
Q
endstream
endobj
47 0 obj
14822
endobj
48 0 obj
<< /Type /Metadata /Subtype /XML /Length 3233 >>
stream
PSNormalizer.framework
reinforcement learning; evolutionary computation
2005-03-29T17:40:16-06:00
2005-05-24T00:21:55-07:00
Improving Reinforcement Learning Function Approximators via Neuroevolution
Doctoral Consortium
Shimon Whiteson
2005-03-29T17:40:16-06:00
2005-05-24T00:21:55-07:00
2005-05-24T00:21:55-07:00
application/pdf
Improving Reinforcement Learning Function Approximators via Neuroevolution
Doctoral Consortium
Copyright © 2005, American Association for Artificial Intelligence. All rights reserved. No part of this book may be used or reproduced in any manner whatsoever without written permission.
Shimon Whiteson
reinforcement learning
evolutionary computation
uuid:9405beac-c9c3-11d9-965e-000393821e34
uuid:804097fb-c9c6-11d9-a607-000393821e34
http://www.aaai.org
True
application/pdf
Improving Reinforcement Learning Function Approximators via Neuroevolution
Doctoral Consortium
Copyright © 2005, American Association for Artificial Intelligence. All rights reserved. No part of this book may be used or reproduced in any manner whatsoever without written permission.
Shimon Whiteson
reinforcement learning
evolutionary computation
endstream
endobj
xref
0 49
0000000002 65535 f
0000000016 00000 n
0000000003 00001 f
0000000013 00001 f
0000000294 00000 n
0000000459 00000 n
0000000861 00000 n
0000001411 00000 n
0000001839 00000 n
0000002266 00000 n
0000002447 00000 n
0000002600 00000 n
0000002699 00000 n
0000000014 00001 f
0000000031 00001 f
0000002980 00000 n
0000003146 00000 n
0000003329 00000 n
0000003569 00000 n
0000003937 00000 n
0000004178 00000 n
0000004538 00000 n
0000004755 00000 n
0000004967 00000 n
0000005173 00000 n
0000005388 00000 n
0000005600 00000 n
0000005815 00000 n
0000005890 00000 n
0000005961 00000 n
0000005992 00000 n
0000000036 00001 f
0000006036 00000 n
0000006151 00000 n
0000006192 00000 n
0000006228 00000 n
0000000037 00001 f
0000000039 00001 f
0000006253 00000 n
0000000000 00001 f
0000006591 00000 n
0000006797 00000 n
0000006826 00000 n
0000006922 00000 n
0000008247 00000 n
0000020560 00000 n
0000020583 00000 n
0000035461 00000 n
0000035484 00000 n
trailer
<<
/Size 49
/Info 38 0 R
/Root 32 0 R
/ID[<96ced330f35d9d51f24d47a0ef419216>]
>>
startxref
38802
%%EOF