unit U_TicTacToeMachine;
 {Copyright 2002, Gary Darby, Intellitech Systems Inc., www.DelphiForFun.org

 This program may be used or modified for any non-commercial purpose
 so long as this original notice remains in place.
 All other rights are reserved
 }

{
An artificial intelligence demonstration of a
"machine" that learns to play tic-tac-toe by trial
and error.    This is a computer model of the
original machine, MENACE,  invented by
Donald Michie in 1961 using 300 matchboxes
representing 300 board positions.  Each box
contains colored beads for each available cell.

The machine always plays first.  It plays by
selecting a bead randomly from the box
representing the current board configurations.
When it wins or draws it is "rewarded" by
adding beads of the winning move colors  to
each box used.  Losses are punished by
confiscating the selected beads.

You may click avalaible cells to play against the
machine or use the "Random" button to train
the machine.  It may take a few thousand
random games to train it well.
}

interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
  StdCtrls, Spin, ExtCtrls;

type
  TweightArray=array[1..9] of integer;
  PWeightrec=^TWeightrec;
  TWeightrec=record  {a record saved with each board position containing more
                      info about the position}
    w:TWeightArray;{array of weights (# of beads of each color) for possible moves}
    movelevel:integer; {which level are we at}
    lastchoice:byte;   {the cell filled to get to this position}
  end;


  TForm1 = class(TForm)
    ProtoEdt: TEdit;
    ResetBtn: TButton;
    ListBox1: TListBox;
    ListBox2: TListBox;
    Label1: TLabel;
    StartBtn: TButton;
    Run100Btn: TButton;
    ResetWeightsBtn: TButton;
    GameCount: TSpinEdit;
    Label2: TLabel;
    Label3: TLabel;
    DebugPanel: TPanel;
    Label5: TLabel;
    Label4: TLabel;
    ListBox3: TListBox;
    DebugBtn: TButton;
    Memo1: TMemo;
    procedure FormActivate(Sender: TObject);
    procedure ResetBtnClick(Sender: TObject);
    procedure StartBtnClick(Sender: TObject);
    procedure Run100BtnClick(Sender: TObject);
    procedure ResetWeightsBtnClick(Sender: TObject);
    procedure DebugBtnClick(Sender: TObject);
  public
    board:array[0..8] of TEdit; {array of TEdits visualizing the board}
    level:integer;    {current level}
    winlevel:integer;
    xwins, Owins, totgames: integer;  {statistics}
    strkey:string; {9 character string representation of board}
    Positions:TStringList; {list of 304 "matchboxes" representing possible positions}
    gameover:boolean;
    autoplay:boolean;
    moves:array[1..4] of integer; {positions list index values for current game}
    nbrmoves:integer; {how many moves in current game (X moves only)}
    procedure BoardKeyClick(Sender: TObject); {Human move maker}
    {procedure BoardToKey;}
    procedure GenNext(key:string;templevel:integer); {Initialize positions list}
    function FindTransform(key:String;  {loojk for key in the list, allowing
                                         rotations and reflections}
                          var TransformNbr, listposition:integer):boolean;
    function transform(s:string;t:integer):string; {get a new key after
                                                    transform "t"}
    function inversetransform(s:string;t:integer):string; {reverse a transform}
    procedure makemove; {machine move maker}
    function winner(p:char):boolean; {check for 3 in a row for player "p"}
    procedure wrapup(p:char); {end of game total and adjust weights}
    procedure initweights; {initialize weights}
  end;

var Form1: TForm1;

implementation

{$R *.DFM}

Uses UMakeCaption;

var
  transforms:array[0..7] of array[1..9] of integer =  {the 8 transforms}
     {identity}   ((1,2,3,4,5,6,7,8,9),   {"moveto" index values}
     {right90}     (7,4,1,8,5,2,9,6,3),
     {left90}      (3,6,9,2,5,8,1,4,7),
     {180}         (9,8,7,6,5,4,3,2,1),
     {Mirror}      (7,8,9,4,5,6,1,2,3),
     {Mirror+Left} (9,6,3,8,5,2,7,4,1),
     {Mirror+Right}(1,4,7,2,5,8,3,6,9),
     {Mirror+180}  (3,2,1,6,5,4,9,8,7)
     );

const
   {initial bead counts of each availablr cell color for machines 1st 4 moves}
   initialweights:array[0..3] of integer=(4,3,2,1);

{**************** FormActivate **************}
procedure TForm1.FormActivate(Sender:TObject);
var
  i:integer;
  fname:string;
  c1,c3,c5,c7:integer;
begin
  makecaption('Tic-Tac-Toe Machine', #169+' 2002 G Darby, www.delphiforfun.org',self);
  randomize;
  level:=1;
  {create 9 Tedits as the board array}
  for i:=0 to 8 do
  begin
    board[i]:=TEdit.create(self);
    with board[i] do
    begin
      parent:=self;
      autosize:=protoedt.autosize;
      width:=protoedt.width;
      height:=protoedt.height;
      {move left 0,1, or 2 times the width of prototype,
       subtract 0, 1 or 2 pixels to make interior borders overlap}
      left:= protoedt.left+(i mod 3)*protoedt.width- i mod 3;
      {move down 0,1, or 2 times the height of prototype,
       subtract 0, 1 or 2 pixels to make interior borders overlap}
      top:= protoedt.top+(i div 3)*protoedt.height- i div 3;
      text:=' ';
      tag:=i+1;  {index of this cell's position in the key "strkey"}
      font.size:=protoedt.Font.size;
      font.style:=protoedt.font.style;
      borderstyle:=protoedt.borderstyle;
      onclick:=BoardkeyClick;
      readonly:=true;
    end;
  end;
  {generate all matchboxes for 1st mover, "X"}
  Positions:=TStringList.create;
  positions.sorted:=true;
  strkey:=stringofchar('-',9);
  fname:=extractfilepath(application.exename)+'Positions.str';
  {load positions if file exists}
  if fileexists(fname) then positions.loadfromfile(fname)
  else
  begin
    Gennext(strkey,0);  {recursive procedure that builds the list
                         of "matchboxes" (positions)}
    positions.savetofile(fname);
  end;
  Initweights; {Add the "beads" to the "matchboxes"}
  {accumulate # of moves for each level for diplay}
  c1:=0; c3:=0; c5:=0; c7:=0;
  for i:=0 to positions.count-1 do
  begin
    with pweightrec(positions.objects[i])^ do
    case movelevel of
    1: inc(c1);
    3: inc(c3);
    5: inc(c5);
    7: inc(c7);
    end;
  end;
  listbox2.items.add('There are '+ inttostr(c1) + ' level 1 positions');
  listbox2.items.add('There are '+ inttostr(c3) + ' level 3 positions');
  listbox2.items.add('There are '+ inttostr(c5) + ' level 5 positions');
  listbox2.items.add('There are '+ inttostr(c7) + ' level 7 positions');
end;

{******************* Initweights *************}
procedure TForm1.initweights;
  var
    i,j,v,n:integer;
    initialcount:integer;
    s, wstr:string;
    P:Pweightrec;
  begin
    for i:= 0 to positions.count-1 do
    with positions do
    begin
      s:=strings[i];
      n:=0;
      {count the "O"s to determine level}
      for j:= 1 to 9 do  if s[j]='O' then inc(n);
      initialcount:=initialweights[n];
      new(P); {make a new weightrec}
      objects[i]:=pointer(p);
      wstr:='';
      with Pweightrec(objects[i])^ do
      begin
        for j:=1 to 9 do {Assign the initial bead count to each empty slot}
        begin
          if s[j]='-' then v:=initialcount else v:=0;
          w[j]:=v;
          wstr:=wstr+inttostr(v)+',';
        end;
        lastchoice:=0; {initialize lastchoice variable}
        movelevel:=2*n+1;
      end;
      listbox1.items.add(strings[i]+'('+wstr+')');
    end;
  end;{initweights}

{**************** Transform ****************}
function TForm1.transform(s:string;t:integer):string;
{given a key and a transform, return the new key, rotated and/or mirrored}
var i:integer;
begin
  result:=s;
  for i:=1 to 9 do
  result[i]:=s[transforms[t,i]];
end;

{**************** InverseTransform ****************}
function TForm1.InverseTransform(s:string;t:integer):string;
{Reverse the previous transform t, return the new key}
var i:integer;
begin
  case t of
      1: {right90} t:=2; {left 90}
      2: {left90}  t:=1; {right 90}
   end; {all the other transformations are reflexive, i.e. T(T(s))=s}
  result:=s;
  for i:=1 to 9 do
  result[i]:=s[transforms[t,i]];
end;

{************** Winner *************}
function TForm1.winner(p:char):boolean;
{Check for three in a row}
var s:string;
begin
  s:=strkey;
  if (  (s[1]=p) and (s[2]=p) and (s[3]=p) )
    or ((s[4]=p) and (s[5]=p) and (s[6]=p) )
    or ((s[7]=p) and (s[8]=p) and (s[9]=p) )
    or ((s[1]=p) and (s[4]=p) and (s[7]=p) )
    or ((s[2]=p) and (s[5]=p) and (s[8]=p) )
    or ((s[3]=p) and (s[6]=p) and (s[9]=p) )
    or ((s[1]=p) and (s[5]=p) and (s[9]=p) )
    or ((s[3]=p) and (s[5]=p) and (s[7]=p) )
    then result:=true  else result:=false;
end;

{**************** Wrapup *************}
procedure Tform1.wrapup(p:char);
{game over - adjust weight and reset board image for next game}
var
  i,reward, rewardlast:integer;
  msg:string;
begin
  case p of
    'X':
    begin
      msg:='X wins!';
      inc(xwins);
      reward:=+4;
      rewardlast:=1000;  {big reward at the last level for winning}
    end;
    'O':
    begin
      msg:='O wins';
      inc(OWins);
      reward:=-1;
      rewardlast:=-1000; {big punishment for the losing move}
    end;
    else
    begin
     msg:='A draw!';
     reward:=+1;
     rewardlast:=+1;
    end;
  end;
  If not autoplay then showmessage(Msg);
  inc(totgames);

  for i:=1 to nbrmoves do
  with PWeightrec(positions.objects[moves[i]])^ do
  begin
    if i<nbrmoves then w[lastchoice]:=w[lastchoice]+reward
    else w[lastchoice]:=w[lastchoice]+rewardlast;
    if w[lastchoice]<0 then w[lastchoice]:=0;  {don't let weights go negative}
    lastchoice:=0;
  end;
  listbox2.clear;
  listbox2.items.add(inttostr(totgames)+ ' games played');
  listbox2.items.add(inttostr(Xwins)+ ' won by machine');
  listbox2.items.add(inttostr(Owins)+ ' won by opponent');
  listbox2.items.add(inttostr(totgames-XWins-Owins)+ ' draws');
  gameover:=true;
end;


{************* Makemove ************}
 procedure TForm1.makemove;
 {Computer makes a move}
 var i,n, tnbr,sum:integer;
     listposition:integer;
     newkey, wstr:string;
     prob:single;
 begin
   {
    1. Find the current board position in the list, (transform if necessary),
    2. Select a random move based on weight values for next moves that
       are contained with each list entry.
    3. Update lastchoice value in the list entry and make the move
    }
   if level=9 then {fill in the last position}
   begin
     for i:=1 to 9 do if strkey[i]='-' then strkey[i]:='X';
   end
   else
   begin  {Look for board configuration in the positions list}
     If not findtransform(strkey, tnbr, listposition) then
     begin
       showmessage('System error - position '+strkey +' not found in table');
       i:=random(8);
       if i>0 then strkey:=transform(strkey,random(8));
     end
     else
     with Pweightrec(positions.objects[listposition])^ do
     begin
       newkey:=transform(strkey,tnbr);
       sum:=0;
       for i:=1 to 9 do
       begin
         sum:=sum+w[i];
         wstr:=wstr+inttostr(w[i])+',';
       end;
       if sum>0 then
       begin
         n:=random(sum)+1;  { get a random value of the sum of beads}
         i:=0;
         sum:=0;
         while  sum<n do {and count beads up until we exceed the value}
         begin
           inc(i);
           if i>9
           then showmessage('Weights error');
           sum:=sum+w[i];
         end;
         prob:=w[i]/sum; {probability for display}
       end
       else
       begin
         {Zero weights?  just move to 1st available cell}
         prob:=1/(10-movelevel);
         for i:= 1 to 9 do if strkey[i]='-' then break;
       end;
       listbox3.items.add('Transform('+strkey+','+inttostr(tnbr)+') = '+newkey);
       listbox3.items.add('    Weights:'+wstr);

       listbox3.items.add('    Selected cell:'+inttostr(i)+ '( probability '+
                         format('%4.2f',[prob]) +')');
       newkey[i]:='X';
       lastchoice:=i;
       if tnbr>0 then strkey:=inverseTransform(newkey,tnbr)
       else strkey:=newkey;
       listbox3.items.add('    InvTransform('+newkey+','+inttostr(tnbr)+') = '+strkey);
       inc(nbrmoves);
       moves[nbrmoves]:=listposition;
     end;
   end;
   for i:=1 to length(strkey) do {fill in the visual board}
     if strkey[i]<>'-' then board[i-1].text:=strkey[i]
     else board[i-1].text:=' ';
   if winner('X') then  {check if we won}
   begin
     winlevel:=level;
     wrapup('X');
   end
   else if level=9 then wrapup('-')
   else inc(level);
 end;  {makemove}

{**************** GenNext **************}
procedure TForm1.GenNext(key:string;templevel:integer);
{Generates the 304 "Matchboxes" that make up the machine,
 normally one time only since psitions are saved in a file}

   procedure posadd(newkey:string);  {add non-winning positions to the list}
   begin
     strkey:=newkey;
     {no need to put winning positions in the list}
     if (not winner('O')) and (not winner('X')) then positions.add(newkey);
   end;

var
  ch:char;
  newkey:string;
  i,index, listpos:integer;
begin
  if templevel mod 2=1 then ch:='X' else if templevel>1 then ch:='O' else ch:='-';
  for i :=1 to length(key) do
  begin
    if key[i]='-' then
    begin
      newkey:=key;
      newkey[i]:=ch;
      if templevel mod 2=0 then {save positions after "O" moves if we haven't already}
      begin
        index:=positions.indexof(newkey);
        if index<0 then {not found}
        begin
          if not FindTransform(newkey, index,listpos) {and if not saved rotated }
          then  posadd(newkey);         {or mirrored version, then add it}
        end;
      end;
      if templevel<6 then GenNext(newkey,templevel+1); {generate next level, up to 6}
    end;
    if templevel=0 then break;
  end;
end;

{****************** FindTransorm ************}
function TForm1.FindTransform(key:String;
                          var transformNbr, listposition:integer):boolean;
{try all unique rotations & reflections looking for match already in list}
var
  i,j:integer;
  tempkey:string;
begin
  tempkey:=key;
  for i:=0 to 7 do
  begin
    for j:=1 to 9 do tempkey[j]:=key[transforms[i,j]];
    {debug - listbox2.items.add(tempkey);}
    result:=positions.find(tempkey,listposition);
    if result then
    begin
      transformNbr:=i;
      break;
    end;
  end;
end;


{***************** BoardKeyClick ************}
procedure TForm1.BoardKeyClick(Sender:TObject);
{Accept (or reject) a click on the board}
begin
  with tEdit(sender) do
  begin
    if text=' ' then
    begin
      if level mod 2 =1
      then showmessage('Machine moves first, click Start button to begin a game')
      else text:='O';
      strkey[tag]:='O';  {update the key}
      if winner('O') then wrapup('O')
      else
      begin
        inc(level);
        makemove;
      end;
    end
    else messagebeep(MB_ICONEXCLAMATION);
  end;
end;


{********** ResetBtnClick ****************}
procedure TForm1.ResetBtnClick(Sender: TObject);
{Reset games won/lost counters}
begin
  totgames:=0;
  XWins:=0;
  OWins:=0;
end;

{************** StartBtnClick ***********}
procedure TForm1.StartBtnClick(Sender: TObject);
{Start a new gane}
var i:integer;
begin
  level:=1;
  gameover:=false;
  autoplay:=false;
  nbrmoves:=0;
  for i:= 0 to 8 do board[i].text:=' ';
  strkey:='---------';
  listbox3.Clear;
  makemove;
end;

{************* Run100BtnClick ************}
procedure TForm1.Run100BtnClick(Sender: TObject);
{run a bunch of random games}
var
  i,n:integer;
begin
  gameover:=true;  {to force 1st startbtnclick}
  startbtnclick(sender);
  autoplay:=true;
  for i:= 1 to gamecount.value do
  begin
    repeat
      repeat
        n:=random(9);
      until strkey[n+1]='-';
      boardkeyclick(board[n]);
      application.processmessages;
    until gameover;
    If i<gamecount.value then startbtnclick(sender);
    autoplay:=true;
  end;
end;

{******************* ResetWeightBtnClick ***********}
procedure TForm1.ResetWeightsBtnClick(Sender: TObject);
begin  initweights; end;

procedure TForm1.DebugBtnClick(Sender: TObject);
begin
  debugpanel.visible:= not debugpanel.visible;
end;

end.