X-Git-Url: http://winboard.nl/cgi-bin?a=blobdiff_plain;f=parser.c;h=d86c8b8c47cc80764191695e35a0ed178bb570f1;hb=a009a27e8c1e0bfa818f12fdcae675d0babc510a;hp=d2bae37adbc5c013fc0ab6ad3efb8a269fa9774a;hpb=ab5cae2d93784e7f5aea0ec871eaf6386b9296eb;p=xboard.git diff --git a/parser.c b/parser.c index d2bae37..d86c8b8 100644 --- a/parser.c +++ b/parser.c @@ -1,13 +1,24 @@ -// New PGN parser by by HGM. I was dissatisfied with the old flex-generated parser for several reasons: -// 1) It required flex to build -// 2) It was not possible to use variant-dependent syntax, which gave trouble for '+' as Sogi promoChar vs check symbol -// 3) It could not handle double-digit rank numbers -// 4) It could not handle PSN moves, with (alpha rank and file digit) -// 5) Having more than 12 ranks would require extension of the rules anyway -// 6) It was cumbersome to maintain, which much code duplication that had to be kept in sync when changing something -// 7) It needed special handling for packaging, because we wanted to include parser.c for people who had no flex -// 8) It was quite large because of the table-driven flex algorithm. -// This new parser suffers from none of that. It might even accomodate traditional Xiangqi notation at some future time. +/* + * parser.c -- + * + * Copyright 2011, 2012, 2013 Free Software Foundation, Inc. + * ------------------------------------------------------------------------ + * + * GNU XBoard is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * GNU XBoard is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. * + * + *------------------------------------------------------------------------ + ** See the file ChangeLog for a revision history. */ #include "config.h" #include @@ -40,7 +51,8 @@ static char fromString = 0, lastChar = '\n'; #define ALPHABETIC 2 #define BADNUMBER (-2000000000) -int ReadLine() +int +ReadLine () { // Read one line from the input file, and append to the buffer char c, *start = inPtr; if(fromString) return 0; // parsing string, so the end is a hard end @@ -55,7 +67,8 @@ int ReadLine() return 1; } -int Scan(char c, char **p) +int +Scan (char c, char **p) { // line-spanning skip to mentioned character or EOF do { while(**p) if(*(*p)++ == c) return 0; @@ -64,7 +77,8 @@ int Scan(char c, char **p) return 1; } -int SkipWhite(char **p) +int +SkipWhite (char **p) { // skip spaces tabs and newlines; return 1 if anything was skipped char *start = *p; do{ @@ -73,7 +87,8 @@ int SkipWhite(char **p) return *p != start; } -int Match(char *pattern, char **ptr) +inline int +Match (char *pattern, char **ptr) { char *p = pattern, *s = *ptr; while(*p && (*p == *s++ || s[-1] == '\r' && *p--)) p++; @@ -84,7 +99,8 @@ int Match(char *pattern, char **ptr) return 0; // no match, no ptr update } -int Word(char *pattern, char **p) +inline int +Word (char *pattern, char **p) { if(Match(pattern, p)) return 1; if(*pattern >= 'a' && *pattern <= 'z' && *pattern - **p == 'a' - 'A') { // capitalized @@ -95,15 +111,16 @@ int Word(char *pattern, char **p) return 0; } -int Verb(char *pattern, char **p) +int +Verb (char *pattern, char **p) { int res = Word(pattern, p); if(res && !Match("s", p)) Match("ed", p); // eat conjugation suffix, if any return res; } - -int Number(char **p) +int +Number (char **p) { int val = 0; if(**p < '0' || **p > '9') return BADNUMBER; @@ -113,7 +130,8 @@ int Number(char **p) return val; } -int RdTime(char c, char **p) +int +RdTime (char c, char **p) { char *start = ++(*p), *sec; // increment *p, as it was pointing to the opening ( or { if(Number(p) == BADNUMBER) return 0; @@ -128,23 +146,25 @@ int RdTime(char c, char **p) return 0; } -char PromoSuffix(char **p) +char +PromoSuffix (char **p) { char *start = *p; if(**p == 'e' && (Match("ep", p) || Match("e.p.", p))) { *p = start; return NULLCHAR; } // non-compliant e.p. suffix is no promoChar! if(**p == '+' && gameInfo.variant == VariantShogi) { (*p)++; return '+'; } - if(**p == '=') (*p)++; //optional = - if(**p == '(' && (*p)[2] == ')' && isalpha( (*p)[1] )) { (*p) += 3; return (*p)[-2]; } - if(isalpha(**p)) return *(*p)++; + if(**p == '=' || (gameInfo.variant == VariantSChess) && **p == '/') (*p)++; // optional = (or / for Seirawan gating) + if(**p == '(' && (*p)[2] == ')' && isalpha( (*p)[1] )) { (*p) += 3; return ToLower((*p)[-2]); } + if(isalpha(**p)) return ToLower(*(*p)++); if(*p != start) return '='; // must be the optional = return NULLCHAR; // no suffix detected } -int NextUnit(char **p) +int +NextUnit (char **p) { // Main parser routine int coord[4], n, result, piece, i; char type[4], promoted, separator, slash, *oldp, *commentEnd, c; - int wom = WhiteOnMove(yyboardindex); + int wom = quickFlag ? quickFlag&1 : WhiteOnMove(yyboardindex); // ********* try white first, because it is so common ************************** if(**p == ' ' || **p == '\n' || **p == '\t') { parseStart = (*p)++; return Nothing; } @@ -157,44 +177,12 @@ int NextUnit(char **p) } parseStart = oldp = *p; // remember where we begin - // Next we do some common symbols where the first character commits us to things that cannot possibly be a move - // (but not {} comments, as those force time-consuming matching of PGN results immediately after it) - - // ********* PGN tags ****************************************** - if(**p == '[') { - oldp = ++(*p); - if(Match("--", p)) { // "[--" could be start of position diagram - if(!Scan(']', p) && (*p)[-3] == '-' && (*p)[-2] == '-') return PositionDiagram; - *p = oldp; - } - SkipWhite(p); - if(isdigit(**p) || isalpha(**p)) { - do (*p)++; while(isdigit(**p) || isalpha(**p) || **p == '+' || - **p == '-' || **p == '=' || **p == '_' || **p == '#'); - SkipWhite(p); - if(*(*p)++ == '"') { - while(**p != '\n' && (*(*p)++ != '"'|| (*p)[-2] == '\\')); // look for unescaped quote - if((*p)[-1] !='"') { *p = oldp; Scan(']', p); return Comment; } // string closing delimiter missing - SkipWhite(p); if(*(*p)++ == ']') return PGNTag; - } - } - Scan(']', p); return Comment; - } - - - // ********* variations (nesting) ****************************** - if(**p =='(') { - if(RdTime(')', p)) return ElapsedTime; - return Open; - } - if(**p ==')') { (*p)++; return Close; } - if(**p == ';') { while(**p != '\n') (*p)++; return Comment; } - // ********* attempt to recognize a SAN move in the leading non-blank text ***** piece = separator = promoted = slash = n = 0; for(i=0; i<4; i++) coord[i] = -1, type[i] = NOTHING; if(**p == '+') (*p)++, promoted++; + if(**p >= 'a' && **p <= 'z' && (*p)[1]== '@') piece =*(*p)++ + 'A' - 'a'; else if(**p >= 'A' && **p <= 'Z') { piece = *(*p)++; // Note we could test for 2-byte non-ascii names here if(**p == '/') slash = *(*p)++; @@ -232,7 +220,6 @@ int NextUnit(char **p) type[1] = NOTHING; // disambiguator goes in first two positions n = 4; } -if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coord[0], coord[1], coord[2], coord[3], type[0], type[1], type[2], type[3]); // we always get here; move must be completely read now, with to-square coord(s) at end if(n == 3) { // incomplete to-square. Could be Xiangqi traditional, or stuff like fxg if(piece && type[1] == NOTHING && type[0] == NUMERIC && type[2] == NUMERIC && @@ -246,7 +233,7 @@ if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coor && (coord[0] != 14 || coord[2] != 14) /* reserve oo for castling! */ ) { piece = 'P'; n = 4; // kludge alert: fake full to-square } - } + } else if(n == 1 && type[0] == NUMERIC && coord[0] > 1) { while(**p == '.') (*p)++; return Nothing; } // fast exit for move numbers if(n == 4 && type[2] != type[3] && // we have a valid to-square (kludge: type[3] can be NOTHING on fxg type move) (piece || !promoted) && // promoted indicator only valid on named piece type (type[2] == ALPHABETIC || gameInfo.variant == VariantShogi)) { // in Shogi also allow alphabetic rank @@ -338,10 +325,34 @@ if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coor } } badMove:// we failed to find algebraic move + *p = oldp; + // Next we do some common symbols where the first character commits us to things that cannot possibly be a move + + // ********* PGN tags ****************************************** + if(**p == '[') { + oldp = ++(*p); + if(Match("--", p)) { // "[--" could be start of position diagram + if(!Scan(']', p) && (*p)[-3] == '-' && (*p)[-2] == '-') return PositionDiagram; + *p = oldp; + } + SkipWhite(p); + if(isdigit(**p) || isalpha(**p)) { + do (*p)++; while(isdigit(**p) || isalpha(**p) || **p == '+' || + **p == '-' || **p == '=' || **p == '_' || **p == '#'); + SkipWhite(p); + if(**p == '"') { + (*p)++; + while(**p != '\n' && (*(*p)++ != '"'|| (*p)[-2] == '\\')); // look for unescaped quote + if((*p)[-1] !='"') { *p = oldp; Scan(']', p); return Comment; } // string closing delimiter missing + SkipWhite(p); if(*(*p)++ == ']') return PGNTag; + } + } + Scan(']', p); return Comment; + } + // ********* SAN Castings ************************************* - *p = oldp; if(**p == 'O' || **p == 'o' || **p == '0') { int castlingType = 0; if(Match("O-O-O", p) || Match("o-o-o", p) || Match("0-0-0", p) || @@ -350,6 +361,9 @@ badMove:// we failed to find algebraic move Match("OO", p) || Match("oo", p) || Match("00", p)) castlingType = 1; if(castlingType) { //code from old parser, collapsed for both castling types, and streamlined a bit int rf, ff, rt, ft; ChessSquare king; + char promo=NULLCHAR; + + if(gameInfo.variant == VariantSChess) promo = PromoSuffix(p); if (yyskipmoves) return (int) AmbiguousMove; /* not disambiguated */ @@ -365,7 +379,7 @@ badMove:// we failed to find algebraic move ff = (BOARD_WIDTH-1)>>1; // this would be d-file if (boards[yyboardindex][rf][ff] == king) { /* ICS wild castling */ - ft = castlingType == 1 ? BOARD_LEFT+1 : BOARD_RGHT-3; + ft = castlingType == 1 ? BOARD_LEFT+1 : (gameInfo.variant == VariantJanus ? BOARD_RGHT-2 : BOARD_RGHT-3); } else { ff = BOARD_WIDTH>>1; // e-file ft = castlingType == 1 ? BOARD_RGHT-2 : BOARD_LEFT+2; @@ -381,16 +395,25 @@ badMove:// we failed to find algebraic move if (appData.debugMode) fprintf(debugFP, "Parser FRC (type=%d) %d %d\n", castlingType, ff, ft); if(ff == NoRights || ft == NoRights) return ImpossibleMove; } - sprintf(currentMoveString, "%c%c%c%c",ff+AAA,rf+ONE,ft+AAA,rt+ONE); + sprintf(currentMoveString, "%c%c%c%c%c",ff+AAA,rf+ONE,ft+AAA,rt+ONE,promo); if (appData.debugMode) fprintf(debugFP, "(%d-type) castling %d %d\n", castlingType, ff, ft); return (int) LegalityTest(boards[yyboardindex], PosFlags(yyboardindex)&~F_MANDATORY_CAPTURE, // [HGM] losers: e.p.! - rf, ff, rt, ft, NULLCHAR); + rf, ff, rt, ft, promo); } } + // ********* variations (nesting) ****************************** + if(**p =='(') { + if(RdTime(')', p)) return ElapsedTime; + return Open; + } + if(**p ==')') { (*p)++; return Close; } + if(**p == ';') { while(**p != '\n') (*p)++; return Comment; } + + // ********* Comments and result messages ********************** *p = oldp; commentEnd = NULL; result = 0; if(**p == '{') { @@ -488,6 +511,10 @@ badMove:// we failed to find algebraic move *p = oldp; // we might need to re-match the skipped stuff } + if(Match("@@@@", p) || Match("--", p) || Match("Z0", p) || Match("pass", p) || Match("null", p)) { + strncpy(currentMoveString, "@@@@", 5); + return yyboardindex & F_WHITE_ON_MOVE ? WhiteDrop : BlackDrop; + } // ********* Efficient skipping of (mostly) alphabetic chatter ********** while(isdigit(**p) || isalpha(**p) || **p == '-') (*p)++; @@ -514,12 +541,14 @@ badMove:// we failed to find algebraic move /* Return offset of next pattern in the current file. */ -int yyoffset() +int +yyoffset () { return ftell(inputFile) - (inPtr - parsePtr); // subtract what is read but not yet parsed } -void yynewfile (FILE *f) +void +yynewfile (FILE *f) { // prepare parse buffer for reading file inputFile = f; inPtr = parsePtr = inputBuf; @@ -528,14 +557,16 @@ void yynewfile (FILE *f) *inPtr = NULLCHAR; // make sure we will start by reading a line } -void yynewstr P((char *s)) +void +yynewstr P((char *s)) { parsePtr = s; inputFile = NULL; fromString = 1; } -int yylex() +int +yylex () { // this replaces the flex-generated parser int result = NextUnit(&parsePtr); char *p = parseStart, *q = yytext; @@ -545,7 +576,8 @@ int yylex() return result; } -int Myylex() +int +Myylex () { // [HGM] wrapper for yylex, which treats nesting of parentheses int symbol, nestingLevel = 0, i=0; char *p; @@ -565,7 +597,8 @@ int Myylex() return symbol; } -ChessMove yylexstr(int boardIndex, char *s, char *buf, int buflen) +ChessMove +yylexstr (int boardIndex, char *s, char *buf, int buflen) { ChessMove ret; char *savPP = parsePtr;