Omar Alberto Hidalgo Nieves Compiladores Tarea 3

Tarea# 3
Analizador Léxico
Diseñar un analizador léxico para código fuente en lenguaje C.

1. Copiamos en el editor de texto lo siguiente y lo guardamos en el archivo clexer.l.

ES (\\(['"\?\\abfnrtv]|[0-7]{1,3}|x[a-fA-F0-9]+))
WS [ \t\v\n\f] hex x[0-9a-fA-F]+ oct
"/*" { comment(); }
"//".* { /* ignorar comentario */ }
"auto" { return(AUTO); }
"break" { return(BREAK); }
"case" { return(CASE); }
"char" { return(CHAR); }
"const" { return(CONST); }
"continue" { return(CONTINUE); }
"default" { return(DEFAULT); }
"do" { return(DO); }
"double" { return(DOUBLE); }
"else" { return(ELSE); }
"enum" { return(ENUM); }
"extern" { return(EXTERN); }
"float" { return(FLOAT); }
"for" { return(FOR); }
"goto" { return(GOTO); }
"if" { return(IF); }
"inline" { return(INLINE); }
"int" { return(INT); }
"long" { return(LONG); }
"register" { return(REGISTER); }
"restrict" { return(RESTRICT); }
"return" { return(RETURN); }
"short" { return(SHORT); }
"signed" { return(SIGNED); }
"sizeof" { return(SIZEOF); }
"static" { return(STATIC); }
"struct" { return(STRUCT); }
"switch" { return(SWITCH); }
"typedef" { return(TYPEDEF); }
"union" { return(UNION); }
"unsigned" { return(UNSIGNED); } "void"
{ return(VOID); }
"volatile" { return(VOLATILE); }
"while" { return(WHILE); }
"_Alignas" { return ALIGNAS; }
"_Alignof" { return ALIGNOF; }
"_Atomic" { return ATOMIC; }
"_Bool" { return BOOL; }
"_Complex" { return COMPLEX; }
"_Generic" { return GENERIC; }
"_Imaginary" { return IMAGINARY; }
"_Noreturn" { return NORETURN; }
"_Static_assert" { return STATIC_ASSERT; }
"_Thread_local" { return THREAD_LOCAL; } {L}{A}*
{ /* Identificador */
yylval.str = strdup(yytext);
{HP}{H}+{IS}? { /* Constante hexadecimal */
yylval.val.ival = strtol(yytext,NULL,16);
yylval.val.typ = INTVAL;
{NZ}{D}*{IS}? { /* Constante entera decimal */
yylval.val.ival = atol(yytext);
yylval.val.typ = INTVAL;
0{O}+{IS}? { yylval.val.ival = strtol(yytext,NULL,8);
yylval.val.typ = INTVAL; return(I_CONSTANT);
{D}+{IS}? { yylval.val.ival = atol(yytext);
yylval.val.typ = INTVAL;
{D}+{E}{FS}? { yylval.val.dval = atof(yytext);
yylval.val.typ = DOUBLEVAL;

{D}*"."{D}+({E})?{FS}? { yylval.val.dval = atof(yytext);
yylval.val.typ = DOUBLEVAL;

{D}+"."{D}*({E})?{FS}? { yylval.val.dval = atof(yytext);
yylval.val.typ = DOUBLEVAL;

{SP}?\" { /* Inicio de constante cadena */
buffer = malloc(1); buffer_size
= 1; strcpy(buffer,"");
\n { /* Avance de línea no permitido en constante cadena */
yyerror("Unterminated string constant");
<<EOF>> { /* Se terminó archivo antes de terminar constante cadena */
yyerror("EOF in string constant");
} [^\\\n"] {
buffer = realloc(buffer,buffer_size+yyleng+1);
buffer_size += yyleng; strcat(buffer,yytext);
\\\n /* ignore this */
\\{hex} { int temp
=0,loop = 0; temp =
strtol(yytext+1,NULL,16); buffer =
buffer[buffer_size-1] = temp;
buffer[buffer_size] = '\0';
buffer_size += 1;
} \\{oct} {
int temp =0,loop = 0; temp =
strtol(yytext+1,NULL,8); buffer =
buffer[buffer_size-1] = temp;
buffer[buffer_size] = '\0';
buffer_size += 1;
} \\[^\n] {
buffer = realloc(buffer,buffer_size+1);
case '\'' : buffer[buffer_size-1] = '\"'; break;
case '\"' : buffer[buffer_size-1] = '\"'; break;
case '\?' : buffer[buffer_size-1] = '\?'; break;
case '\\' : buffer[buffer_size-1] = '\\'; break;
case 'a' : buffer[buffer_size-1] = '\b'; break; case
'b' : buffer[buffer_size-1] = '\b'; break; case 'f'
: buffer[buffer_size-1] = '\f'; break; case 'n' :
buffer[buffer_size-1] = '\n'; break; case 'r' :
buffer[buffer_size-1] = '\r'; break; case 't' :
buffer[buffer_size-1] = '\t'; break; case 'v' :
buffer[buffer_size-1] = '\v'; break; default :
buffer[buffer_size-1] = yytext[yyleng-1];
buffer[buffer_size] = '\0';
buffer_size += 1;
\" {
/* Fin de constante cadena */
yylval.str = buffer; BEGIN(INITIAL);
{CP}?\' { /* Inicio de constante caracter */
buffer = malloc(1); buffer_size =
1; strcpy(buffer,"");
\n { /* constante caracter incompleta */
yyerror("Unterminated character constant");
<<EOF>> { /* Se terminó archivo antes de terminar constante caracter */
yyerror("EOF in character constant");
[^\\'] { /* caracter normal */
buffer = realloc(buffer,buffer_size+yyleng+1);
buffer_size += yyleng; strcat(buffer,yytext);
if(buffer_size>2) yyerror("Illegal length of
character constant");
\\{hex} { /* secuencia de escape en hexadecimal */
int temp =0,loop = 0; temp =
strtol(yytext+1,NULL,16); buffer =
realloc(buffer,buffer_size+1); buffer[buffer_size-
1] = temp; buffer[buffer_size] = '\0';
buffer_size += 1; if(buffer_size>2)
yyerror("Illegal length of character constant"); }
\\{oct} { /* secuencia de escape en octal */
int temp =0,loop = 0; temp =
strtol(yytext+1,NULL,8); buffer =
buffer[buffer_size-1] = temp;
buffer[buffer_size] = '\0';
buffer_size += 1; if(buffer_size>2)
yyerror("Illegal length of character constant");
\\['"\?\\abfnrtv] { /* Simple escape sequence inside a character constant */
    /* buffer_size counts the terminating '\0', so the decoded character
       overwrites the old terminator at buffer_size-1 and a new terminator
       is written right after it. */
    buffer = realloc(buffer,buffer_size+1);
    switch(yytext[yyleng-1]){
    case '\'' : buffer[buffer_size-1] = '\''; break; /* was stored as '"' */
    case '\"' : buffer[buffer_size-1] = '\"'; break;
    case '\?' : buffer[buffer_size-1] = '\?'; break;
    case '\\' : buffer[buffer_size-1] = '\\'; break;
    case 'a' : buffer[buffer_size-1] = '\a'; break; /* was stored as '\b' */
    case 'b' : buffer[buffer_size-1] = '\b'; break;
    case 'f' : buffer[buffer_size-1] = '\f'; break;
    case 'n' : buffer[buffer_size-1] = '\n'; break;
    case 'r' : buffer[buffer_size-1] = '\r'; break;
    case 't' : buffer[buffer_size-1] = '\t'; break;
    case 'v' : buffer[buffer_size-1] = '\v'; break;
    }
    buffer[buffer_size] = '\0';
    buffer_size += 1;
    /* More than one character plus the terminator means the constant is too long. */
    if(buffer_size>2) yyerror("Illegal length of character constant");
}
\' {
/* Fin de constante caracter */
yylval.val.ival = buffer[0]; if(buffer_size>2)
yyerror("Illegal length of character constant");
yylval.val.typ = CHARVAL; free(buffer);
"..." { return(ELLIPSIS); }
">>=" { return(RIGHT_ASSIGN); }
"<<=" { return(LEFT_ASSIGN); }
"+=" { return(ADD_ASSIGN); }
"-=" { return(SUB_ASSIGN); }
"*=" { return(MUL_ASSIGN); }
"/=" { return(DIV_ASSIGN); }
"%=" { return(MOD_ASSIGN); }
"&=" { return(AND_ASSIGN); }
"^=" { return(XOR_ASSIGN); }
"|=" { return(OR_ASSIGN); }
">>" { return(RIGHT_OP); }
"<<" { return(LEFT_OP); }
"++" { return(INC_OP); }
"--" { return(DEC_OP); }
"->" { return(PTR_OP); }
"&&" { return(AND_OP); }
"||" { return(OR_OP); }
"<=" { return(LE_OP); }
">=" { return(GE_OP); }
"==" { return(EQ_OP); }
"!=" { return(NE_OP); }
";" { return(';'); }
("{"|"<%") { return('{'); }
("}"|"%>") { return('}'); }
"," { return(','); }
":" { return(':'); }
"=" { return('='); }
"(" { return('('); }
")" { return(')'); }
"&" { return('&'); }
"!" { return('!'); }
"~" { return('~'); }
"-" { return('-'); }
"+" { return('+'); }
"*" { return('*'); }
"/" { return('/'); }
"%" { return('%'); }
"<" { return('<'); }
">" { return('>'); }
"^" { return('^'); }
"|" { return('|'); }
"?" { return('?'); }
goto loop;
} if (c != 0 &&
} void printstr(char *str) {
char c; while((c =
*str++)!= '\0') {
printchr(c); }
"DEFAULT", printf(",\"");
printf(">"); }
printf("%c",d); }
return 0;
2. Una vez guardado el archivo anterior, procedemos a compilarlo con Flex. Hay que procurar que no haya
errores; una causa frecuente son los caracteres de comillas: las correctas son las comillas verticales ("),
mientras que Microsoft Office usa comillas sesgadas (“).

3.- Editamos lo siguiente en el editor de texto y lo guardamos como y.tab.h:

/* A Bison parser, made by GNU Bison 3.0.4. */

/* Bison interface for Yacc-like parsers in C

Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc.

This program is free software: you can redistribute it and/or modify

it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
under terms of your choice, so long as that work isn't itself a
parser generator using the skeleton or a modified version thereof
as a parser skeleton. Alternatively, if you modify or redistribute
the parser skeleton itself, you may (at your option) remove this
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */

/* Debug traces: YYDEBUG defaults to 0 unless it is already defined, and
   yydebug is declared only when YYDEBUG is non-zero.  */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif

/* Token type. */
int yyparse (void);

#endif /* !YY_YY_Y_TAB_H_INCLUDED */

4.- Genere el ejecutable del analizador léxico por medio del siguiente comando.
5.- Una vez generado el ejecutable, copiamos el siguiente código en el editor de texto y lo guardamos como
* a simple C program

void printf();

int main()
int i, sum = 6;
double f = 1.5;
char* c = "Esta es una prueba" ;

for( i = 1; i <= LAST ; i++ ) {

sum += i;
} /*-for-*/
printf("sum = %d\n", sum);
return 0;
6.- Hecho lo anterior, ejecutamos el analizador léxico por medio de:

Identifica los tokens

EXPLICACIÓN: Como se mostró previamente, podemos observar que el analizador revisa el texto
ingresado por el usuario y clasifica cada elemento como texto (IDENTIFIER) o como número
(I_CONSTANT). Asimismo, una combinación de letras y números se reporta como una cadena
reconocida; en cambio, otros caracteres, como ‘?’ y ‘+’, no son reconocidos por este analizador.
7. Pruebe el analizador léxico con otros archivos en C. Verifique que no contengan directivas de

