返回
ScanSingleToken——V8底层技术细节的学习之旅
前端
2024-02-20 14:31:12
V8引擎概述
V8是Google开发的开源JavaScript引擎,也是当今最受欢迎的JavaScript引擎之一。它被广泛应用于Chrome浏览器、Node.js等项目。V8引擎以其高效的执行速度和强大的功能而著称,成为许多开发者的首选。
ScanSingleToken方法在词法分析中的作用
词法分析是编译器或解释器对源代码进行分析的第一个阶段。在这个阶段,源代码被分解成一系列的词法符号,如标识符、数字、运算符、括号等。这些词法符号是编程语言的基本构成单元,也是编译器或解释器进行语法分析的基础。
在V8引擎中,ScanSingleToken方法负责从源代码中识别单个的词法符号。它采用了一个简单的有限状态自动机(FSM)模型。FSM是一种能够识别正则语言(即可以用正则表达式描述的语言)的抽象机器。在ScanSingleToken方法中,FSM用于识别JavaScript语言中各种词法符号的模式。
ScanSingleToken方法的实现
ScanSingleToken方法的实现位于V8源码的src/parsing目录下(scanner.cc及其内联头文件scanner-inl.h)。方法的定义如下:
TokenInfo Scanner::ScanSingleToken(bool can_be_harmony,
bool can_be_break,
int stop_pos,
int stop_control_char) {
DCHECK_NOT_NULL(pre_parser_);
DCHECK_LE(start_pos_, stream_->position());
DCHECK_LE(stream_->position(), stop_pos);
DCHECK_LT(stop_control_char, base::numeric_limits<uint16_t>::max());
Token::Value next = Next();
// Eat white space and newlines.
if (next.is_white_space()) {
bool saw_cr = false;
bool saw_lf = false;
while (next.is_white_space()) {
if (next.is_line_terminator()) {
if (next.IsNewline()) {
// Allow non-terminated newlines anywhere.
saw_lf = true;
} else {
saw_cr = true;
next = Next();
if (!next.is_line_terminator()) continue;
saw_lf = next.IsNewline();
}
}
next = Next();
}
}
Token::Value end_token_type =
(stop_pos == -1) ? next : Token::UNINITIALIZED;
while (next.is_strict_mode_reserved() &&
next != Token::ILLEGAL &&
stop_pos == -1 &&
!strict_mode_) {
// We found a strict mode reserved word in non-strict mode code. Eat
// the token and continue.
next = Next();
}
Token::Value token_type;
if (!Token::IsTokenType(next) && next.is_strict_mode_reserved() &&
strict_mode_) {
// For strict mode code, we have to treat certain keywords as literals.
token_type = next;
next = Next();
if (token_type == Token::SUPER &&
(!IsIdentifierStart(next) || next == Token::PRIVATE_IDENTIFIER)) {
// We look ahead to see if super() is used as a function call.
token_type = Token::SUPER_CALL;
}
} else {
token_type = Token::ConvertCharToToken(next);
}
if (token_type == Token::DIV) {
next = Next();
if (next.is_assign()) {
token_type = Token::DIV_ASSIGN;
next = Next();
}
} else if (token_type == Token::STAR) {
next = Next();
if (next.is_assign()) {
token_type = Token::MUL_ASSIGN;
next = Next();
}
} else if (token_type == Token::SUB) {
next = Next();
if (next.is_assign()) {
token_type = Token::SUB_ASSIGN;
next = Next();
}
} else if (token_type == Token::SHL) {
next = Next();
if (next.is_assign()) {
token_type = Token::SHL_ASSIGN;
next = Next();
}
} else if (token_type == Token::SHR) {
next = Next();
if (next.is_assign()) {
token_type = Token::SHR_ASSIGN;
next = Next();
}
} else if (token_type == Token::SAR) {
next = Next();
if (next.is_assign()) {
token_type = Token::SAR_ASSIGN;
next = Next();
}
} else if (token_type == Token::MOD) {
next = Next();
if (next.is_assign()) {
token_type = Token::MOD_ASSIGN;
next = Next();
}
} else if (token_type == Token::XOR) {
next = Next();
if (next.is_assign()) {
token_type = Token::XOR_ASSIGN;
next = Next();
}
} else if (token_type == Token::BIT_OR) {
next = Next();
if (next.is_assign()) {
token_type = Token::BIT_OR_ASSIGN;
next = Next();
}
} else if (token_type == Token::BIT_AND) {
next = Next();
if (next.is_assign()) {
token_type = Token::BIT_AND_ASSIGN;
next = Next();
}
} else if (token_type == Token::NE) {
next = Next();
if (next.is_eq()) {
token_type = Token::NE_EQ;
next = Next();
}
} else if (token_type == Token::EQ_EQ) {
next = Next();
if (next.is_eq()) {
token_type = Token::EQ_EQ_EQ;
next = Next();
}
} else if (token_type == Token::NOT) {
next = Next();
if (next.is_in()) {
token_type = Token::NOT_IN;
next = Next();
}
} else if (token_type == Token::LT) {
next = Next();
if (next.is_eq()) {
token_type = Token::LT_EQ;
next = Next();
} else if (next.is_lt()) {
token_type = Token::SHL;
next = Next();
}
} else if (token_type == Token::GT) {
next = Next();
if (next.is_eq()) {
token_type = Token::GT_EQ;
next = Next();
} else if (next.is_gt()) {
token_type = Token::SHR;
next = Next();
}
}
if (stop_pos != -1 && next.position() >= stop_pos) {
if (token_type == Token::NUMBER || token_type == Token::STRING ||
token_type == Token::REGEX) {
// If we hit stop_pos in the middle of a literal, we need to backtrack
// and return an incomplete token. This token is then recovered by
// ScanRestOfLiteral.
// We can't do this if the token has been converted to another type,
// e.g. DIV -> DIV_ASSIGN.
BacktrackTo(start_pos_);
token_type = Token::ILLEGAL;
} else {
// For other tokens we can report the token right away and continue
// parsing.
next = Token::Truncate(next, stop_pos);
}
}
if (token_type == Token::END_OF_FILE) {
Stream::Position fp = stream_->position();
if (fp == stop_pos) {
next.column_offset_ = stop_control_char;