#include <iostream>
#include <clang-c/Index.h>
void DisplayTokens(const CXTranslationUnit& tu) {
CXCursor cursor = clang_getTranslationUnitCursor(tu);
CXSourceRange range = clang_getCursorExtent(cursor);
CXToken* tokens = NULL;
unsigned num_tokens = 0;
clang_tokenize(tu, range, &tokens, &num_tokens);
for (unsigned i = 0; i < num_tokens; ++i) {
const CXToken& token = tokens[i];
CXTokenKind kind = clang_getTokenKind(token);
CXString spelling_str = clang_getTokenSpelling(tu, token);
CXSourceRange range = clang_getTokenExtent(tu, token);
std::cerr << clang_getCString(spelling_str) << ", "
<< static_cast<int>(kind) << ", "
<< "[" << range.begin_int_data
<< ":" << range.end_int_data << "]"
<< std::endl;
clang_disposeString(spelling_str);
}
clang_disposeTokens(tu, tokens, num_tokens);
}
void Tokenize(const std::string& filepath) {
const int exclude_decls_from_pch = 1;
const int display_diagnostics = 0;
CXIndex index = clang_createIndex(exclude_decls_from_pch,
display_diagnostics);
const unsigned index_options = CXGlobalOpt_None;
clang_CXIndex_setGlobalOptions(index, index_options);
const char* command_line_args[] = { "-Xclang", "-cc1" };
const int num_command_line_args = sizeof(command_line_args) / sizeof(char*);
const unsigned num_unsaved_files = 0;
CXUnsavedFile* unsaved_files = NULL;
CXTranslationUnit tu = clang_createTranslationUnitFromSourceFile(
index, filepath.c_str(), num_command_line_args, command_line_args,
num_unsaved_files, unsaved_files);
if (tu != NULL) {
DisplayTokens(tu);
clang_disposeTranslationUnit(tu);
} else {
std::cerr << "Failed to tokenize: \"" << filepath << "\"" << std::endl;
}
clang_disposeIndex(index);
}
int main(int argc, char** argv) {
if (argc < 2) {
return 1;
}
std::string filepath(argv[1]);
Tokenize(filepath);
return 0;
}
Check
$ g++ tokenizer.cc -lclang
$ cat hello_world.c
int main(void) {
printf("hello, world\n");
return 0;
}
$ ./a.out hello_world.c
include, 2, [3:10]
<, 0, [11:12]
stdio, 2, [12:17]
., 0, [17:18]
h, 2, [18:19]
>, 0, [19:20]
int, 1, [22:25]
main, 2, [26:30]
(, 0, [30:31]
void, 1, [31:35]
), 0, [35:36]
{, 0, [37:38]
printf, 2, [41:47]
(, 0, [47:48]
"hello, world\n", 3, [48:64]
), 0, [64:65]
;, 0, [65:66]
return, 1, [69:75]
0, 3, [76:77]
;, 0, [77:78]
}, 0, [79:80]