Delete metadata of an image file by using ExifTool

Abstract

Delete metadata of an image file by using ExifTool on Ubuntu 12.04.
The following command is useful for sanitizing images before uploading them, and for sanitizing images that have been downloaded.

Install ExifTool

$ wget http://www.sno.phy.queensu.ca/~phil/exiftool/Image-ExifTool-9.60.tar.gz
$ tar xf Image-ExifTool-9.60.tar.gz
$ cd Image-ExifTool-9.60/
$ perl Makefile.PL
$ make test
$ sudo make install
$ cd ..
$ rm -rf Image-ExifTool-9.60
$ rm Image-ExifTool-9.60.tar.gz

Command

# Delete metadata of an image file.
$ exiftool -all= input_image.jpg -out output_image.jpg

Example

Use AcerCE-5330.jpg in Acer.tar.gz downloaded from sample images as an example.

# Before deleting metadata.
$ exiftool AcerCE-5330.jpg 
ExifTool Version Number         : 9.60
File Name                       : AcerCE-5330.jpg
Directory                       : .
File Size                       : 22 kB
File Modification Date/Time     : yyyy:MM:dd HH:mm:ss
File Access Date/Time           : yyyy:MM:dd HH:mm:ss
File Inode Change Date/Time     : yyyy:MM:dd HH:mm:ss
File Permissions                : rw-r--r--
File Type                       : JPEG
MIME Type                       : image/jpeg
Exif Byte Order                 : Little-endian (Intel, II)
Image Description               : ACER
Make                            : ACER
Camera Model Name               : CE-5330
Orientation                     : Horizontal (normal)
X Resolution                    : 72
Y Resolution                    : 72
Resolution Unit                 : inches
Software                        : V1.00
Modify Date                     : 2009:10:10 22:58:22
Y Cb Cr Positioning             : Co-sited
Exposure Time                   : 1/30
F Number                        : 2.8
Exposure Program                : Program AE
ISO                             : 100
...
Circle Of Confusion             : 0.005 mm
Field Of View                   : 53.8 deg
Focal Length                    : 5.4 mm (35 mm equivalent: 35.5 mm)
Hyperfocal Distance             : 2.28 m
Light Value                     : 7.9


# Delete metadata and output as AcerCE-5330.out.jpg.
$ exiftool -all= AcerCE-5330.jpg -o AcerCE-5330.out.jpg
    1 image files created


# After deleting metadata.
$ exiftool AcerCE-5330.out.jpg 
ExifTool Version Number         : 9.60
File Name                       : AcerCE-5330.out.jpg
Directory                       : .
File Size                       : 251 bytes
File Modification Date/Time     : yyyy:MM:dd HH:mm:ss
File Access Date/Time           : yyyy:MM:dd HH:mm:ss
File Inode Change Date/Time     : yyyy:MM:dd HH:mm:ss
File Permissions                : rw-rw-r--
File Type                       : JPEG
MIME Type                       : image/jpeg
Image Width                     : 8
Image Height                    : 8
Encoding Process                : Baseline DCT, Huffman coding
Bits Per Sample                 : 8
Color Components                : 3
Y Cb Cr Sub Sampling            : YCbCr4:2:0 (2 2)
Image Size                      : 8x8

Normalize file names

Abstract

Normalize file names on Ubuntu 12.04.

The following script and command:

  • Normalize unicode characters. E.g. image①.jpg -> image1.jpg.
  • Make the extension of a basename lower case. E.g. image.JPG -> image.jpg
  • Strip leading and trailing spaces from the root of a basename. E.g. " image .JPG" -> "image.jpg"
  • Replace multiple spaces with a single space. E.g. "image  file  001.jpg" -> "image file 001.jpg"

Code

normalize_name.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 Shinya
# The MIT License (MIT) http://opensource.org/licenses/mit-license.php

import os
import re
import sys
import unicodedata


def _unicode_to_str(text):
    """Returns text encoded as a UTF-8 byte string.

    Only unicode objects are encoded; any other value is returned unchanged.
    """
    return text.encode('utf-8') if isinstance(text, unicode) else text


def _str_to_unicode(text):
    """Returns text decoded from UTF-8.

    Only str objects are decoded; any other value is returned unchanged.
    """
    if not isinstance(text, str):
        return text
    return text.decode('utf-8')


def _normalize_multiple_spaces(name):
    r"""Replaces every run of whitespace with a single space.

    Anything matched by \s (spaces, tabs, newlines, ...) counts as
    whitespace. Leading and trailing runs are collapsed to one space as
    well; they are not removed.

    Args:
        name: The string to normalize.

    Returns:
        name with each whitespace run replaced by one ' '.

    >>> _normalize_multiple_spaces('\t name    having \t multiple spaces')
    ' name having multiple spaces'
    """
    # A raw string keeps '\s' from being read as a (invalid) string escape.
    return re.sub(r'\s+', ' ', name)


def _remove_special_characters(name):
    """Drops every occurrence of the path separator (os.path.sep) from name.

    >>> _remove_special_characters('name having a slash / character')
    'name having a slash  character'
    """
    pieces = name.split(os.path.sep)
    return ''.join(pieces)


def _normalize_basename(basename):
    """Returns the normalized form of a basename.

    The name is decoded, NFKC-normalized, cleaned of path separators, passed
    through os.path.normcase, and has its whitespace runs collapsed. The
    root and the extension are then stripped separately and the extension
    is lower-cased.

    Replaces multiple spaces with a single space.
    >>> _normalize_basename('name  having\t multiple spaces')
    'name having multiple spaces'

    Strips leading and trailing spaces.
    >>> _normalize_basename('  name starting and ending with spaces   ')
    'name starting and ending with spaces'

    Makes the extension of a basename lower case.
    >>> _normalize_basename('name.JPG')
    'name.jpg'

    Strips the root of a basename.
    >>> _normalize_basename('  name  .jpg')
    'name.jpg'

    Normalizes unicode characters. E.g. ① -> 1
    >>> _normalize_basename('name①.jpg')
    'name1.jpg'
    """
    text = unicodedata.normalize('NFKC', _str_to_unicode(basename))
    text = os.path.normcase(_remove_special_characters(text))
    text = _normalize_multiple_spaces(text)
    # Split only after the clean-up so spaces around the dot are handled.
    stem, suffix = os.path.splitext(text)
    cleaned = stem.strip() + suffix.strip().lower()
    return _unicode_to_str(cleaned)


def _main():
    """Renames the file named on the command line to its normalized name.

    Exits with a usage message unless exactly one argument is given. When
    the basename is already normalized nothing is printed or renamed.
    """
    targets = sys.argv[1:]
    if len(targets) != 1:
        sys.exit('Usage: $ ./normalize_name.py <target_file>')

    parent_dir, old_name = os.path.split(os.path.normpath(targets[0]))
    new_name = _normalize_basename(old_name)
    if old_name == new_name:
        return

    sys.stdout.write('"%s" -> "%s"\n' % (old_name, new_name))
    # os.rename overwrites a file if the file exists.
    old_path = os.path.join(parent_dir, old_name)
    new_path = os.path.join(parent_dir, new_name)
    os.rename(old_path, new_path)


if __name__ == '__main__':
    # Script entry point: only runs when the file is executed directly.
    # Run the following command to run doctests.
    # $ python -m doctest -v normalize_name.py
    _main()
Set permission to run
$ chmod +x normalize_name.py
Tests
$ python -m doctest -v normalize_name.py

Commands

# Set a target directory.
$ TARGET_DIR='/home/shinya/example'
$ find "${TARGET_DIR}" -type f -exec ./normalize_name.py {} \;

Example

$ TARGET_DIR='/home/shinya/example'


# Before running the command.
$ tree "${TARGET_DIR}"
/home/shinya/example
├── image①.jpg
└── subdir
    └──    image  file  001 .jpg


$ find "${TARGET_DIR}" -type f -exec ./normalize_name.py {} \;
"   image  file  001 .jpg" -> "image file 001.jpg"
"image①.jpg" -> "image1.jpg"


# After running the command.
$ tree "${TARGET_DIR}"
/home/shinya/example
├── image1.jpg
└── subdir
    └── image file 001.jpg

Delete suspicious directories and files by using the find command

Abstract

Delete suspicious directories and files by using the find command on Ubuntu 12.04.
The following commands are useful to sanitize files which have been downloaded from a suspicious site.

Commands

# Set a target directory.
$ TARGET_DIR='/home/shinya/suspicious'


# Delete symbolic links
$ find "${TARGET_DIR}" -type l -print -exec rm {} \;


# Delete directories whose name starts with a dot such as .git and .emacs.d.
$ find "${TARGET_DIR}" -type d -name '.*' -print -exec rm -r {} +
# Delete files whose name starts with a dot such as .gitignore and .emacs
$ find "${TARGET_DIR}" -type f -name '.*' -print -exec rm {} \;


# Delete empty directories.
$ find "${TARGET_DIR}" -type d -empty -delete


# Delete suspicious and unnecessary files.
# The following matches are case insensitive (-iname).
# Use -name for case sensitive matches.
$ find "${TARGET_DIR}" -type f -iname '*.exe' -print -exec rm {} \;
$ find "${TARGET_DIR}" -type f -iname 'Thumbs.db' -print -exec rm {} \;
$ find "${TARGET_DIR}" -type f -iname 'a.out' -print -exec rm {} \;

Call Python functions from C++ on Ubuntu 12.04

Code

/home/shinya/py_example/mymain.py

import sys

def main(args):
    """Writes a greeting to stdout, followed by args when it is truthy.

    Args:
        args: Any value. Unless it is falsy (e.g. None or an empty string)
            it is formatted with str.format and written on its own line.
    """
    sys.stdout.write('Hello, world!\n')
    if args:
        sys.stdout.write('{0}\n'.format(args))

/home/shinya/py_example/main.cc

#include <Python.h>

#include <cassert>
#include <iostream>

int main(int argc, char** argv) {
  Py_Initialize();
  assert(Py_IsInitialized());

  const char* kModuleName = "mymain";
  PyObject* module_name = PyString_FromString(kModuleName);
  PyObject* module = PyImport_Import(module_name);

  PyObject* dic = PyModule_GetDict(module);
  const char* kFuncName = "main";
  PyObject* main_func = PyDict_GetItemString(dic, kFuncName);
  assert(PyCallable_Check(main_func));

  PyObject* main_args = PyTuple_New(1);
  PyObject* main_args_0 = PyString_FromString("Hello, Python!");
  PyTuple_SetItem(main_args, 0, main_args_0);
  PyObject_CallObject(main_func, main_args);

  // raise Exception
  PyObject_CallObject(main_func, NULL);
  PyErr_Print();

  Py_Finalize();
  return 0;
}

Check

$ export PYTHONPATH=${PYTHONPATH}:/home/shinya/py_example
$ g++ -Wall -I/usr/include/python2.7 main.cc -lpython2.7
$ ./a.out
Hello, world!
Hello, Python!
TypeError: main() takes exactly 1 argument (0 given)

Call Java functions from C++ by using Java Native Interface (JNI) on Ubuntu 12.04

Code

/home/shinya/jni_example/Main.java

/** Minimal entry point used to demonstrate calling Java from C++ via JNI. */
public class Main {
  /**
   * Prints "Hello, world!" and then the first argument, if one was given.
   *
   * @param args command-line style arguments; only args[0] is used
   */
  public static void main(String[] args) {
    System.out.println("Hello, world!");
    if (args.length == 0) {
      return;
    }
    System.out.println(args[0]);
  }
}

/home/shinya/jni_example/main.cc

#include <jni.h>

#include <cassert>
#include <iostream>

int main(int argc, char** argv) {
  const int kNumOptions = 3;
  JavaVMOption options[kNumOptions] = {
    { const_cast<char*>("-Xmx128m"), NULL },
    { const_cast<char*>("-verbose:gc"), NULL },
    { const_cast<char*>("-Djava.class.path=/home/shinya/jni_example"), NULL }
  };

  JavaVMInitArgs vm_args;
  vm_args.version = JNI_VERSION_1_6;
  vm_args.options = options;
  vm_args.nOptions = sizeof(options) / sizeof(JavaVMOption);
  assert(vm_args.nOptions == kNumOptions);

  JNIEnv* env = NULL;
  JavaVM* jvm = NULL;
  int res = JNI_CreateJavaVM(&jvm, reinterpret_cast<void**>(&env), &vm_args);
  if (res != JNI_OK) {
    std::cerr << "FAILED: JNI_CreateJavaVM " << res << std::endl;
    return -1;
  }

  const char* kClassName = "Main";
  jclass cls = env->FindClass(kClassName);
  if (cls == NULL) {
    std::cerr << "FAILED: FindClass" << std::endl;
    return -1;
  }

  const char* kMethodName = "main";
  jmethodID mid =
      env->GetStaticMethodID(cls, kMethodName, "([Ljava/lang/String;)V");
  if (mid == NULL) {
    std::cerr << "FAILED: GetStaticMethodID" << std::endl;
    return -1;
  }

  const jsize kNumArgs = 1;
  jclass string_cls = env->FindClass("java/lang/String");
  jobject initial_element = NULL;
  jobjectArray method_args = env->NewObjectArray(kNumArgs, string_cls, initial_element);

  jstring method_args_0 = env->NewStringUTF("Hello, Java!");
  env->SetObjectArrayElement(method_args, 0, method_args_0);

  env->CallStaticVoidMethod(cls, mid, method_args);
  jvm->DestroyJavaVM();

  return 0;
}

Check

$ cd /home/shinya/jni_example
$ javac Main.java
$ LIBPATH=/usr/lib/jvm/java-6-openjdk-amd64/jre/lib/amd64/server
$ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${LIBPATH}
$ g++ -Wall main.cc -L${LIBPATH} -ljvm
$ ./a.out
Hello, world!
Hello, Java!

Tokenize C, C++ and Objective-C files with clang

// tokenizer.cc
// Copyright (c) 2013 Shinya
// The MIT License (MIT) http://opensource.org/licenses/mit-license.php

#include <iostream>

#include <clang-c/Index.h>

void DisplayTokens(const CXTranslationUnit& tu) {
  CXCursor cursor = clang_getTranslationUnitCursor(tu);
  CXSourceRange range = clang_getCursorExtent(cursor);
  CXToken* tokens = NULL;
  unsigned num_tokens = 0;
  clang_tokenize(tu, range, &tokens, &num_tokens);
  for (unsigned i = 0; i < num_tokens; ++i) {
    const CXToken& token = tokens[i];
    CXTokenKind kind = clang_getTokenKind(token);
    CXString spelling_str = clang_getTokenSpelling(tu, token);
    CXSourceRange range = clang_getTokenExtent(tu, token);
    std::cerr << clang_getCString(spelling_str) << ", "
              << static_cast<int>(kind) << ", "
              << "[" << range.begin_int_data
              << ":" << range.end_int_data << "]"
              << std::endl;
    clang_disposeString(spelling_str);
  }
  clang_disposeTokens(tu, tokens, num_tokens);
}

void Tokenize(const std::string& filepath) {
  const int exclude_decls_from_pch = 1;
  const int display_diagnostics = 0;
  CXIndex index = clang_createIndex(exclude_decls_from_pch,
                                    display_diagnostics);
  const unsigned index_options = CXGlobalOpt_None;
  clang_CXIndex_setGlobalOptions(index, index_options);
  const char* command_line_args[] = { "-Xclang", "-cc1" };
  const int num_command_line_args = sizeof(command_line_args) / sizeof(char*);
  const unsigned num_unsaved_files = 0;
  CXUnsavedFile* unsaved_files = NULL;
  CXTranslationUnit tu = clang_createTranslationUnitFromSourceFile(
      index, filepath.c_str(), num_command_line_args, command_line_args,
      num_unsaved_files, unsaved_files);
  if (tu != NULL) {
    DisplayTokens(tu);
    clang_disposeTranslationUnit(tu);
  } else {
    std::cerr << "Failed to tokenize: \"" << filepath << "\"" << std::endl;
  }
  clang_disposeIndex(index);
}

// Tokenizes the file named by the first command-line argument.
// Returns 1 when no argument is given and 0 otherwise.
int main(int argc, char** argv) {
  if (argc >= 2) {
    Tokenize(std::string(argv[1]));
    return 0;
  }
  return 1;
}

Check

$ g++ tokenizer.cc -lclang
$ cat hello_world.c 
#include <stdio.h>

int main(void) {
  printf("hello, world\n");
  return 0;
}
$ ./a.out hello_world.c 
#, 0, [2:3]
include, 2, [3:10]
<, 0, [11:12]
stdio, 2, [12:17]
., 0, [17:18]
h, 2, [18:19]
>, 0, [19:20]
int, 1, [22:25]
main, 2, [26:30]
(, 0, [30:31]
void, 1, [31:35]
), 0, [35:36]
{, 0, [37:38]
printf, 2, [41:47]
(, 0, [47:48]
"hello, world\n", 3, [48:64]
), 0, [64:65]
;, 0, [65:66]
return, 1, [69:75]
0, 3, [76:77]
;, 0, [77:78]
}, 0, [79:80]