[Libreoffice-commits] online.git: 3 commits - tools/map.cpp

Michael Meeks michael.meeks at collabora.com
Mon Dec 11 21:56:53 UTC 2017


 tools/map.cpp |  221 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 181 insertions(+), 40 deletions(-)

New commits:
commit 44600de7c7dbe11da0545cf73ccae9dfa653d141
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Sat Dec 9 17:33:15 2017 +0000

    loolmap: implement a simple string scan and dump function.
    
    Change-Id: I056f764c99f977a0178105511dfd5406ea6dbfa5

diff --git a/tools/map.cpp b/tools/map.cpp
index 88c1aa07..c69db7d2 100644
--- a/tools/map.cpp
+++ b/tools/map.cpp
@@ -31,6 +31,7 @@ typedef unsigned long long addr_t;
 
 bool DumpHex = false;
 bool DumpAll = false;
+bool DumpStrings = false;
 
 #define MAP_SIZE 20
 #define PATH_SIZE 1000 // No harm in having it much larger than strictly necessary. Avoids compiler warning.
@@ -138,20 +139,44 @@ static void dumpDiff(const std::string &pageStr, const std::string &parentStr)
     }
 }
 
+struct AddrSpace;
+
 struct Map {
     addr_t _start;
     addr_t _end;
     std::string _name;
+    size_t size() { return _end - _start; }
+};
+
+struct StringData {
+    size_t _count;
+    size_t _chars;
+    StringData() :
+        _count(0),
+        _chars(0)
+    {}
 };
 
 struct AddrSpace {
     unsigned _proc_id;
     std::vector<Map> _maps;
+    StringData _strings[3];
 
     AddrSpace(unsigned proc_id) :
         _proc_id(proc_id)
     {
     }
+    void printStats()
+    {
+        char prefixes[] = { 'S', 'U', 'c' };
+        for (int i = 0; i < 2; ++i)
+        {
+            printf("%cStrings      :%20lld, %lld chars\n",
+                   prefixes[i], (addr_t)_strings[i]._count,
+                   (addr_t)_strings[i]._chars);
+       }
+    }
+
     std::string findName(addr_t page) const
     {
         for (const Map &i : _maps)
@@ -162,6 +187,81 @@ struct AddrSpace {
         return std::string("");
     }
 
+    void insert(addr_t start, addr_t end, const char *name)
+    {
+        Map map;
+        map._start = start;
+        map._end = end;
+        map._name = std::string(name);
+        _maps.push_back(map);
+    }
+
+    // Sample hex dumps of the text "KEY_U": first as a UTF-16 OUString, then as an 8-bit string:
+    // 20 00 00 00 00 00 00 00  02 00 00 00 05 00 00 00  4b 00 45 00 59 00 5f 00  55 00 00 00 00 00 00 00  |  ...............K.E.Y._.U......
+    // 20 00 00 00 00 00 00 00  02 00 00 00 05 00 00 00  4b 45 59 5f 55 00 00 00  00 00 00 00 00 00 00 00  |  ...............KEY_U..........
+
+    bool isStringAtOffset(const std::vector<unsigned char> &data, size_t i,
+                          uint32_t len, bool isUnicode, std::string &str)
+    {
+        str = isUnicode ? "U_" : "S_";
+        int step = isUnicode ? 2 : 1;
+        for (size_t j = i; j < i + len*step && j < data.size(); j += step)
+        {
+            if (isascii(data[j]) && !iscntrl(data[j]))
+                str += static_cast<char>(data[j]);
+            else
+                return false;
+        }
+        return true;
+    }
+
+    void scanForSalStrings(Map &map, const std::vector<unsigned char> &data)
+    {
+        for (size_t i = 0; i < data.size() - 24; i += 4)
+        {
+            const uint32_t *p = reinterpret_cast<const uint32_t *>(&data[i]);
+            uint32_t len;
+            if ((p[0] & 0xffffff) < 0x1000 && // plausible max ref-count
+                (len = p[1]) < 0x100 &&     // plausible max string length
+                len <= (data.size() - i) &&
+                len > 0)
+            {
+                std::string str;
+                bool isUnicode = data[i+1] == 0 && data[i+3] == 0;
+                if (isStringAtOffset(data, i + 8, len, isUnicode, str))
+                {
+                    StringData &sdata = _strings[isUnicode ? 1 : 0];
+                    sdata._count ++;
+                    sdata._chars += len;
+                    if (DumpStrings)
+                        printf("string address 0x%.8llx %s\n",
+                               map._start + i, str.c_str());
+                }
+                i += 8;
+            }
+        }
+    }
+
+    void scanMapsForStrings()
+    {
+        int mem_fd = openPid(_proc_id, "mem");
+        if (DumpStrings)
+            printf("String dump:\n");
+        for (auto &map : _maps)
+        {
+            std::vector<unsigned char> data;
+            data.resize (map.size());
+            if (lseek(mem_fd, map._start, SEEK_SET) < 0 ||
+                read(mem_fd, &data[0], map.size()) != (int)map.size())
+                error(EXIT_FAILURE, errno, "Failed to seek in /proc/%d/mem to %lld",
+                      _proc_id, map._start);
+
+            scanForSalStrings(map, data);
+        }
+        if (DumpStrings)
+            printf("String dump ends.\n");
+        close (mem_fd);
+    }
 };
 
 static void dumpPages(unsigned proc_id, unsigned parent_id, const char *type, const std::vector<addr_t> &pages, const AddrSpace &space)
@@ -191,12 +291,6 @@ static void dumpPages(unsigned proc_id, unsigned parent_id, const char *type, co
         else if (read(parent_fd, &parentData[0], 0x1000) != 0x1000)
             parentData.resize(0); // missing equivalent page.
 
-        // Diff as ASCII
-        std::stringstream pageStr;
-        Util::dumpHex(pageStr, "", "", pageData, false);
-        std::stringstream parentStr;
-        Util::dumpHex(parentStr, "", "", parentData, false);
-
         int touched = 0;
         const char *style;
         if (parentData.size() > 0)
@@ -231,6 +325,12 @@ static void dumpPages(unsigned proc_id, unsigned parent_id, const char *type, co
 
         if (DumpHex)
         {
+            // Diff as ASCII
+            std::stringstream pageStr;
+            Util::dumpHex(pageStr, "", "", pageData, false);
+            std::stringstream parentStr;
+            Util::dumpHex(parentStr, "", "", parentData, false);
+
             printf ("%s page: 0x%.8llx (%d/%d) - touched: %d - %s - from %s\n",
                     type, page, (int)++cnt, (int)pages.size(), touched,
                     style, space.findName(page).c_str());
@@ -364,17 +464,12 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
             // 012d0000-0372f000 rw-p 00000000 00:00 0  [heap]
             if (sscanf(buffer, "%llx-%llx rw-p", &start, &end) == 2)
             {
-                Map map;
-                map._start = start;
-                map._end = end;
                 const char *name = strchr(buffer, '[');
                 if (!name)
                     name = strchr(buffer, '/');
-                if (name)
-                    map._name = std::string(name);
-                else
-                    map._name = std::string("[anon]");
-                space._maps.push_back(map);
+                if (!name)
+                    name = "[anon]";
+                space.insert(start, end, name);
                 for (addr_t p = start; p < end; p += 0x1000)
                     pushTo->push_back(p);
             }
@@ -409,6 +504,7 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
             }
         }
     }
+    space.scanMapsForStrings();
 
     printf("%s\n", cmdline);
     printf("Process ID    :%20d\n", proc_id);
@@ -425,6 +521,8 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
     printf("Heap page cnt :%20lld\n", (addr_t)heapVAddrs.size());
     printf("Anon page cnt :%20lld\n", (addr_t)anonVAddrs.size());
     printf("File page cnt :%20lld\n", (addr_t)fileVAddrs.size());
+    printf("--------------------------------------\n");
+    space.printStats();
     printf("\n");
 
     dump_unshared(proc_id, parent_id, "heap", heapVAddrs, space);
@@ -463,6 +561,7 @@ int main(int argc, char **argv)
     char path_proc[PATH_SIZE];
     char cmdline[BUFFER_SIZE];
 
+    bool found = false;
     bool help = false;
     unsigned forPid = 0;
     const char *appOrPid = nullptr;
@@ -479,6 +578,8 @@ int main(int argc, char **argv)
             DumpHex = true;
         else if (strstr(arg, "--all"))
             DumpAll = true;
+        else if (strstr(arg, "--strings"))
+            DumpStrings = true;
         else
             appOrPid = arg;
     }
@@ -489,8 +590,9 @@ int main(int argc, char **argv)
     {
         fprintf(stderr, "Usage: loolmap --hex <name of process|pid>\n");
         fprintf(stderr, "Dump memory map information for a given process\n");
-        fprintf(stderr, "    --hex    Hex dump relevant page contents and diff to parent process\n");
-        fprintf(stderr, "    --all    Hex dump all writable pages whether touched or not\n");
+        fprintf(stderr, "    --hex     Hex dump relevant page contents and diff to parent process\n");
+        fprintf(stderr, "    --strings Print all detected strings\n");
+        fprintf(stderr, "    --all     Hex dump all writable pages whether touched or not\n");
         return 0;
     }
 
@@ -516,10 +618,14 @@ int main(int argc, char **argv)
                 unsigned parent_id = getParent(pid_proc);
                 snprintf(path_proc, sizeof(path_proc), "/proc/%s/%s", dir_proc->d_name, "smaps");
                 total_smaps(pid_proc, parent_id, path_proc, cmdline);
+                found = true;
             }
         }
     }
 
+    if (!found)
+        fprintf(stderr, "Failed to find process %s\n", appOrPid);
+
     return EXIT_SUCCESS;
 }
 
commit 7dcd2ddd900b08629b59cf6e518b98af2ab4b402
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Sat Dec 9 17:18:59 2017 +0000

    loolmap: share some /proc opening pieces.
    
    Change-Id: I2d8b6eeec4afad52f74153c3d7ac904dfcb5db8f

diff --git a/tools/map.cpp b/tools/map.cpp
index 7d1be8b6..88c1aa07 100644
--- a/tools/map.cpp
+++ b/tools/map.cpp
@@ -87,6 +87,15 @@ static int read_buffer(char *buffer, unsigned size,
     return total_bytes;
 }
 
+static int openPid(unsigned proc_id, const char *name)
+{
+    char path_proc[PATH_SIZE];
+    snprintf(path_proc, sizeof(path_proc), "/proc/%d/%s", proc_id, name);
+    int fd = open(path_proc, 0);
+    if (fd < 0)
+        error(EXIT_FAILURE, errno, "Failed to open %s", path_proc);
+    return fd;
+}
 
 static std::vector<std::string> lineBreak(std::string str)
 {
@@ -136,8 +145,13 @@ struct Map {
 };
 
 struct AddrSpace {
+    unsigned _proc_id;
     std::vector<Map> _maps;
 
+    AddrSpace(unsigned proc_id) :
+        _proc_id(proc_id)
+    {
+    }
     std::string findName(addr_t page) const
     {
         for (const Map &i : _maps)
@@ -147,20 +161,13 @@ struct AddrSpace {
         }
         return std::string("");
     }
+
 };
 
 static void dumpPages(unsigned proc_id, unsigned parent_id, const char *type, const std::vector<addr_t> &pages, const AddrSpace &space)
 {
-    char path_proc[PATH_SIZE];
-    snprintf(path_proc, sizeof(path_proc), "/proc/%d/mem", proc_id);
-    int mem_fd = open(path_proc, 0);
-    if (mem_fd < 0)
-        error(EXIT_FAILURE, errno, "Failed to open %s", path_proc);
-
-    snprintf(path_proc, sizeof(path_proc), "/proc/%d/mem", parent_id);
-    int parent_fd = open(path_proc, 0);
-    if (parent_fd < 0)
-        error(EXIT_FAILURE, errno, "Failed to open %s", path_proc);
+    int mem_fd = openPid(proc_id, "mem");
+    int parent_fd = openPid(parent_id, "mem");
 
     if (DumpHex)
         printf ("\nUn-shared data dump\n");
@@ -277,11 +284,7 @@ static void dump_unshared(unsigned proc_id, unsigned parent_id,
                           const char *type, const std::vector<addr_t> &vaddrs,
                           const AddrSpace & space)
 {
-    char path_proc[PATH_SIZE];
-    snprintf(path_proc, sizeof(path_proc), "/proc/%d/pagemap", proc_id);
-    int fd = open(path_proc, 0);
-    if (fd < 0)
-        error(EXIT_FAILURE, errno, "Failed to open %s", path_proc);
+    int fd = openPid(proc_id, "pagemap");
 
     std::vector<char> bitmap;
     std::vector<addr_t> vunshared;
@@ -337,7 +340,7 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
     addr_t smap_value;
     char smap_key[MAP_SIZE];
 
-    AddrSpace space;
+    AddrSpace space(proc_id);
 
     std::vector<addr_t> heapVAddrs, anonVAddrs, fileVAddrs;
     std::vector<addr_t> *pushTo = nullptr;
@@ -369,6 +372,8 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
                     name = strchr(buffer, '/');
                 if (name)
                     map._name = std::string(name);
+                else
+                    map._name = std::string("[anon]");
                 space._maps.push_back(map);
                 for (addr_t p = start; p < end; p += 0x1000)
                     pushTo->push_back(p);
@@ -429,15 +434,12 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
 
 static unsigned getParent(int proc_id)
 {
-    char path_proc[PATH_SIZE];
-    snprintf(path_proc, sizeof(path_proc), "/proc/%d/stat", proc_id);
-    int fd = open(path_proc, 0);
-    if (fd < 0)
-        error(EXIT_FAILURE, errno, "Failed to open %s", path_proc);
+    int fd = openPid(proc_id, "stat");
+
     char buffer[4096];
     int len;
     if ((len = read(fd, buffer, sizeof (buffer))) < 0)
-        error(EXIT_FAILURE, errno, "Failed to read %s", path_proc);
+        error(EXIT_FAILURE, errno, "Failed to read /proc/%d/stat", proc_id);
     close (fd);
     buffer[len] = '\0';
 
@@ -445,8 +447,8 @@ static unsigned getParent(int proc_id)
     unsigned unused, ppid = 0;
     if (sscanf(buffer, "%d %s %c %d", &unused, cmd, &state, &ppid) != 4 || ppid == 0)
     {
-        fprintf(stderr, "Failed to locate parent from file '%s' : '%s'\n",
-                path_proc, buffer);
+        fprintf(stderr, "Failed to locate parent from "
+                "/proc/%d/stat : '%s'\n", proc_id, buffer);
         exit (1);
     }
 
commit 50cdc168842e6e197bd927558cf9d3f9785c8149
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Fri Dec 8 18:13:09 2017 +0000

    loolmap: store address space mappings.
    
    Change-Id: I30b61ed8407996923f07368263bc8b773dfdc061

diff --git a/tools/map.cpp b/tools/map.cpp
index 41025c9e..7d1be8b6 100644
--- a/tools/map.cpp
+++ b/tools/map.cpp
@@ -129,7 +129,27 @@ static void dumpDiff(const std::string &pageStr, const std::string &parentStr)
     }
 }
 
-static void dumpPages(unsigned proc_id, unsigned parent_id, const char *type, const std::vector<addr_t> &pages)
+struct Map {
+    addr_t _start;
+    addr_t _end;
+    std::string _name;
+};
+
+struct AddrSpace {
+    std::vector<Map> _maps;
+
+    std::string findName(addr_t page) const
+    {
+        for (const Map &i : _maps)
+        {
+            if (i._start <= page && i._end > page)
+                return i._name;
+        }
+        return std::string("");
+    }
+};
+
+static void dumpPages(unsigned proc_id, unsigned parent_id, const char *type, const std::vector<addr_t> &pages, const AddrSpace &space)
 {
     char path_proc[PATH_SIZE];
     snprintf(path_proc, sizeof(path_proc), "/proc/%d/mem", proc_id);
@@ -204,11 +224,11 @@ static void dumpPages(unsigned proc_id, unsigned parent_id, const char *type, co
 
         if (DumpHex)
         {
-            printf ("%s page: 0x%.8llx (%d/%d) - touched: %d - %s\n",
+            printf ("%s page: 0x%.8llx (%d/%d) - touched: %d - %s - from %s\n",
                     type, page, (int)++cnt, (int)pages.size(), touched,
-                    style);
+                    style, space.findName(page).c_str());
 
-            if (parentData.size() == 0)
+            if (touched == 0)
                 printf("%s", pageStr.str().c_str());
             else
                 dumpDiff(pageStr.str(), parentStr.str());
@@ -254,7 +274,8 @@ static std::vector<char> compressBitmap(const std::vector<char> &bitmap)
 }
 
 static void dump_unshared(unsigned proc_id, unsigned parent_id,
-                          const char *type, const std::vector<addr_t> &vaddrs)
+                          const char *type, const std::vector<addr_t> &vaddrs,
+                          const AddrSpace & space)
 {
     char path_proc[PATH_SIZE];
     snprintf(path_proc, sizeof(path_proc), "/proc/%d/pagemap", proc_id);
@@ -300,7 +321,7 @@ static void dump_unshared(unsigned proc_id, unsigned parent_id,
     std::vector<char> compressed = compressBitmap(bitmap);
     printf ("\tRLE sharing bitmap:\n%s\n", &compressed[0]);
 
-    dumpPages(proc_id, parent_id, type, vunshared);
+    dumpPages(proc_id, parent_id, type, vunshared, space);
 }
 
 static void total_smaps(unsigned proc_id, unsigned parent_id,
@@ -316,6 +337,8 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
     addr_t smap_value;
     char smap_key[MAP_SIZE];
 
+    AddrSpace space;
+
     std::vector<addr_t> heapVAddrs, anonVAddrs, fileVAddrs;
     std::vector<addr_t> *pushTo = nullptr;
 
@@ -338,6 +361,15 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
             // 012d0000-0372f000 rw-p 00000000 00:00 0  [heap]
             if (sscanf(buffer, "%llx-%llx rw-p", &start, &end) == 2)
             {
+                Map map;
+                map._start = start;
+                map._end = end;
+                const char *name = strchr(buffer, '[');
+                if (!name)
+                    name = strchr(buffer, '/');
+                if (name)
+                    map._name = std::string(name);
+                space._maps.push_back(map);
                 for (addr_t p = start; p < end; p += 0x1000)
                     pushTo->push_back(p);
             }
@@ -389,9 +421,10 @@ static void total_smaps(unsigned proc_id, unsigned parent_id,
     printf("Anon page cnt :%20lld\n", (addr_t)anonVAddrs.size());
     printf("File page cnt :%20lld\n", (addr_t)fileVAddrs.size());
     printf("\n");
-    dump_unshared(proc_id, parent_id, "heap", heapVAddrs);
-    dump_unshared(proc_id, parent_id, "anon", anonVAddrs);
-    dump_unshared(proc_id, parent_id, "file", fileVAddrs);
+
+    dump_unshared(proc_id, parent_id, "heap", heapVAddrs, space);
+    dump_unshared(proc_id, parent_id, "anon", anonVAddrs, space);
+    dump_unshared(proc_id, parent_id, "file", fileVAddrs, space);
 }
 
 static unsigned getParent(int proc_id)


More information about the Libreoffice-commits mailing list