[Libreoffice-commits] .: solenv/bin

Tor Lillqvist tml at kemper.freedesktop.org
Sun Jan 16 17:30:47 PST 2011


 solenv/bin/remangle32to64.pl |  327 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 327 insertions(+)

New commits:
commit d4d0597b28f3711defa596baaaeaf0ea9a062ac4
Author: Tor Lillqvist <tlillqvist at novell.com>
Date:   Mon Jan 17 03:27:47 2011 +0200

    Add remangle32to64 script
    
    A Perl script to change MSVC mangled symbols from their form in 32-bit
    code to the corresponding mangled symbols in 64-bit code.
    
    The script is obviously not known to work 100%, but seems to work well
    enough for the few cases in LibreOffice (.map files) it has been used
    on so far.

diff --git a/solenv/bin/remangle32to64.pl b/solenv/bin/remangle32to64.pl
new file mode 100644
index 0000000..0ed98fb
--- /dev/null
+++ b/solenv/bin/remangle32to64.pl
@@ -0,0 +1,327 @@
+#!/usr/bin/perl -w /* -*- indent-tabs-mode: nil -*- */
+
+# Version: MPL 1.1 / GPLv3+ / LGPLv3+
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Initial Developer of the Original Code is
+#       Novell, Inc
+# Portions created by the Initial Developer are Copyright (C) 2011 Novell,
+# Inc. All Rights Reserved.
+#
+# Contributor(s): Tor Lillqvist <tml at iki.fi>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
+# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
+# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
+# instead of those above.
+
+# Change MSVC mangled C++ names from 32-bit form to the corresponding
+# 64-bit form.  Each line of input can contain at most one mangled
+# name.
+
+# Based on experimentation with MSVC2008 and the following web pages:
+
+# http://www.geoffchappell.com/viewer.htm?doc=studies/msvc/language/decoration/index.htm
+# Thorough but incomplete. Still, describes details the below sources
+# don't mention.
+
+# http://cvs.winehq.com/cvsweb/wine/dlls/msvcrt/undname.c
+# Wine's __unDname function, presumably the most complete, although
+# not really written to act as "documentation"
+
+# http://mearie.org/documents/mscmangle/
+# Relatively complete but a bit badly structured and terse.
+
+# http://en.wikipedia.org/wiki/Microsoft_Visual_C%2B%2B_Name_Mangling
+# seems to be mostly a ripoff of the mearie.org page
+
+# Example transformation:
+# ??0ORealDynamicLoader@salhelper@@IAE@PAPAV01@ABVOUString@rtl@@1PAX2@Z =>
+# ??0ORealDynamicLoader@salhelper@@IEAA@PEAPEAV01@AEBVOUString@rtl@@1PEAX2@Z
+
+# It should be relatively easy to remove the modification parts of the
+# below code and use the regex for some other task on MSVC mangled
+# names.
+
+# The regular expression below accepts also nonsensical mangled names,
+# so it should not be used to verify correctness of mangled names.
+
+use strict;
+
+my @opstack = ();  # edit ops ('i ...' or 'r ...' strings) queued during matching; drained once per input line
+
+sub parse_number($)
+{
+  my ($num) = @_;  # mangled number text: one digit 0-9, or a run of letters A-P closed by '@'
+  # A lone digit 0..9 stands for the values 1..10.
+  return $num + 1 if ($num =~ m/\A[0-9]\z/);
+  # Otherwise A..P are the hex digits 0..F; delete the '@' terminator so hex() sees only hex digits (no -w warning).
+  $num =~ tr/ABCDEFGHIJKLMNOP@/0123456789ABCDEF/d;
+  return hex($num);
+}
+
+sub format_number($)
+{
+  my ($num) = @_;  # non-negative integer to encode as a mangled number
+  # Only 1..10 use the single-digit form 0..9; zero is outside that range and must come out as 'A@', not -1.
+  return $num - 1 if ($num >= 1 && $num <= 10);
+  # Everything else is uppercase hex, spelled with A..P for 0..F and closed by '@'.
+  $num = sprintf("%X", $num);
+  $num =~ tr/0123456789ABCDEF/ABCDEFGHIJKLMNOP/;
+  return $num.'@';
+}
+
+sub double_thunk($$)
+{
+  my ($number, $position) = @_;  # mangled number text, and the match offset just past it
+  my $width = length($number);
+  my $doubled = format_number(2 * parse_number($number));
+  # Queue an 'r' op: replace the $width characters starting at ($position - $width) with the doubled number.
+  push(@opstack, join(' ', 'r', $position - $width, $width, $doubled));
+}
+
+while (<>)  # read input one line at a time into $_; every line is printed below, edited or not
+  {  # the match result itself is unused: the pattern only queues edits in @opstack through its (?{ }) blocks
+    m/
+      # Named subpattern definitions. I use names of the form
+      # __CamelCase__ for the named subpatterns so that they are easier
+      # to see.
+      (?(DEFINE)
+        (?<__Number__>
+          \?? ([0-9] | [A-P]+@)
+        )
+        (?<__32BitChecksum__>
+          [A-P]{8}@
+        )
+        (?<__CallingConvention__>
+          (?:
+            [AB]
+            |
+            [C-L]
+              (?{ push(@opstack, 'r '.(pos()-1).' 1 A cdecl'); })
+          )
+        )
+        (?<__StringLiteralText__>
+          (?:
+            [_a-zA-Z0-9]
+            |
+            \?\$[A-P][A-P]
+            |
+            \?[0-9A-Za-z]
+          ){1,20}
+        )
+        (?<__Identifier__>
+          [_a-zA-Z\$][_a-zA-Z0-9\$]*@
+        )
+        (?<__ArgsZTerminated__>
+          (?&__DataTypeInArgs__)+ @? Z
+        )
+        (?<__ArgsNonZTerminated__>
+          (?&__DataTypeInArgs__)+ @?
+        )
+        (?<__TemplateName__>
+          (?&__Identifier__) (?&__ArgsNonZTerminated__)
+        )
+        (?<__Class__>
+          (?:
+            [0-9]
+            |
+            \?\$ (?&__TemplateName__)
+            |
+            (?&__Identifier__)
+          )+@
+        )
+        (?<__DataTypeCommon__>
+          (?:
+            # extended types like _int64, bool and wchar_t
+            _[D-NW]
+            |
+            # simple types
+            [C-KMNOXZ]
+            |
+            # class, struct, union, cointerface
+            [TUVY] (?&__Class__)
+            |
+            # references
+            [AB]
+              (?{ push(@opstack, 'i '.pos().' E reference'); })
+              (?&__ModifiedType__)
+            |
+            # pointers
+            [QRS]
+              (?{ push(@opstack, 'i '.pos().' E pointer'); })
+              (?&__ModifiedType__)
+            |
+            P
+              (?:
+                # function pointer
+                6 (?&__CallingConvention__) (?&__DataTypeNotInArgs__) (?&__ArgsZTerminated__)
+                |
+                # other pointer
+                (?{ push(@opstack, 'i '.pos().' E pointer'); })
+                (?&__ModifiedType__)
+              )
+            |
+            W 4 (?&__Class__)
+            |
+            [0-9]
+            |
+            \$ (?:
+                 [0DQ] (?&__Number__)
+                 |
+                 F (?&__Number__){2}
+                 |
+                 G (?&__Number__){3}
+                 |
+                 \$ [ABCD] (?&__DataTypeNotInArgs__)
+               )
+          )
+        )
+        (?<__ModifiedType__>
+          [ABCD]
+          (?:
+            # multidimensional array
+            Y (?&__Number__)+
+          )?
+          (?&__DataTypeNotInArgs__)
+        )
+        (?<__DataTypeInArgs__>
+          (?:
+            (?&__DataTypeCommon__)
+            |
+            # template parameter
+            \? (?&__Number__)
+          )
+        )
+        (?<__DataTypeNotInArgs__>
+          (?:
+            (?&__DataTypeCommon__)
+            |
+            \? (?&__ModifiedType__)
+          )
+        )
+      )
+
+      # All mangled names start with a question mark
+      \?
+      (?:
+        # Ctors, dtors, operators etc have separate a priori defined
+        # special mangled names like the very simple ?0 for constructor
+        # and ?_R16789 for "RTTI Base Class Descriptor at (6,7,8,9)"
+        # whatever that might mean.
+        (
+          \?
+          ([0-9A-Z]
+           |
+           _(?:
+              # C is for string literals, see below
+              # R is RTTI, see immediately below
+              [0-9ABD-QS-Z]
+              |
+              R0(?&__DataTypeNotInArgs__)
+              |
+              R1(?&__Number__){4}
+              |
+              R[234]
+              |
+              _(?:
+                 E
+               )
+           )
+          )
+        )?
+        (?&__Class__)
+
+        (?:
+          # Static members and normal variables
+          [0-5]
+            (?&__DataTypeNotInArgs__)
+            [ABCD]
+          |
+          # Compiler-generated static
+          [67]
+            [ABCD]
+            (?:
+              @
+              |
+              (?&__Class__)
+            )
+          |
+          # Non-static Methods, implicit 'this'
+          [ABEFIJMNQRUV]
+            [AB]
+            (?{ push(@opstack, 'i '.(pos()-1).' E this'); })
+            (?&__CallingConvention__)
+            (?:
+              @
+              |
+              (?&__DataTypeNotInArgs__)
+            )
+            (?&__ArgsZTerminated__)
+          |
+          # Static methods
+          [CDKLST]
+            (?&__CallingConvention__)
+            (?:
+              @
+              |
+              (?&__DataTypeNotInArgs__)
+            )
+            (?&__ArgsZTerminated__)
+          |
+          # Thunks
+          [GHOPWX]
+            ((?&__Number__))
+            (?{ double_thunk($^N, pos()); })
+            [AB]
+            (?{ push(@opstack, 'i '.(pos()-1).' E this'); })
+            (?&__CallingConvention__)
+            (?:
+              @
+              |
+              (?&__DataTypeNotInArgs__)
+            )
+            (?&__ArgsZTerminated__)
+          |
+          # Functions
+          [YZ]
+            (?&__CallingConvention__)
+            (?:
+              @
+              |
+              (?&__DataTypeNotInArgs__)
+            )
+            (?&__ArgsZTerminated__)
+          |
+          # Template
+          \$ (?&__Identifier__) (?&__ArgsNonZTerminated__)
+        )
+        |
+        # pooled string literals
+        \?_C\@_[01](?&__Number__)(?&__32BitChecksum__)(?&__StringLiteralText__)@
+      )
+      /x;
+    # NOTE(review): ops pushed on pattern paths that later backtrack are never removed, and ops are applied even if the match failed.
+    while (my $op = pop(@opstack))  # most recently queued op first; presumably so edits at later offsets cannot shift earlier ones
+      {
+        # print STDERR "op=$op\n";
+        my @a = split (' ', $op);  # fields: op kind, offset, operands; the trailing label word is not used here
+        if ($a[0] eq 'i') {  # insert: put the text $a[2] into $_ at offset $a[1]
+          substr($_,$a[1],0) = $a[2];
+        } elsif ($a[0] eq 'r') {  # replace: the $a[2] characters of $_ at offset $a[1] become $a[3]
+          substr($_,$a[1],$a[2]) = $a[3];
+        }
+      }
+    # Emit the (possibly edited) line; the line ending read by <> was never stripped, so it is printed as-is.
+    print;
+  }


More information about the Libreoffice-commits mailing list