[gst-devel] PIC compatible assembly code

David Schleef ds at schleef.org
Tue Oct 16 03:06:14 CEST 2001


Those of you running Debian unstable on i386 may have noticed that
gstreamer and binutils-2.11.92.0.5 do not get along very well.
This appears to be because the new binutils is more strict about
non-PIC code linked into shared object code.  The following patch
converts all the trouble spots (I think) in assembly to PIC code.

Please test this and give me feedback.  I can't test all the
relevant code paths, except to see that they compile and load.



dave...

-------------- next part --------------
Index: libs/idct/mmxidct.S
===================================================================
RCS file: /cvsroot/gstreamer/gstreamer/libs/idct/mmxidct.S,v
retrieving revision 1.1
diff -u -u -r1.1 mmxidct.S
--- libs/idct/mmxidct.S	2000/08/14 10:11:03	1.1
+++ libs/idct/mmxidct.S	2001/10/16 09:49:37
@@ -4,8 +4,18 @@
  * for example in 11...1110000 format
  * If the iDCT is of I macroblock then 0.5 needs to be added to the;DC Component
  * (element[0][0] of the matrix)
+ *
+ * Notes:
+ *  - the scratchN variables should be put on the stack to avoid
+ *    reentrancy problems
  */
 
+#ifdef PIC
+#define pic_offset(a) a@GOTOFF(%ebx)
+#else
+#define pic_offset(a) a
+#endif
+
 /* extrn re_matrix */
 
 .data
@@ -95,9 +105,14 @@
 	pushl %edx
 	pushl %esi
 	pushl %edi
+#ifdef PIC
+	call here
+here:	popl %ebx
+	addl $_GLOBAL_OFFSET_TABLE_+[.-here],%ebx
+#endif
 	movl 8(%ebp),%esi		/* source matrix */
 	movq (%esi), %mm0
-	paddw x0000000000000004, %mm0
+	paddw pic_offset(x0000000000000004), %mm0
 	movq 8(%esi), %mm1
 	psllw $4, %mm0
 	movq 16(%esi), %mm2
@@ -145,7 +160,7 @@
 	movq %mm5,104(%esi)
 	movq %mm6,112(%esi)
 	movq %mm7,120(%esi)
-	leal preSC, %ecx
+	leal pic_offset(preSC), %ecx
 /* column 0: even part
  * use V4, V12, V0, V8 to produce V22..V25
  */
@@ -161,7 +176,7 @@
 	movq %mm1, %mm2			/* added 11/1/96 */
 	pmulhw 8*8(%esi),%mm5		/* V8 */
 	psubsw %mm0, %mm1		/* V16 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V18 */
+	pmulhw pic_offset(x5a825a825a825a82), %mm1	/* 23170 ->V18 */
 	paddsw %mm0, %mm2		/* V17 */
 	movq %mm2, %mm0			/* duplicate V17 */
 	psraw $1, %mm2			/* t75=t82 */
@@ -202,7 +217,7 @@
 	paddsw %mm0, %mm3		/* V29 ; free mm0 */
 	movq %mm7, %mm1			/* duplicate V26 */
 	psraw $1, %mm3			/* t91=t94 */
-	pmulhw x539f539f539f539f,%mm7	/* V33 */
+	pmulhw pic_offset(x539f539f539f539f),%mm7	/* V33 */
 	psraw $1, %mm1			/* t96 */
 	movq %mm5, %mm0			/* duplicate V2 */
 	psraw $2, %mm4			/* t85=t87 */
@@ -210,15 +225,15 @@
 	psubsw %mm4, %mm0		/* V28 ; free mm4 */
 	movq %mm0, %mm2			/* duplicate V28 */
 	psraw $1, %mm5			/* t90=t93 */
-	pmulhw x4546454645464546,%mm0	/* V35 */
+	pmulhw pic_offset(x4546454645464546),%mm0	/* V35 */
 	psraw $1, %mm2			/* t97 */
 	movq %mm5, %mm4			/* duplicate t90=t93 */
 	psubsw %mm2, %mm1		/* V32 ; free mm2 */
-	pmulhw x61f861f861f861f8,%mm1	/* V36 */
+	pmulhw pic_offset(x61f861f861f861f8),%mm1	/* V36 */
 	psllw $1, %mm7			/* t107 */
 	paddsw %mm3, %mm5		/* V31 */
 	psubsw %mm3, %mm4		/* V30 ; free mm3 */
-	pmulhw x5a825a825a825a82,%mm4	/* V34 */
+	pmulhw pic_offset(x5a825a825a825a82),%mm4	/* V34 */
 	nop
 	psubsw %mm1, %mm0		/* V38 */
 	psubsw %mm7, %mm1		/* V37 ; free mm7 */
@@ -285,7 +300,7 @@
 	psubsw %mm7, %mm1		/* V50 */
 	pmulhw 8*9(%esi), %mm5		/* V9 */
 	paddsw %mm7, %mm2		/* V51 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V52 */
+	pmulhw pic_offset(x5a825a825a825a82), %mm1	/* 23170 ->V52 */
 	movq %mm2, %mm6			/* duplicate V51 */
 	psraw $1, %mm2			/* t138=t144 */
 	movq %mm3, %mm4			/* duplicate V1 */
@@ -326,11 +341,11 @@
  * even more by doing the correction step in a later stage when the number
  * is actually multiplied by 16
  */
-	paddw x0005000200010001, %mm4
+	paddw pic_offset(x0005000200010001), %mm4
 	psubsw %mm6, %mm3		/* V60 ; free mm6 */
 	psraw $1, %mm0			/* t154=t156 */
 	movq %mm3, %mm1			/* duplicate V60 */
-	pmulhw x539f539f539f539f, %mm1	/* V67 */
+	pmulhw pic_offset(x539f539f539f539f), %mm1	/* V67 */
 	movq %mm5, %mm6			/* duplicate V3 */
 	psraw $2, %mm4			/* t148=t150 */
 	paddsw %mm4, %mm5		/* V61 */
@@ -339,13 +354,13 @@
 	psllw $1, %mm1			/* t169 */
 	paddsw %mm0, %mm5		/* V65 -> result */
 	psubsw %mm0, %mm4		/* V64 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm4	/* V68 */
+	pmulhw pic_offset(x5a825a825a825a82), %mm4	/* V68 */
 	psraw $1, %mm3			/* t158 */
 	psubsw %mm6, %mm3		/* V66 */
 	movq %mm5, %mm2			/* duplicate V65 */
-	pmulhw x61f861f861f861f8, %mm3	/* V70 */
+	pmulhw pic_offset(x61f861f861f861f8), %mm3	/* V70 */
 	psllw $1, %mm6			/* t165 */
-	pmulhw x4546454645464546, %mm6	/* V69 */
+	pmulhw pic_offset(x4546454645464546), %mm6	/* V69 */
 	psraw $1, %mm2			/* t172 */
 /* moved from next block */
 	movq 8*5(%esi), %mm0		/* V56 */
@@ -470,7 +485,7 @@
 *	movq 8*13(%esi), %mm4		tmt13
 */
 	psubsw %mm4, %mm3		/* V134 */
-	pmulhw x5a825a825a825a82, %mm3	/* 23170 ->V136 */
+	pmulhw pic_offset(x5a825a825a825a82), %mm3	/* 23170 ->V136 */
 	movq 8*9(%esi), %mm6		/* tmt9 */
 	paddsw %mm4, %mm5		/* V135 ; mm4 free */
 	movq %mm0, %mm4			/* duplicate tmt1 */
@@ -499,17 +514,17 @@
 	psubsw %mm7, %mm0		/* V144 */
 	movq %mm0, %mm3			/* duplicate V144 */
 	paddsw %mm7, %mm2		/* V147 ; free mm7 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V151 */
+	pmulhw pic_offset(x539f539f539f539f), %mm0	/* 21407-> V151 */
 	movq %mm1, %mm7			/* duplicate tmt3 */
 	paddsw %mm5, %mm7		/* V145 */
 	psubsw %mm5, %mm1		/* V146 ; free mm5 */
 	psubsw %mm1, %mm3		/* V150 */
 	movq %mm7, %mm5			/* duplicate V145 */
-	pmulhw x4546454645464546, %mm1	/* 17734-> V153 */
+	pmulhw pic_offset(x4546454645464546), %mm1	/* 17734-> V153 */
 	psubsw %mm2, %mm5		/* V148 */
-	pmulhw x61f861f861f861f8, %mm3	/* 25080-> V154 */
+	pmulhw pic_offset(x61f861f861f861f8), %mm3	/* 25080-> V154 */
 	psllw $2, %mm0			/* t311 */
-	pmulhw x5a825a825a825a82, %mm5	/* 23170-> V152 */
+	pmulhw pic_offset(x5a825a825a825a82), %mm5	/* 23170-> V152 */
 	paddsw %mm2, %mm7		/* V149 ; free mm2 */
 	psllw $1, %mm1			/* t313 */
 	nop	/* without the nop - freeze here for one clock */
@@ -535,7 +550,7 @@
 	paddsw %mm3, %mm6		/* V164 ; free mm3 */
 	movq %mm4, %mm3			/* duplicate V142 */
 	psubsw %mm5, %mm4		/* V165 ; free mm5 */
-	movq %mm2, scratch7		/* out7 */
+	movq %mm2, pic_offset(scratch7)		/* out7 */
 	psraw $4, %mm6
 	psraw $4, %mm4
 	paddsw %mm5, %mm3		/* V162 */
@@ -546,11 +561,11 @@
  */
 	movq %mm6, 8*9(%esi)		/* out9 */
 	paddsw %mm1, %mm0		/* V161 */
-	movq %mm3, scratch5		/* out5 */
+	movq %mm3, pic_offset(scratch5)		/* out5 */
 	psubsw %mm1, %mm5		/* V166 ; free mm1 */
 	movq %mm4, 8*11(%esi)		/* out11 */
 	psraw $4, %mm5
-	movq %mm0, scratch3		/* out3 */
+	movq %mm0, pic_offset(scratch3)		/* out3 */
 	movq %mm2, %mm4			/* duplicate V140 */
 	movq %mm5, 8*13(%esi)		/* out13 */
 	paddsw %mm7, %mm2		/* V160 */
@@ -560,7 +575,7 @@
 /* moved from the next block */
 	movq 8*3(%esi), %mm7
 	psraw $4, %mm4
-	movq %mm2, scratch1		/* out1 */
+	movq %mm2, pic_offset(scratch1)		/* out1 */
 /* moved from the next block */
 	movq %mm0, %mm1
 	movq %mm4, 8*15(%esi)		/* out15 */
@@ -617,15 +632,15 @@
 	paddsw %mm4, %mm3		/* V113 ; free mm4 */
 	movq %mm0, %mm4			/* duplicate V110 */
 	paddsw %mm1, %mm2		/* V111 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V117 */
+	pmulhw pic_offset(x539f539f539f539f), %mm0	/* 21407-> V117 */
 	psubsw %mm1, %mm5		/* V112 ; free mm1 */
 	psubsw %mm5, %mm4		/* V116 */
 	movq %mm2, %mm1			/* duplicate V111 */
-	pmulhw x4546454645464546, %mm5	/* 17734-> V119 */
+	pmulhw pic_offset(x4546454645464546), %mm5	/* 17734-> V119 */
 	psubsw %mm3, %mm2		/* V114 */
-	pmulhw x61f861f861f861f8, %mm4	/* 25080-> V120 */
+	pmulhw pic_offset(x61f861f861f861f8), %mm4	/* 25080-> V120 */
 	paddsw %mm3, %mm1		/* V115 ; free mm3 */
-	pmulhw x5a825a825a825a82, %mm2	/* 23170-> V118 */
+	pmulhw pic_offset(x5a825a825a825a82), %mm2	/* 23170-> V118 */
 	psllw $2, %mm0			/* t266 */
 	movq %mm1, (%esi)		/* save V115 */
 	psllw $1, %mm5			/* t268 */
@@ -643,7 +658,7 @@
 	movq %mm6, %mm3			/* duplicate tmt4 */
 	psubsw %mm0, %mm6		/* V100 */
 	paddsw %mm0, %mm3		/* V101 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm6	/* 23170 ->V102 */
+	pmulhw pic_offset(x5a825a825a825a82), %mm6	/* 23170 ->V102 */
 	movq %mm7, %mm5			/* duplicate tmt0 */
 	movq 8*8(%esi), %mm1		/* tmt8 */
 	paddsw %mm1, %mm7		/* V103 */
@@ -677,10 +692,10 @@
 	movq 8*2(%esi), %mm3		/* V123 */
 	paddsw %mm4, %mm7		/* out0 */
 /* moved up from next block */
-	movq scratch3, %mm0
+	movq pic_offset(scratch3), %mm0
 	psraw $4, %mm7
 /* moved up from next block */
-	movq scratch5, %mm6 
+	movq pic_offset(scratch5), %mm6 
 	psubsw %mm4, %mm1		/* out14 ; free mm4 */
 	paddsw %mm3, %mm5		/* out2 */
 	psraw $4, %mm1
@@ -691,7 +706,7 @@
 	movq %mm5, 8*2(%esi)		/* out2 ; free mm5 */
 	psraw $4, %mm2
 /* moved up to the prev block */
-	movq scratch7, %mm4
+	movq pic_offset(scratch7), %mm4
 /* moved up to the prev block */
 	psraw $4, %mm0
 	movq %mm2, 8*12(%esi)		/* out12 ; free mm2 */
@@ -699,13 +714,13 @@
 	psraw $4, %mm6
 /* move back the data to its correct place
 * moved up to the prev block
- *	movq scratch3, %mm0
- *	movq scratch5, %mm6
- *	movq scratch7, %mm4
+ *	movq pic_offset(scratch3), %mm0
+ *	movq pic_offset(scratch5), %mm6
+ *	movq pic_offset(scratch7), %mm4
  *	psraw $4, %mm0
  *	psraw $4, %mm6
 */
-	movq scratch1, %mm1
+	movq pic_offset(scratch1), %mm1
 	psraw $4, %mm4
 	movq %mm0, 8*3(%esi)		/* out3 */
 	psraw $4, %mm1
Index: plugins/mpeg2/ac3dec/downmix_i386.S
===================================================================
RCS file: /cvsroot/gstreamer/gstreamer/plugins/mpeg2/ac3dec/downmix_i386.S,v
retrieving revision 1.1
diff -u -u -r1.1 downmix_i386.S
--- plugins/mpeg2/ac3dec/downmix_i386.S	2000/11/20 19:04:32	1.1
+++ plugins/mpeg2/ac3dec/downmix_i386.S	2001/10/16 09:49:51
@@ -5,6 +5,12 @@
 
 #ifdef __i386__
 
+#ifdef PIC
+#define pic_offset(a) a @GOTOFF(%ebx)
+#else
+#define pic_offset(a) a
+#endif
+
 	.file	"downmix.c"
 	.version	"01.01"
 gcc2_compiled.:
@@ -60,9 +66,14 @@
 	subl $16,%esp
 	pushl %esi
 	pushl %ebx
+#ifdef PIC
+	call here
+here:	popl %ebx
+	addl $_GLOBAL_OFFSET_TABLE_+[.-here],%ebx
+#endif
 	movl 8(%ebp),%edx
 	movl 12(%ebp),%ecx
-	flds .LC46
+	flds pic_offset(.LC46)
 	movl $255,%ebx
 	.p2align 4,,7
 .L379:
Index: plugins/mpeg2/ac3dec/downmix_kni.S
===================================================================
RCS file: /cvsroot/gstreamer/gstreamer/plugins/mpeg2/ac3dec/downmix_kni.S,v
retrieving revision 1.1
diff -u -u -r1.1 downmix_kni.S
--- plugins/mpeg2/ac3dec/downmix_kni.S	2000/11/20 19:04:32	1.1
+++ plugins/mpeg2/ac3dec/downmix_kni.S	2001/10/16 09:49:51
@@ -22,6 +22,12 @@
 
 #ifdef __i386__
 
+#ifdef PIC
+#define pic_offset(a) a @GOTOFF(%ebx)
+#else
+#define pic_offset(a) a
+#endif
+
 .section .rodata
 	.align 4
 sqrt2:	.float 0f0.7071068
@@ -359,8 +365,13 @@
 	pushl %eax
 	pushl %ebx
 	pushl %ecx
+#ifdef PIC
+	call here
+here:	popl %ebx
+	addl $_GLOBAL_OFFSET_TABLE_+[.-here],%ebx
+#endif
 
-	movl $sqrt2, %eax
+	leal pic_offset(sqrt2), %eax
 	movss (%eax), %xmm7
 	movl 8(%ebp), %eax	/* s16_samples */
 	movl 12(%ebp), %ebx	/* left */
Index: plugins/mpeg2/ac3dec/srfft_kni.S
===================================================================
RCS file: /cvsroot/gstreamer/gstreamer/plugins/mpeg2/ac3dec/srfft_kni.S,v
retrieving revision 1.1
diff -u -u -r1.1 srfft_kni.S
--- plugins/mpeg2/ac3dec/srfft_kni.S	2000/11/20 19:04:32	1.1
+++ plugins/mpeg2/ac3dec/srfft_kni.S	2001/10/16 09:49:51
@@ -22,6 +22,12 @@
 
 #ifdef __i386__
 
+#ifdef PIC
+#define pic_offset(a) a @GOTOFF(%ebx)
+#else
+#define pic_offset(a) a
+#endif
+
 .section	.rodata
 	.align 16
 hsqrt2:	 .float 0f0.707106781188
@@ -73,6 +79,11 @@
 	movl 8(%ebp), %eax		/* complext_t */
 
 	pushl %ebx
+#ifdef PIC
+	call here
+here:	popl %ebx
+	addl $_GLOBAL_OFFSET_TABLE_+[.-here],%ebx
+#endif
 	movlps (%eax), %xmm0		/* x[0] */
 	movlps 32(%eax), %xmm1		/* x[4] */
 	movhps 16(%eax), %xmm0		/* x[2] | x[0] */
@@ -94,7 +105,7 @@
 	subps %xmm5, %xmm7		/* yb */
 
 	movhps 24(%eax), %xmm1		/* x[3] | x[1] */
-	movl $hsqrt2, %ebx
+	leal pic_offset(hsqrt2), %ebx
 	movlps 40(%eax), %xmm2		/* x[5] */
 	movhps 56(%eax), %xmm2		/* /x[7] | x[5] */
 	movaps %xmm1, %xmm3		/* x[3] | x[1] */
@@ -111,7 +122,7 @@
 	movlhps %xmm6, %xmm1		/* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
 	shufps $0xe4, %xmm6, %xmm5	/* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
 	movaps %xmm1, %xmm3		/* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
-	movl $C_1, %ebx
+	leal pic_offset(C_1), %ebx
 	addps %xmm5, %xmm1		/* u */
 	subps %xmm5, %xmm3		/* v */
 	movaps %xmm0, %xmm2		/* yb */
@@ -176,7 +187,7 @@
 	movhlps %xmm5, %xmm7		/* wT[1].im * d[1].im | wT[1].re * d[1].im */
 	movlhps %xmm6, %xmm5		/* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
 	shufps $0xb1, %xmm6, %xmm7	/* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
-	movl $C_1, %edi
+	leal pic_offset(C_1), %edi
 	movaps (%edi), %xmm4
 	mulps %xmm4, %xmm7
 	addps %xmm7, %xmm5		/* wB[1] * d3[1] | wT[1] * d[1] */
@@ -236,7 +247,7 @@
 	mulps %xmm5, %xmm4		/* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
 	mulps %xmm7, %xmm6		/* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
 	shufps $0xb1, %xmm2, %xmm1	/* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
-	movl $C_1, %edi
+	leal pic_offset(C_1), %edi
 	movaps (%edi), %xmm3		/* 1.0 | -1.0 | 1.0 | -1.0 */
 
 	movhlps %xmm4, %xmm5		/* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
Index: plugins/mpeg2/video/getpic.c
===================================================================
RCS file: /cvsroot/gstreamer/gstreamer/plugins/mpeg2/video/getpic.c,v
retrieving revision 1.9
diff -u -u -r1.9 getpic.c
--- plugins/mpeg2/video/getpic.c	2001/01/02 16:29:46	1.9
+++ plugins/mpeg2/video/getpic.c	2001/10/16 09:49:53
@@ -636,7 +636,11 @@
     */
     
       __asm__ __volatile__(
+#ifdef PIC
+             "movq        MMX_128@GOTOFF(%%ebx),%%mm4\n"
+#else
              "movq        MMX_128,%%mm4\n"
+#endif
              ".align 8\n"
              "1:"
                "movq      (%1),   %%mm0\n"


More information about the gstreamer-devel mailing list