/* TI Davinci driver - C64X+ DSP Library (c) Copyright 2008 directfb.org (c) Copyright 2007 Telio AG Written by Denis Oliver Kropp and Olaf Dreesen . All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ //#define DIRECT_ENABLE_DEBUG #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "davinci_c64x.h" /**********************************************************************************************************************/ #define C64X_DEVICE "/dev/c64x" #define C64X_DEVICE0 "/dev/c64x0" #define C64X_QLEN direct_page_align( sizeof(c64xTaskControl) ) #define C64X_MLEN direct_page_align( 0x2000000 ) __attribute__((noinline)) static void davinci_c64x_queue_error( DavinciC64x *c64x, const char *msg ) { c64xTaskControl *ctl = c64x->ctl; uint32_t dsp = ctl->QL_dsp; uint32_t arm = ctl->QL_arm; uint32_t armp = (arm-1) & C64X_QUEUE_MASK; c64xTask *dsp_task = &c64x->QueueL[dsp]; c64xTask *arm_task = &c64x->QueueL[arm]; c64xTask *armp_task = &c64x->QueueL[armp]; D_PERROR( "Davinci/C64X+: %s [DSP %d / %d (%s), ARM %d / %d (%s) <- %d / %d (%s)]\n", msg, dsp, (dsp_task->c64x_function >> 2) & 0x3fff, state_names[dsp_task->c64x_function & 3], arm, (arm_task->c64x_function >> 2) & 0x3fff, state_names[arm_task->c64x_function & 3], armp, (armp_task->c64x_function >> 2) & 0x3fff, state_names[armp_task->c64x_function & 
3] ); } /* 1. Idle Case ARM ARM DSP DSP | . . . . . . . . | | . . . . . . . . | free = length-1 2. Busy Case (ARM after) ARM ARM DSP DSP | o . . . . . . . | | . o o . . . . . | free = length-1 - arm + dsp 3. Busy Case (ARM before) ARM ARM DSP DSP | . . . . . o o o | | o o . . . . . o | free = dsp - arm - 1 4. Full Case (ARM after) ARM DSP | o o o o o o o . | free = 0 5. Full Case (ARM before) ARM ARM DSP DSP | o o o o o . o o | | . o o o o o o o | free = 0 */ DFBResult davinci_c64x_emit_tasks( DavinciC64x *c64x, DavinciC64xTasks *tasks, DavinciC64xEmitFlags flags ) { c64xTaskControl *ctl = c64x->ctl; uint32_t arm = ctl->QL_arm; unsigned int emitted = 0; unsigned int timeout = 23; D_MAGIC_ASSERT( tasks, DavinciC64xTasks ); while (emitted < tasks->num_tasks) { uint32_t dsp = ctl->QL_dsp; int free; if (arm == dsp) free = C64X_QUEUE_LENGTH - 1; else if (arm > dsp) free = C64X_QUEUE_LENGTH - 1 - arm + dsp; else free = dsp - arm - 1; if (free) { int emit = MIN( free, tasks->num_tasks - emitted ); int copy = MIN( emit, C64X_QUEUE_LENGTH - arm ); memcpy( (void*) &c64x->QueueL[arm], (void*) &tasks->tasks[emitted], sizeof(c64xTask) * copy ); if (copy < emit) { memcpy( (void*) &c64x->QueueL[0], (void*) &tasks->tasks[emitted+copy], sizeof(c64xTask) * (emit - copy) ); arm = (emit - copy); } else arm = (arm + copy) & C64X_QUEUE_MASK; mb(); ctl->QL_arm = arm; mb(); emitted += emit; timeout = 23; } else { if (!timeout--) { davinci_c64x_queue_error( c64x, "Emit Timeout!" 
); return DFB_TIMEOUT; } usleep( 7000 ); } } if (flags & C64X_TEF_RESET) tasks->num_tasks = 0; return DFB_OK; } DFBResult davinci_c64x_tasks_init( DavinciC64xTasks *tasks, unsigned int size ) { tasks->tasks = D_MALLOC( sizeof(c64xTask) * size ); if (!tasks->tasks) return D_OOM(); tasks->max_tasks = size; tasks->num_tasks = 0; D_MAGIC_SET( tasks, DavinciC64xTasks ); return DFB_OK; } DFBResult davinci_c64x_tasks_destroy( DavinciC64xTasks *tasks ) { D_MAGIC_ASSERT( tasks, DavinciC64xTasks ); D_ASSERT( tasks->tasks != NULL ); D_FREE( (void*) tasks->tasks ); tasks->tasks = NULL; D_MAGIC_CLEAR( tasks ); return DFB_OK; } DFBResult davinci_c64x_wait_low( DavinciC64x *c64x ) { DFBResult ret; c64xTaskControl *ctl = c64x->ctl; while (ctl->QL_dsp != ctl->QL_arm) { c64xTask *task = c64x_get_task( c64x ); task->c64x_function = C64X_FLAG_TODO | C64X_FLAG_INTERRUPT; c64x_submit_task( c64x, task ); if (ioctl( c64x->fd, C64X_IOCTL_WAIT_LOW )) { c64xTask *dsp_task = &c64x->QueueL[ctl->QL_dsp]; ret = errno2result( errno ); D_PERROR( "Davinci/C64X+: C64X_IOCTL_WAIT_LOW failed! [DSP %d / %d (%s), ARM %d / %d (%s)]\n", ctl->QL_dsp, (dsp_task->c64x_function >> 2) & 0x3fff, state_names[dsp_task->c64x_function & 3], ctl->QL_arm, (task->c64x_function >> 2) & 0x3fff, state_names[task->c64x_function & 3] ); return ret; } } return DFB_OK; } /**********************************************************************************************************************/ /* Benchmarking or Testing */ /**********************************************************************************************************************/ #if 1 #define BRINTF(x...) do { direct_log_printf( NULL, x ); } while (0) #else #define BRINTF(x...) 
printf( x ) #endif static void bench_mem( const char *name, void *ptr, int length, bool copy, bool from ) { int i, num; long long t1, t2, dt, total; char buf[0x100]; if (length > sizeof(buf)) length = sizeof(buf); num = 0x2000000 / length; t1 = direct_clock_get_abs_micros(); if (copy) { if (from) for (i=0; imem + 0x01000000; int *src = c64x->mem + 0x01100000; #if 0 src[num++] = DVA_BLOCK_WORD( 100, 0, 1 ); src[num++] = DVA_BLOCK_WORD( 200, 0, 0 ); src[num++] = DVA_BLOCK_WORD( 210, 1, 0 ); src[num++] = DVA_BLOCK_WORD( 220, 2, 1 ); src[num++] = DVA_BLOCK_WORD( 300, 0, 1 ); src[num++] = DVA_BLOCK_WORD( 400, 0, 0 ); src[num++] = DVA_BLOCK_WORD( 410, 1, 1 ); src[num++] = DVA_BLOCK_WORD( 500, 0, 0 ); src[num++] = DVA_BLOCK_WORD( 510, 63, 1 ); src[num++] = DVA_BLOCK_WORD( 600, 63, 1 ); #else src[num++] = DVA_BLOCK_WORD(136, 0, 0); src[num++] = DVA_BLOCK_WORD(-12, 8, 0); src[num++] = DVA_BLOCK_WORD(7, 16, 0); src[num++] = DVA_BLOCK_WORD(-2, 24, 1); src[num++] = DVA_BLOCK_WORD(136, 0, 0); src[num++] = DVA_BLOCK_WORD(-12, 8, 0); src[num++] = DVA_BLOCK_WORD(7, 16, 0); src[num++] = DVA_BLOCK_WORD(-2, 24, 1); src[num++] = DVA_BLOCK_WORD(1076, 0, 0); src[num++] = DVA_BLOCK_WORD(-204, 8, 0); src[num++] = DVA_BLOCK_WORD(-168, 16, 0); src[num++] = DVA_BLOCK_WORD(-129, 24, 0); src[num++] = DVA_BLOCK_WORD(-100, 32, 0); src[num++] = DVA_BLOCK_WORD(-40, 40, 0); src[num++] = DVA_BLOCK_WORD(-14, 48, 1); #if 1 src[num++] = DVA_BLOCK_WORD(1068, 0, 0); src[num++] = DVA_BLOCK_WORD(2, 1, 0); src[num++] = DVA_BLOCK_WORD(-202, 8, 0); src[num++] = DVA_BLOCK_WORD(-168, 16, 0); src[num++] = DVA_BLOCK_WORD(-2, 9, 0); src[num++] = DVA_BLOCK_WORD(-129, 24, 0); src[num++] = DVA_BLOCK_WORD(-97, 32, 0); src[num++] = DVA_BLOCK_WORD(-40, 40, 0); src[num++] = DVA_BLOCK_WORD(-13, 48, 1); #else src[num++] = DVA_BLOCK_WORD(1068, 0, 0); // src[num++] = DVA_BLOCK_WORD(2, 1, 0); src[num++] = DVA_BLOCK_WORD(-202, 8, 0); src[num++] = DVA_BLOCK_WORD(-1, 16, 0); // src[num++] = DVA_BLOCK_WORD(-2, 9, 0); src[num++] = 
DVA_BLOCK_WORD(-1, 24, 0); src[num++] = DVA_BLOCK_WORD(-97, 32, 1); // src[num++] = DVA_BLOCK_WORD(-40, 40, 0); // src[num++] = DVA_BLOCK_WORD(-13, 48, 1); #endif src[num++] = DVA_BLOCK_WORD(1048, 0, 0); src[num++] = DVA_BLOCK_WORD(-26, 8, 0); src[num++] = DVA_BLOCK_WORD(4, 16, 0); src[num++] = DVA_BLOCK_WORD(5, 24, 0); src[num++] = DVA_BLOCK_WORD(-4, 32, 1); src[num++] = DVA_BLOCK_WORD(996, 0, 0); src[num++] = DVA_BLOCK_WORD(24, 8, 0); src[num++] = DVA_BLOCK_WORD(-2, 24, 0); src[num++] = DVA_BLOCK_WORD(3, 32, 0); src[num++] = DVA_BLOCK_WORD(-4, 48, 1); #endif BRINTF("\n"); BRINTF("\n\n.======================== Testing load_block (dct_type_interlaced: %s) ========================.\n", dct_type_interlaced ? "yes" : "no"); BRINTF("\n"); BRINTF( "SOURCE (DVABlockWords)\n" ); BRINTF("\n"); for (i=0; i> 16, (src[i] >> 1) & 0x3f, src[i] & 1); BRINTF("\n\n"); memset( dst, 0x55, 0x100000 ); // test routine davinci_c64x_load_block( c64x, DAVINCI_C64X_MEM+0x01100000, 10, dct_type_interlaced ? 0x7f : 0x3f ); // copy idct buffer to memory where we can read it davinci_c64x_blit_16( c64x, DAVINCI_C64X_MEM+0x01000000, 0, 0xf065c0, 0, 16 * 24, 1 ); davinci_c64x_write_back_all( c64x ); davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); BRINTF( "-> IDCT BUFFER (16x16 + [ 8x8 8x8 ] shorts)\n" ); BRINTF("\n"); for (i=0; i<16*24; i++) { BRINTF("%5d ", dst[i] ); if ((i&15)==15) { BRINTF("\n"); } if ((i&255)==255) { BRINTF("\n"); } } BRINTF("\n\n"); #if 1 s16 *blocks = c64x->mem + 0x01200000; int offset = 0; memset( blocks, 0, 1024 ); for (i=0; i> 1) & 0x3f)] = src[i] >> 16; if (src[i] & 1) offset += 64; } memset( dst, 0x55, 0x100000 ); // test routine for (i=0; i<6; i++) davinci_c64x_dva_idct( c64x, DAVINCI_C64X_MEM+0x01000000 + i*128, 16, DAVINCI_C64X_MEM+0x01200000 + i*128 ); davinci_c64x_write_back_all( c64x ); davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); BRINTF( "-> SINGLE IDCT (59) BLOCKS (6x 8x8 shorts)\n" ); BRINTF("\n"); for (i=0; i<6*64; 
i++) { BRINTF("%5d ", dst[i] ); if ((i&7)==7) { BRINTF("\n"); } if ((i&63)==63) { BRINTF("\n"); } } BRINTF("\n\n"); #endif #if 0 // s16 blocks[384]; // int offset = 0; offset = 0; memset( blocks, 0, 1024 ); for (i=0; i> 1) & 0x3f)] = src[i] >> 16; if (src[i] & 1) { int n; for (n = 0; n < 8; n++) idct_row (blocks + offset + 8 * n); for (n = 0; n < 8; n++) idct_col (blocks + offset + n); offset += 64; } } BRINTF( "-> REFERENCE IDCT BLOCKS (6x 8x8 shorts)\n" ); BRINTF("\n"); for (i=0; i<6*64; i++) { BRINTF("%5d ", blocks[i] ); if ((i&7)==7) { BRINTF("\n"); } if ((i&63)==63) { BRINTF("\n"); } } BRINTF("\n\n"); #endif } static inline void bench_dezigzag( DavinciC64x *c64x ) { int i, num; long long t1, t2, dt, total; //int length = 0x10000; num = 0x200000;// / length; short *p = c64x->mem + 0x1000000; for (i=0; i<64; i++) { p[i] = i; BRINTF("%3d ", p[i]); if (i%8==7) { BRINTF("\n"); } } t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = C64X_DEZIGZAG | C64X_FLAG_TODO; task->c64x_arg[0] = (DAVINCI_C64X_MEM+0x01000000)+0x200000; task->c64x_arg[1] = (DAVINCI_C64X_MEM+0x01000000)+0x000000; //task->c64x_arg[2] = length/4; c64x_submit_task( c64x, task ); } davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); p = c64x->mem + 0x1200000; for (i=0; i<64; i++) { BRINTF("%3d ", p[i]); if (i%8==7) { BRINTF("\n"); } } dt = t2 - t1; total = num;// * length; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "de_zigzag()", total * 1000000ULL / dt ); } #define DUMP_PIXELS 1 static inline void bench_blend_argb( DavinciC64x *c64x, int sub ) { int i, num; long long t1, t2, dt, total; num = 1;//0x20000; u32 *src = c64x->mem + 0x1000000; u32 *dst = c64x->mem + 0x1200000; BRINTF( "\nTESTING BLEND_32 SUB %d\n", sub ); BRINTF( "\nSOURCE " ); for (i=0; i> 24, (src[i] >> 16) & 0xff, (src[i] >> 8) & 0xff, src[i] & 0xff); if (i%8==7) { BRINTF("\n"); } } BRINTF( "\nDESTINATION " ); for (i=0; i> 24, (dst[i] >> 16) & 0xff, (dst[i] >> 8) & 0xff, dst[i] & 0xff); 
if (i%8==7) { BRINTF("\n"); } } t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = C64X_BLEND_32 | C64X_FLAG_TODO | (sub << 16); task->c64x_arg[0] = (DAVINCI_C64X_MEM+0x01000000)+0x200000; task->c64x_arg[1] = 32; task->c64x_arg[2] = (DAVINCI_C64X_MEM+0x01000000)+0x000000; task->c64x_arg[3] = 32; task->c64x_arg[4] = 8; task->c64x_arg[5] = 8; task->c64x_arg[6] = 0x80; c64x_submit_task( c64x, task ); } davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF( "\n\nDESTINATION (AFTER) " ); for (i=0; i> 24, (dst[i] >> 16) & 0xff, (dst[i] >> 8) & 0xff, dst[i] & 0xff); if (i%8==7) { BRINTF("\n"); } } BRINTF("\n\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "blend_32(8x8)", total * 1000000ULL / dt ); } static inline void bench_fetch_uyvy( DavinciC64x *c64x, bool interleave, int xoff, int yoff ) { int i, x, y, num=1; long long t1, t2, dt, total; u8 *yuv = c64x->mem + 0x1000000; u8 *src = c64x->mem + 0x1200000; BRINTF("\n\n\n.======================== Testing fetch_uyvy (inter %d, xoff %d, yoff %d) ========================.\n\n", interleave, xoff, yoff); for (y=0; y<20; y++) { for (x=0; x<40; x++) { int val = (y*40)+x; src[y*1440 + x] = val; BRINTF("%02x ", val&0xff); } BRINTF("\n"); } BRINTF("\n"); memset( yuv, 0xAA, 0x100000 ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_arg[0] = (DAVINCI_C64X_MEM+0x01000000)+0x000000; task->c64x_arg[1] = (DAVINCI_C64X_MEM+0x01000000)+0x200000 + yoff*1440 + xoff * 2; task->c64x_arg[2] = 1440; task->c64x_function = (21 << 2) | C64X_FLAG_TODO; c64x_submit_task( c64x, task ); } davinci_c64x_write_back_all( c64x ); davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF( "\n\nDESTINATION\n\nY:\n" ); for (y=0;y<27;y++) { if (y==18) BRINTF("\nUV:\n"); for (x=0;x<32;x++) { BRINTF("%02x ",yuv[y*32+x]); } BRINTF("\n"); } dt = t2 - t1; total = num; D_INFO("\n\nDavinci/C64X: BENCHMARK on DSP - %-15s %lld 
Calls/sec\n", "blend_fetch_uyvy(16x16)", total * 1000000ULL / dt ); } #if 0 static inline void bench_fetch_uyvy( DavinciC64x *c64x, bool interleave, int xoff, int yoff ) { int i, x, y, num; long long t1, t2, dt, total; num = 1;//0x20000; u8 *yuv = c64x->mem + 0x1000000; u8 *src = c64x->mem + 0x1200000; BRINTF("\n"); BRINTF("\n\n.======================== Testing fetch_uyvy (inter %d, xoff %d, yoff %d) ========================.\n", interleave, xoff, yoff); BRINTF("\n"); BRINTF( "SOURCE (20x20)\n" ); for (y=0; y<20; y++) { for (x=0; x<40; x++) { int val = (x & 1) ? (x * 4 + y*0x10) : (x/4 + 0x40 + (x&2) * 0x10 + y*0x08); src[y*1440 + x] = val; BRINTF("%02x ", val&0xff); } BRINTF("\n"); } BRINTF("\n"); memset( yuv, 0x55, 0x100000 ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = (19 << 2) | C64X_FLAG_TODO; task->c64x_arg[0] = (DAVINCI_C64X_MEM+0x01000000)+0x000000; task->c64x_arg[1] = (DAVINCI_C64X_MEM+0x01000000)+0x200000 + yoff*1440 + xoff * 2; task->c64x_arg[2] = 1440; task->c64x_arg[3] = 16; task->c64x_arg[4] = interleave ? 
1 : 0; c64x_submit_task( c64x, task ); } davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF( "\n\nDESTINATION (17x18 / [9x9 9x9])\n" ); for (y=0; y<18; y++) { for (x=0; x<17; x++) { BRINTF("%02x ", yuv[y*32 + x]); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<9; y++) { for (x=0; x<9; x++) { BRINTF("%02x ", yuv[y*32 + x + 32*18]); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<9; y++) { for (x=0; x<9; x++) { BRINTF("%02x ", yuv[y*32 + x + 32*18+16]); } BRINTF("\n"); } BRINTF("\n"); BRINTF("\n\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "blend_fetch_uyvy(16x16)", total * 1000000ULL / dt ); } #endif #if 0 static inline void bench_put_idct( DavinciC64x *c64x, int dct_type ) { int i, num; long long t1, t2, dt, total; //int length = 0x10000; num = 0x10000;// / length; u8 *dst = c64x->mem + 0x01000000; int *src = c64x->mem + 0x01200000; src[0] = DVA_BLOCK_WORD( 100, 0, 1 ); src[1] = DVA_BLOCK_WORD( 200, 0, 0 ); src[2] = DVA_BLOCK_WORD( 210, 1, 0 ); src[3] = DVA_BLOCK_WORD( 220, 2, 1 ); src[4] = DVA_BLOCK_WORD( 300, 0, 1 ); src[5] = DVA_BLOCK_WORD( 400, 0, 0 ); src[6] = DVA_BLOCK_WORD( 410, 1, 1 ); src[7] = DVA_BLOCK_WORD( 500, 0, 0 ); src[8] = DVA_BLOCK_WORD( 510, 63, 1 ); src[9] = DVA_BLOCK_WORD( 600, 63, 1 ); BRINTF("\n"); BRINTF("\n\n.======================== Testing put_idct (%d) ========================.\n", dct_type); BRINTF("\n"); memset( dst, 0x55, 0x100000 ); for (i=0; i<10; i++) { BRINTF("0x%08x (%d, %d, %d)\n", (u32)src[i], src[i] >> 16, (src[i] >> 1) & 0x3f, src[i] & 1); } BRINTF("\n"); t1 = direct_clock_get_abs_micros(); { c64xTask *task = c64x_get_task( c64x ); task->c64x_function = C64X_LOAD_BLOCK | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM+0x1200000; task->c64x_arg[1] = 10; task->c64x_arg[2] = 0x3f; c64x_submit_task( c64x, task ); } davinci_c64x_blit_16( c64x, (DAVINCI_C64X_MEM+0x01000000), 0, 0xf06180, 0, 384, 1 ); davinci_c64x_blit_16( 
c64x, (DAVINCI_C64X_MEM+0x01100000), 0, 0xf06480, 0, 384/2, 1 ); davinci_c64x_put_uyvy_16x16( c64x, (DAVINCI_C64X_MEM+0x01300000), 32, 0xf06180, 0 ); davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); for (i=0; i<384; i++) { BRINTF("%5d ", dst[i] ); if (i%8==7) { BRINTF("\n"); } if (i%64==63) { BRINTF("\n"); } } BRINTF("\n\n"); for (i=0; i<384; i++) { BRINTF("%3d ", duv[i] ); if (i%8==7) { BRINTF("\n"); } if (i%64==63) { BRINTF("\n"); } } BRINTF("\n\n"); for (i=0; i<16*16*2; i++) { BRINTF("%02x ", duy[i]); if (i%32==31) { BRINTF("\n"); } } BRINTF("\n"); dt = t2 - t1; total = num;// * length; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "block_load()", total * 1000000ULL / dt ); } #endif static inline void bench_put_mc( DavinciC64x *c64x, bool interleave ) { int x, y, i, num; long long t1, t2, dt, total; num = 1;//720/16*576/16; u8 *dst = c64x->mem + 0x1000000; u8 *src = c64x->mem + 0x1200000; BRINTF("\n"); BRINTF("\n\n.======================== Testing put_mc (%d) ========================.\n", interleave); BRINTF("\n"); BRINTF("SOURCE (16x16 / [8x8 8x8]\n"); for (y=0; y<16; y++) { for (x=0; x<16; x++) { u8 val = (x << 4) + y; src[y*16 + x] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) { u8 val = (x << 4) + y*2; src[y*16 + x + 16*16] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) { u8 val = (x << 4) + y*2; src[y*16 + x + 16*16 + 8] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); memset( dst, 0x55, 0x100000 ); davinci_c64x_blit_32( c64x, C64X_MC_BUFFER_Y, 16, DAVINCI_C64X_MEM+0x1200000, 16, 4, 24 ); davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = C64X_PUT_MC_UYVY_16x16 | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM+0x1000000; task->c64x_arg[1] = 1440; task->c64x_arg[2] = interleave 
? 1 : 0; c64x_submit_task( c64x, task ); } davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF("\n"); BRINTF("DESTINATION (16x16 UYVY)\n"); for (y=0; y<16; y++) { for (x=0; x<32; x++) BRINTF("%02x ", dst[y*1440 + x]); BRINTF("\n"); } BRINTF("\n\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "put_mc_16x16()", total * 1000000ULL / dt ); } static inline void bench_put_sum( DavinciC64x *c64x, bool interleave ) { int x, y, i, num; long long t1, t2, dt, total; num = 1;//720/16*576/16; u8 *dst = c64x->mem + 0x1000000; u8 *src = c64x->mem + 0x1200000; u32 *words = c64x->mem + 0x1100000; BRINTF("\n"); BRINTF("\n\n.======================== Testing put_sum (%d) ========================.\n", interleave); BRINTF("\n"); BRINTF("WORDS (6x IDCT with one value)\n"); words[0] = DVA_BLOCK_WORD( 0, 0, 1 ); words[1] = DVA_BLOCK_WORD( 50, 0, 1 ); words[2] = DVA_BLOCK_WORD( 100, 0, 1 ); words[3] = DVA_BLOCK_WORD( 150, 0, 1 ); words[4] = DVA_BLOCK_WORD( 200, 0, 1 ); words[5] = DVA_BLOCK_WORD( 250, 0, 1 ); BRINTF("\n"); BRINTF("\n"); memset( dst, 0x55, 0x100000 ); for (i=0; i<6; i++) { BRINTF("0x%08x (%d, %d, %d)\n", (u32)words[i], words[i] >> 16, (words[i] >> 1) & 0x3f, words[i] & 1); } { c64xTask *task = c64x_get_task( c64x ); task->c64x_function = C64X_LOAD_BLOCK | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM+0x1100000; task->c64x_arg[1] = 6; task->c64x_arg[2] = 0x3f; c64x_submit_task( c64x, task ); } BRINTF("\n"); BRINTF("SOURCE (16x16 / [8x8 8x8]\n"); for (y=0; y<16; y++) { for (x=0; x<16; x++) { u8 val = (x << 4) + y; src[y*16 + x] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) { u8 val = (x << 4) + y*2; src[y*16 + x + 16*16] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) { u8 val = (x << 4) + y*2; src[y*16 + x + 16*16 + 8] = val; BRINTF("%02x ", val); } 
BRINTF("\n"); } BRINTF("\n"); memset( dst, 0x55, 0x100000 ); davinci_c64x_blit_32( c64x, C64X_MC_BUFFER_Y, 16, DAVINCI_C64X_MEM+0x1200000, 16, 4, 24 ); davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = C64X_PUT_SUM_UYVY_16x16 | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM+0x1000000; task->c64x_arg[1] = 1440; task->c64x_arg[2] = interleave ? 1 : 0; c64x_submit_task( c64x, task ); } davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF("\n"); BRINTF("DESTINATION (16x16 UYVY)\n"); for (y=0; y<16; y++) { for (x=0; x<32; x++) BRINTF("%02x ", dst[y*1440 + x]); BRINTF("\n"); } BRINTF("\n\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "put_sum_16x16()", total * 1000000ULL / dt ); } static inline void bench_sat_mc( DavinciC64x *c64x ) { int x, y, i, num; long long t1, t2, dt, total; num = 1;//720/16*576/16; u8 *dst = c64x->mem + 0x1000000; u8 *src = c64x->mem + 0x1200000; BRINTF("\n\n.======================== Testing sat_mc ========================.\n"); BRINTF("\n"); BRINTF("SOURCE (16x16 / [8x8 8x8]\n"); for (y=0; y<16; y++) { for (x=0; x<16; x++) { u8 val = (x << 4) + y; src[y*16 + x] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) { u8 val = (x << 4) + y*2; src[y*16 + x + 16*16] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) { u8 val = (x << 4) + y*2; src[y*16 + x + 16*16 + 8] = val; BRINTF("%02x ", val); } BRINTF("\n"); } BRINTF("\n"); memset( dst, 0x55, 0x100000 ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = (57 << 2) | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM+0x1000000; task->c64x_arg[1] = DAVINCI_C64X_MEM+0x1200000; task->c64x_arg[2] = 16; c64x_submit_task( c64x, task ); } davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( 
c64x ); t2 = direct_clock_get_abs_micros(); BRINTF("\n"); BRINTF("DESTINATION (16x16 / [8x8 8x8]\n"); for (y=0; y<16; y++) { for (x=0; x<16; x++) BRINTF("%02x ", dst[y*16 + x]); BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) BRINTF("%02x ", dst[y*16 + x + 16*16]); BRINTF("\n"); } BRINTF("\n"); for (y=0; y<8; y++) { for (x=0; x<8; x++) BRINTF("%02x ", dst[y*16 + x + 16*16 + 8]); BRINTF("\n"); } BRINTF("\n\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "sat_mc_16x16()", total * 1000000ULL / dt ); } static inline void bench_uyvy_1( DavinciC64x *c64x, bool progressive ) { c64xTask *task; int i, num; long long t1, t2, dt, total; num = 720/16*576/16; u8 *u = c64x->mem + 0x1200000; u8 *p = c64x->mem + 0x1000000; BRINTF("\n\n\n.======================== Testing put_uyvy (%s) ========================.\n\n", progressive ? "progressive" : "interlaced"); for (i=0; i<256; i++) { p[i] = i - 128; BRINTF("Y%-3d ", p[i]); if (i%8==7) { BRINTF("\n"); } } for (i=0; i<64; i++) { p[256+i] = i-32; BRINTF("U%-3d ", p[256+i]); if (i%8==7) { BRINTF("\n"); } } for (i=0; i<64; i++) { p[320+i] = i-32; BRINTF("V%-3d ", p[320+i]); if (i%8==7) { BRINTF("\n"); } } BRINTF("\n"); for (i=0; i<384; i++) { BRINTF("%4d ", p[i]); if (i%8==7) { BRINTF("\n"); } } BRINTF("\n"); memset( u, 0x55, 720*576*2 ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = C64X_PUT_UYVY_16x16 | C64X_FLAG_TODO; task->c64x_arg[0] = (DAVINCI_C64X_MEM+0x01000000)+0x200000+i*16*16*2; task->c64x_arg[1] = 720 * 2; task->c64x_arg[2] = (DAVINCI_C64X_MEM+0x01000000); task->c64x_arg[3] = 0; c64x_submit_task( c64x, task ); } BRINTF("\n"); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); for (i=0; i<16*16*2; i++) { BRINTF("%02x ", u[i/32*720*2 + i%32]); if (i%32==31) { BRINTF("\n"); } } BRINTF("\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "put_uyvy_16x16()", total * 1000000ULL 
/ dt ); } static inline void bench_uyvy_2( DavinciC64x *c64x, bool progressive ) { c64xTask *task; int i, num; long long t1, t2, dt, total; num = 1;//720/16*576/16; u8 *u = c64x->mem + 0x0200000; u8 *p = c64x->mem + 0x0000000; BRINTF("\n\n\n.======================== Testing put_uyvy (%s) ========================.\n\n", progressive ? "progressive" : "interlaced"); for (i=0; i<256; i++) { p[i] = i/8; BRINTF("Y%-3d ", p[i]); if (i%8==7) { BRINTF("\n"); } } for (i=0; i<64; i++) { p[256+i] = i/8 + 128; BRINTF("U%-3d ", p[256+i]); if (i%8==7) { BRINTF("\n"); } } for (i=0; i<64; i++) { p[320+i] = i/8 + 240; BRINTF("V%-3d ", p[320+i]); if (i%8==7) { BRINTF("\n"); } } BRINTF("\n"); for (i=0; i<384; i++) { BRINTF("%4d ", p[i]); if (i%8==7) { BRINTF("\n"); } } BRINTF("\n"); memset( u, 0x55, 720*576*2 ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = C64X_PUT_UYVY_16x16 | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM+0x200000+i*16*16*2; task->c64x_arg[1] = 720 * 2; task->c64x_arg[2] = DAVINCI_C64X_MEM; task->c64x_arg[3] = 0; c64x_submit_task( c64x, task ); } BRINTF("\n"); davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); for (i=0; i<16*16*2; i++) { BRINTF("%02x ", u[i/32*720*2 + i%32]); if (i%32==31) { BRINTF("\n"); } } BRINTF("\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "put_uyvy_16x16()", total * 1000000ULL / dt ); } static inline void bench_uyvy_3( DavinciC64x *c64x, bool progressive ) { c64xTask *task; int i, num; long long t1, t2, dt, total; num = 1;//720/16*576/16; u8 *u = c64x->mem + 0x1200000; u8 *p = c64x->mem + 0x1000000; BRINTF("\n\n\n.======================== Testing put_uyvy (%s) ========================.\n\n", progressive ? 
"progressive" : "interlaced"); for (i=0; i<256; i++) { p[i] = i%8; BRINTF("Y%-3d ", p[i]); if (i%8==7) { BRINTF("\n"); } } for (i=0; i<64; i++) { p[256+i] = i%8 + 128; BRINTF("U%-3d ", p[256+i]); if (i%8==7) { BRINTF("\n"); } } for (i=0; i<64; i++) { p[320+i] = i%8 + 240; BRINTF("V%-3d ", p[320+i]); if (i%8==7) { BRINTF("\n"); } } BRINTF("\n"); for (i=0; i<384; i++) { BRINTF("%4d ", p[i]); if (i%8==7) { BRINTF("\n"); } } BRINTF("\n"); memset( u, 0x55, 720*576*2 ); t1 = direct_clock_get_abs_micros(); for (i=0; ic64x_function = C64X_PUT_UYVY_16x16 | C64X_FLAG_TODO; task->c64x_arg[0] = (DAVINCI_C64X_MEM+0x01000000)+0x200000+i*16*16*2; task->c64x_arg[1] = 720 * 2; task->c64x_arg[2] = (DAVINCI_C64X_MEM+0x01000000); task->c64x_arg[3] = 0; c64x_submit_task( c64x, task ); } BRINTF("\n"); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); for (i=0; i<16*16*2; i++) { BRINTF("%02x ", u[i/32*720*2 + i%32]); if (i%32==31) { BRINTF("\n"); } } BRINTF("\n"); dt = t2 - t1; total = num; D_INFO( "Davinci/C64X: BENCHMARK on DSP - %-15s %lld Calls/sec\n", "put_uyvy_16x16()", total * 1000000ULL / dt ); } static inline void bench_mc( DavinciC64x *c64x, int func, int w, int h, bool avg, const char *name ) { int i, x, y, num; long long t1, t2, dt, total; num = 0x1;//0000; u8 *dst = c64x->mem + 0x1200000; u8 *dsr = c64x->mem + 0x1100000; u8 *src = c64x->mem + 0x1000000; BRINTF("\n\n.============ Testing %s ============.\n", name); BRINTF("\n"); BRINTF("SRC REF\n"); for (y=0; yc64x_function = (func << 2) | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM + 0x01200000; task->c64x_arg[1] = 32; task->c64x_arg[2] = DAVINCI_C64X_MEM + 0x01000000; task->c64x_arg[3] = DAVINCI_C64X_MEM + 0x01100000; task->c64x_arg[4] = 32; task->c64x_arg[5] = h; c64x_submit_task( c64x, task ); } davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF("-> DST\n"); for (y=0; yc64x_function = (63 << 2) | C64X_FLAG_TODO; task->c64x_arg[0] = nom; task->c64x_arg[1] = den; 
c64x_submit_task( c64x, task ); davinci_c64x_wait_low( c64x ); BRINTF("%x / %x = %x\n\n\n", nom, den, task->c64x_return); } static inline void bench_dither_argb( DavinciC64x *c64x ) { int i, x, y, num, w = 8, h = 17; long long t1, t2, dt, total; num = 0x10000; u16 *dr = c64x->mem + 0x1200000; u8 *da = c64x->mem + 0x1100000; u32 *src = c64x->mem + 0x1000000; BRINTF("\n\n.======================== Testing dither_argb ========================.\n"); BRINTF("\n"); BRINTF("SOURCE ARGB\n"); for (y=0; yc64x_function = C64X_DITHER_ARGB | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM + 0x01200000; task->c64x_arg[1] = DAVINCI_C64X_MEM + 0x01100000; task->c64x_arg[2] = 64; task->c64x_arg[3] = DAVINCI_C64X_MEM + 0x01000000; task->c64x_arg[4] = 128; task->c64x_arg[5] = w; task->c64x_arg[6] = h; c64x_submit_task( c64x, task ); } davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF("-> DST RGB\n"); for (y=0; y DST ALPHA\n"); for (y=0; y> 4); } BRINTF("\n"); } BRINTF("\n"); dt = t2 - t1; total = num; BRINTF( "BENCHMARK on DSP - %-15s %lld Calls/sec\n", "dither_argb", total * 1000000ULL / dt ); } /**********************************************************************************************************************/ /*** 32 bit scaler ****************************************************************************************************/ /**********************************************************************************************************************/ typedef struct { DFBRegion clip; const void *colors; ulong protect; ulong key; } StretchCtx; typedef void (*StretchHVx)( void *dst, int dpitch, const void *src, int spitch, int width, int height, int dst_width, int dst_height, const StretchCtx *ctx ); #define STRETCH_NONE 0 #define STRETCH_SRCKEY 1 #define STRETCH_PROTECT 2 #define STRETCH_SRCKEY_PROTECT 3 #define STRETCH_NUM 4 typedef struct { struct { StretchHVx up[STRETCH_NUM]; StretchHVx down[STRETCH_NUM]; } f[DFB_NUM_PIXELFORMATS]; } 
StretchFunctionTable; #define DST_FORMAT DSPF_ARGB #define TABLE_NAME stretch_32 #define FUNC_NAME(UPDOWN,K,P,F) stretch_32_ ## UPDOWN ## _ ## K ## P ## _ ## F #define SHIFT_R8 8 #define SHIFT_L8 8 #define X_00FF00FF 0x00ff00ff #define X_FF00FF00 0xff00ff00 #define MASK_RGB 0x00ffffff #define HAS_ALPHA #include #undef DST_FORMAT #undef TABLE_NAME #undef FUNC_NAME #undef SHIFT_R8 #undef SHIFT_L8 #undef X_00FF00FF #undef X_FF00FF00 #undef MASK_RGB #undef HAS_ALPHA static inline void bench_stretch_32( DavinciC64x *c64x, int sw, int sh, int dw, int dh ) { int i, x, y, num; long long t1, t2, dt, total; bool down = (dw < sw) && (dh < sh); #if 0 int SW = (sw + 5) & ~3; int SH = (sh + 5) & ~3; int DW = (dw + 5) & ~3; int DH = (dh + 5) & ~3; #else int SW = sw; int SH = sh; int DW = dw; int DH = dh; #endif num = 1;//0x10000; u32 cpu[DW * DH]; u32 *dst = c64x->mem + 0x1200000; u32 *src = c64x->mem + 0x1000000; memset( src, 0x55, 0x100000 ); for (y=0; y %dx%d ) ========================.\n", sw, sh, dw, dh); BRINTF("\n"); BRINTF("SOURCE IMAGE (%dx%d) [%dx%d]\n", sw, sh, SW, SH); for (y=0; yc64x_function = (down ? 
C64X_STRETCH_32_down : C64X_STRETCH_32_up ) | C64X_FLAG_TODO; task->c64x_arg[0] = DAVINCI_C64X_MEM + 0x1200000; task->c64x_arg[1] = DAVINCI_C64X_MEM + 0x1000000; task->c64x_arg[2] = (DW * 4) | ((SW * 4) << 16); task->c64x_arg[3] = dh | (dw << 16); task->c64x_arg[4] = sh | (sw << 16); task->c64x_arg[5] = (dw - 1) | ((dh - 1) << 16); task->c64x_arg[6] = 0 | (0 << 16); c64x_submit_task( c64x, task ); } davinci_c64x_write_back_all( c64x ); davinci_c64x_wait_low( c64x ); t2 = direct_clock_get_abs_micros(); BRINTF("-> DSP RESULT (%dx%d) [%dx%d]\n", dw, dh, DW, DH); for (y=0; y CPU RESULT (%dx%d) [%dx%d]\n", dw, dh, DW, DH); for (y=0; yfd = fd; c64x->ctl = map_q; c64x->mem = map_m; c64x->QueueL = map_m + 0x01e00000; D_INFO( "Davinci/C64X: Low ARM %d / DSP %d, High ARM %d / DSP %d\n", c64x->ctl->QL_arm, c64x->ctl->QL_dsp, c64x->ctl->QH_arm, c64x->ctl->QH_dsp ); D_MAGIC_SET( c64x, DavinciC64x ); if (getenv("C64X_TEST")) { // test_load_block( c64x, false ); // test_load_block( c64x, true ); // bench_dither_argb( c64x ); #if 0 bench_uyvy_1( c64x, true ); bench_uyvy_1( c64x, false ); bench_uyvy_2( c64x, true ); bench_uyvy_2( c64x, false ); bench_uyvy_3( c64x, true ); bench_uyvy_3( c64x, false ); #endif #if 0 bench_blend_argb( c64x, 0 ); bench_blend_argb( c64x, 1 ); bench_blend_argb( c64x, 2 ); bench_blend_argb( c64x, 3 ); #endif #if 0 bench_stretch_32( c64x, 2, 1, 16, 1 ); bench_stretch_32( c64x, 2, 2, 16, 2 ); bench_stretch_32( c64x, 2, 1, 3, 1 ); bench_stretch_32( c64x, 4, 1, 6, 1 ); bench_stretch_32( c64x, 3, 1, 2, 1 ); bench_stretch_32( c64x, 6, 1, 4, 1 ); #endif #if 1 bench_fetch_uyvy( c64x, false, 0, 0 ); bench_fetch_uyvy( c64x, false, 1, 0 ); bench_fetch_uyvy( c64x, false, 0, 1 ); bench_fetch_uyvy( c64x, false, 1, 1 ); bench_fetch_uyvy( c64x, true, 0, 0 ); bench_fetch_uyvy( c64x, true, 1, 0 ); bench_fetch_uyvy( c64x, true, 0, 1 ); bench_fetch_uyvy( c64x, true, 1, 1 ); #endif #if 0 bench_put_mc( c64x, false ); bench_put_mc( c64x, true ); bench_put_sum( c64x, false ); 
/*
 * Remainder of the device setup routine whose beginning lies on the preceding
 * source lines (presumably davinci_c64x_open() - its head is not readable in
 * this copy).  The first two calls below continue an '#if 0' group that was
 * opened on the previous source line.
 */
          bench_put_sum( c64x, true );
          bench_sat_mc( c64x );
#endif

          /* Disabled: bench_mc() runs for function codes 32..39 (names "mc_put_*"). */
#if 0
          bench_mc( c64x, 32, 8, 8, false, "mc_put_o_8" );
          bench_mc( c64x, 33, 8, 8, false, "mc_put_x_8" );
          bench_mc( c64x, 34, 8, 8, false, "mc_put_y_8" );
          bench_mc( c64x, 35, 8, 8, false, "mc_put_xy_8" );
          bench_mc( c64x, 36, 16, 16, false, "mc_put_o_16" );
          bench_mc( c64x, 37, 16, 16, false, "mc_put_x_16" );
          bench_mc( c64x, 38, 16, 16, false, "mc_put_y_16" );
          bench_mc( c64x, 39, 16, 16, false, "mc_put_xy_16" );
#endif

          /* Disabled: bench_mc() runs for function codes 40..47 (names "mc_avg_*"). */
#if 0
          bench_mc( c64x, 40, 8, 8, true, "mc_avg_o_8" );
          bench_mc( c64x, 41, 8, 8, true, "mc_avg_x_8" );
          bench_mc( c64x, 42, 8, 8, true, "mc_avg_y_8" );
          bench_mc( c64x, 43, 8, 8, true, "mc_avg_xy_8" );
          bench_mc( c64x, 44, 16, 16, true, "mc_avg_o_16" );
          bench_mc( c64x, 45, 16, 16, true, "mc_avg_x_16" );
          bench_mc( c64x, 46, 16, 16, true, "mc_avg_y_16" );
          bench_mc( c64x, 47, 16, 16, true, "mc_avg_xy_16" );
#endif

          /* Disabled: bench_div() runs, including two with a zero second argument. */
#if 0
          bench_div( c64x, 1, 3 );
          bench_div( c64x, 1000, 333 );
          bench_div( c64x, 1000, 334 );
          bench_div( c64x, 6666, 2222 );
          bench_div( c64x, 1234, 1234 );
          bench_div( c64x, 4000, 0 );
          bench_div( c64x, 5000, 0 );
          bench_div( c64x, 10000, 3 );
          bench_div( c64x, 14, 3 );
          bench_div( c64x, 0x10000, 0x1000 );
          bench_div( c64x, 0x1000, 0x100 );
          bench_div( c64x, 0x100000, 2 );
#endif
     }

     return DFB_OK;

     /* Failure path: undo the queue mapping if 'map_q' is set, close the device and report 'ret'. */
error:
     if (map_q)
          munmap( map_q, C64X_QLEN );

     close( fd );

     return ret;
}

/*
 * Shuts down a device handle.
 *
 * Unmaps c64x->mem (C64X_MLEN bytes) and c64x->ctl (C64X_QLEN bytes), closes
 * c64x->fd and clears the magic of 'c64x' so later D_MAGIC_ASSERT() checks on
 * it fail.  The return values of munmap() and close() are not checked.
 *
 * Always returns DFB_OK.
 */
DFBResult
davinci_c64x_close( DavinciC64x *c64x )
{
     D_MAGIC_ASSERT( c64x, DavinciC64x );

     munmap( (void*) c64x->mem, C64X_MLEN );
     munmap( (void*) c64x->ctl, C64X_QLEN );

     close( c64x->fd );

     D_MAGIC_CLEAR( c64x );

     return DFB_OK;
}