/*
   Unix SMB/CIFS implementation.
   oplock processing
   Copyright (C) Andrew Tridgell 1992-1998
   Copyright (C) Jeremy Allison 1998 - 2001
   Copyright (C) Volker Lendecke 2005

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "includes.h"

/* Current number of oplocks we have outstanding. */
static int32 exclusive_oplocks_open = 0;
static int32 level_II_oplocks_open = 0;
BOOL global_client_failed_oplock_break = False;

extern struct timeval smb_last_time;
extern uint32 global_client_caps;
extern int smb_read_error;

/* Kernel oplock backend; stays NULL unless init_oplocks() installs one. */
static struct kernel_oplocks *koplocks;

/****************************************************************************
 Report the value of the exclusive oplock counter kept by this file.
****************************************************************************/

int32 get_number_of_exclusive_open_oplocks(void)
{
	const int32 current = exclusive_oplocks_open;

	return current;
}

/****************************************************************************
 Tell the caller whether the kernel oplock backend has something pending.
 Returns False when no backend is installed, otherwise True exactly when
 the backend's msg_waiting() hook reports a message for the given fd set.
****************************************************************************/

BOOL oplock_message_waiting(fd_set *fds)
{
	/* Without a backend there is nothing that could be waiting. */
	if (koplocks == NULL) {
		return False;
	}

	return koplocks->msg_waiting(fds) ? True : False;
}

/****************************************************************************
 Read an oplock break message from either the oplock UDP fd or the kernel
 (if kernel oplocks are supported).
If timeout is zero then *fds contains the file descriptors that are ready to be read and acted upon. If timeout is non-zero then *fds contains the file descriptors to be selected on for read. The timeout is in milliseconds ****************************************************************************/ void process_kernel_oplocks(void) { fd_set fds; FD_ZERO(&fds); smb_read_error = 0; /* * We need to check for kernel oplocks before going into the select * here, as the EINTR generated by the linux kernel oplock may have * already been eaten. JRA. */ if (!koplocks) { return; } while (koplocks->msg_waiting(&fds)) { files_struct *fsp; struct kernel_oplock_message msg; fsp = koplocks->receive_message(&fds); if (fsp == NULL) { DEBUG(3, ("Kernel oplock message announced, but none " "received\n")); return; } msg.dev = fsp->dev; msg.inode = fsp->inode; msg.file_id = fsp->file_id; message_send_pid(pid_to_procid(sys_getpid()), MSG_SMB_KERNEL_BREAK, &msg, sizeof(msg), True); } } /**************************************************************************** Attempt to set an oplock on a file. Always succeeds if kernel oplocks are disabled (just sets flags). Returns True if oplock set. ****************************************************************************/ BOOL set_file_oplock(files_struct *fsp, int oplock_type) { if (koplocks && !koplocks->set_oplock(fsp, oplock_type)) return False; fsp->oplock_type = oplock_type; fsp->sent_oplock_break = NO_BREAK_SENT; if (oplock_type == LEVEL_II_OPLOCK) level_II_oplocks_open++; else exclusive_oplocks_open++; DEBUG(5,("set_file_oplock: granted oplock on file %s, dev = %x, inode = %.0f, file_id = %lu, \ tv_sec = %x, tv_usec = %x\n", fsp->fsp_name, (unsigned int)fsp->dev, (double)fsp->inode, fsp->file_id, (int)fsp->open_time.tv_sec, (int)fsp->open_time.tv_usec )); return True; } /**************************************************************************** Attempt to release an oplock on a file. Decrements oplock count. 
****************************************************************************/ void release_file_oplock(files_struct *fsp) { if ((fsp->oplock_type != NO_OPLOCK) && (fsp->oplock_type != FAKE_LEVEL_II_OPLOCK) && koplocks) { koplocks->release_oplock(fsp); } if (fsp->oplock_type == LEVEL_II_OPLOCK) level_II_oplocks_open--; else if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) exclusive_oplocks_open--; SMB_ASSERT(exclusive_oplocks_open>=0); SMB_ASSERT(level_II_oplocks_open>=0); fsp->oplock_type = NO_OPLOCK; fsp->sent_oplock_break = NO_BREAK_SENT; flush_write_cache(fsp, OPLOCK_RELEASE_FLUSH); } /**************************************************************************** Attempt to downgrade an oplock on a file. Doesn't decrement oplock count. ****************************************************************************/ static void downgrade_file_oplock(files_struct *fsp) { if (koplocks) koplocks->release_oplock(fsp); fsp->oplock_type = LEVEL_II_OPLOCK; exclusive_oplocks_open--; level_II_oplocks_open++; fsp->sent_oplock_break = NO_BREAK_SENT; } /**************************************************************************** Remove a file oplock. Copes with level II and exclusive. Locks then unlocks the share mode lock. Client can decide to go directly to none even if a "break-to-level II" was sent. ****************************************************************************/ BOOL remove_oplock(files_struct *fsp) { SMB_DEV_T dev = fsp->dev; SMB_INO_T inode = fsp->inode; BOOL ret; struct share_mode_lock *lck; /* Remove the oplock flag from the sharemode. 
*/ lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL); if (lck == NULL) { DEBUG(0,("remove_oplock: failed to lock share entry for " "file %s\n", fsp->fsp_name )); return False; } ret = remove_share_oplock(lck, fsp); if (!ret) { DEBUG(0,("remove_oplock: failed to remove share oplock for " "file %s fnum %d, dev = %x, inode = %.0f\n", fsp->fsp_name, fsp->fnum, (unsigned int)dev, (double)inode)); } release_file_oplock(fsp); talloc_free(lck); return ret; } /* * Deal with a reply when a break-to-level II was sent. */ BOOL downgrade_oplock(files_struct *fsp) { SMB_DEV_T dev = fsp->dev; SMB_INO_T inode = fsp->inode; BOOL ret; struct share_mode_lock *lck; lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL); if (lck == NULL) { DEBUG(0,("downgrade_oplock: failed to lock share entry for " "file %s\n", fsp->fsp_name )); return False; } ret = downgrade_share_oplock(lck, fsp); if (!ret) { DEBUG(0,("downgrade_oplock: failed to downgrade share oplock " "for file %s fnum %d, dev = %x, inode = %.0f\n", fsp->fsp_name, fsp->fnum, (unsigned int)dev, (double)inode)); } downgrade_file_oplock(fsp); talloc_free(lck); return ret; } /**************************************************************************** Setup the listening set of file descriptors for an oplock break message either from the UDP socket or from the kernel. Returns the maximum fd used. ****************************************************************************/ int setup_oplock_select_set( fd_set *fds) { int maxfd = 0; if (koplocks && koplocks->notification_fd != -1) { FD_SET(koplocks->notification_fd, fds); maxfd = MAX(maxfd, koplocks->notification_fd); } return maxfd; } /**************************************************************************** Set up an oplock break message. 
****************************************************************************/ static char *new_break_smb_message(TALLOC_CTX *mem_ctx, files_struct *fsp, uint8_t cmd) { char *result = TALLOC_ARRAY(mem_ctx, char, smb_size + 8*2 + 0); if (result == NULL) { DEBUG(0, ("talloc failed\n")); return NULL; } memset(result,'\0',smb_size); set_message(result,8,0,True); SCVAL(result,smb_com,SMBlockingX); SSVAL(result,smb_tid,fsp->conn->cnum); SSVAL(result,smb_pid,0xFFFF); SSVAL(result,smb_uid,0); SSVAL(result,smb_mid,0xFFFF); SCVAL(result,smb_vwv0,0xFF); SSVAL(result,smb_vwv2,fsp->fnum); SCVAL(result,smb_vwv3,LOCKING_ANDX_OPLOCK_RELEASE); SCVAL(result,smb_vwv3+1,cmd); return result; } /**************************************************************************** Function to do the waiting before sending a local break. ****************************************************************************/ static void wait_before_sending_break(void) { struct timeval cur_tv; long wait_left = (long)lp_oplock_break_wait_time(); if (wait_left == 0) return; GetTimeOfDay(&cur_tv); wait_left -= ((cur_tv.tv_sec - smb_last_time.tv_sec)*1000) + ((cur_tv.tv_usec - smb_last_time.tv_usec)/1000); if(wait_left > 0) { wait_left = MIN(wait_left, 1000); sys_usleep(wait_left * 1000); } } /**************************************************************************** Ensure that we have a valid oplock. ****************************************************************************/ static files_struct *initial_break_processing(SMB_DEV_T dev, SMB_INO_T inode, unsigned long file_id) { files_struct *fsp = NULL; if( DEBUGLVL( 3 ) ) { dbgtext( "initial_break_processing: called for dev = %x, inode = %.0f file_id = %lu\n", (unsigned int)dev, (double)inode, file_id); dbgtext( "Current oplocks_open (exclusive = %d, levelII = %d)\n", exclusive_oplocks_open, level_II_oplocks_open ); } /* * We need to search the file open table for the * entry containing this dev and inode, and ensure * we have an oplock on it. 
*/ fsp = file_find_dif(dev, inode, file_id); if(fsp == NULL) { /* The file could have been closed in the meantime - return success. */ if( DEBUGLVL( 3 ) ) { dbgtext( "initial_break_processing: cannot find open file with " ); dbgtext( "dev = %x, inode = %.0f file_id = %lu", (unsigned int)dev, (double)inode, file_id); dbgtext( "allowing break to succeed.\n" ); } return NULL; } /* Ensure we have an oplock on the file */ /* * There is a potential race condition in that an oplock could * have been broken due to another udp request, and yet there are * still oplock break messages being sent in the udp message * queue for this file. So return true if we don't have an oplock, * as we may have just freed it. */ if(fsp->oplock_type == NO_OPLOCK) { if( DEBUGLVL( 3 ) ) { dbgtext( "initial_break_processing: file %s ", fsp->fsp_name ); dbgtext( "(dev = %x, inode = %.0f, file_id = %lu) has no oplock.\n", (unsigned int)dev, (double)inode, fsp->file_id ); dbgtext( "Allowing break to succeed regardless.\n" ); } return NULL; } return fsp; } static void oplock_timeout_handler(struct timed_event *te, const struct timeval *now, void *private_data) { files_struct *fsp = private_data; DEBUG(0, ("Oplock break failed -- replying anyway\n")); global_client_failed_oplock_break = True; remove_oplock(fsp); reply_to_oplock_break_requests(fsp); } static void process_oplock_break_message(int msg_type, struct process_id src, void *buf, size_t len) { struct share_mode_entry *msg = buf; files_struct *fsp; char *break_msg; BOOL break_to_level2 = False; BOOL sign_state; if (buf == NULL) { DEBUG(0, ("Got NULL buffer\n")); return; } if (len != sizeof(*msg)) { DEBUG(0, ("Got invalid msg len %d\n", (int)len)); return; } DEBUG(10, ("Got oplock break message from pid %d: %d/%d/%d\n", (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode, (int)msg->share_file_id)); fsp = initial_break_processing(msg->dev, msg->inode, msg->share_file_id); if (fsp == NULL) { /* We hit race here. 
Break messages are sent, and before we * get to process this message, we have closed the file. Reply * with 'ok, oplock broken' */ DEBUG(3, ("Did not find fsp\n")); message_send_pid(src, MSG_SMB_BREAK_RESPONSE, msg, sizeof(*msg), True); return; } if (fsp->sent_oplock_break != NO_BREAK_SENT) { /* Remember we have to inform the requesting PID when the * client replies */ msg->pid = src; ADD_TO_ARRAY(NULL, struct share_mode_entry, *msg, &fsp->pending_break_messages, &fsp->num_pending_break_messages); return; } if (EXCLUSIVE_OPLOCK_TYPE(msg->op_type) && !EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) { DEBUG(3, ("Already downgraded oplock on %.0f/%.0f: %s\n", (double)fsp->dev, (double)fsp->inode, fsp->fsp_name)); message_send_pid(src, MSG_SMB_BREAK_RESPONSE, msg, sizeof(*msg), True); return; } if ((msg_type == MSG_SMB_BREAK_REQUEST) && (global_client_caps & CAP_LEVEL_II_OPLOCKS) && !koplocks && /* NOTE: we force levelII off for kernel oplocks - * this will change when it is supported */ lp_level2_oplocks(SNUM(fsp->conn))) { break_to_level2 = True; } break_msg = new_break_smb_message(NULL, fsp, break_to_level2 ? OPLOCKLEVEL_II : OPLOCKLEVEL_NONE); if (break_msg == NULL) { exit_server("Could not talloc break_msg\n"); } /* Need to wait before sending a break message to a file of our own */ if (procid_to_pid(&src) == sys_getpid()) { wait_before_sending_break(); } /* Save the server smb signing state. */ sign_state = srv_oplock_set_signing(False); show_msg(break_msg); if (!send_smb(smbd_server_fd(), break_msg)) { exit_server("oplock_break: send_smb failed."); } /* Restore the sign state to what it was. */ srv_oplock_set_signing(sign_state); talloc_free(break_msg); if (msg_type == MSG_SMB_BREAK_REQUEST) { fsp->sent_oplock_break = break_to_level2 ? 
LEVEL_II_BREAK_SENT:BREAK_TO_NONE_SENT; } else { /* Async level2 request, don't send a reply */ fsp->sent_oplock_break = ASYNC_LEVEL_II_BREAK_SENT; } msg->pid = src; ADD_TO_ARRAY(NULL, struct share_mode_entry, *msg, &fsp->pending_break_messages, &fsp->num_pending_break_messages); if (fsp->oplock_timeout != NULL) { DEBUG(0, ("Logic problem -- have an oplock event hanging " "around\n")); } fsp->oplock_timeout = add_timed_event(NULL, timeval_current_ofs(OPLOCK_BREAK_TIMEOUT, 0), "oplock_timeout_handler", oplock_timeout_handler, fsp); if (fsp->oplock_timeout == NULL) { DEBUG(0, ("Could not add oplock timeout handler\n")); } } static void process_kernel_oplock_break(int msg_type, struct process_id src, void *buf, size_t len) { struct kernel_oplock_message *msg = buf; files_struct *fsp; char *break_msg; BOOL sign_state; if (buf == NULL) { DEBUG(0, ("Got NULL buffer\n")); return; } if (len != sizeof(*msg)) { DEBUG(0, ("Got invalid msg len %d\n", (int)len)); return; } DEBUG(10, ("Got kernel oplock break message from pid %d: %d/%d/%d\n", (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode, (int)msg->file_id)); fsp = initial_break_processing(msg->dev, msg->inode, msg->file_id); if (fsp == NULL) { DEBUG(3, ("Got a kernel oplock break message for a file " "I don't know about\n")); return; } if (fsp->sent_oplock_break != NO_BREAK_SENT) { /* This is ok, kernel oplocks come in completely async */ DEBUG(3, ("Got a kernel oplock request while waiting for a " "break reply\n")); return; } break_msg = new_break_smb_message(NULL, fsp, OPLOCKLEVEL_NONE); if (break_msg == NULL) { exit_server("Could not talloc break_msg\n"); } /* Save the server smb signing state. */ sign_state = srv_oplock_set_signing(False); show_msg(break_msg); if (!send_smb(smbd_server_fd(), break_msg)) { exit_server("oplock_break: send_smb failed."); } /* Restore the sign state to what it was. 
*/ srv_oplock_set_signing(sign_state); talloc_free(break_msg); fsp->sent_oplock_break = BREAK_TO_NONE_SENT; } void reply_to_oplock_break_requests(files_struct *fsp) { int i; for (i=0; inum_pending_break_messages; i++) { struct share_mode_entry *msg = &fsp->pending_break_messages[i]; message_send_pid(msg->pid, MSG_SMB_BREAK_RESPONSE, msg, sizeof(*msg), True); } SAFE_FREE(fsp->pending_break_messages); fsp->num_pending_break_messages = 0; if (fsp->oplock_timeout != NULL) { talloc_free(fsp->oplock_timeout); fsp->oplock_timeout = NULL; } return; } static void process_oplock_break_response(int msg_type, struct process_id src, void *buf, size_t len) { struct share_mode_entry *msg = buf; if (buf == NULL) { DEBUG(0, ("Got NULL buffer\n")); return; } if (len != sizeof(*msg)) { DEBUG(0, ("Got invalid msg len %d\n", (int)len)); return; } DEBUG(10, ("Got oplock break response from pid %d: %d/%d/%d mid %d\n", (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode, (int)msg->share_file_id, (int)msg->op_mid)); /* Here's the hack from open.c, store the mid in the 'port' field */ schedule_deferred_open_smb_message(msg->op_mid); } static void process_open_retry_message(int msg_type, struct process_id src, void *buf, size_t len) { struct share_mode_entry *msg = buf; if (buf == NULL) { DEBUG(0, ("Got NULL buffer\n")); return; } if (len != sizeof(*msg)) { DEBUG(0, ("Got invalid msg len %d\n", (int)len)); return; } DEBUG(10, ("Got open retry msg from pid %d: %d/%d mid %d\n", (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode, (int)msg->op_mid)); schedule_deferred_open_smb_message(msg->op_mid); } /**************************************************************************** This function is called on any file modification or lock request. If a file is level 2 oplocked then it must tell all other level 2 holders to break to none. 
****************************************************************************/ void release_level_2_oplocks_on_change(files_struct *fsp) { int i; struct share_mode_lock *lck; /* * If this file is level II oplocked then we need * to grab the shared memory lock and inform all * other files with a level II lock that they need * to flush their read caches. We keep the lock over * the shared memory area whilst doing this. */ if (!LEVEL_II_OPLOCK_TYPE(fsp->oplock_type)) return; lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL); if (lck == NULL) { DEBUG(0,("release_level_2_oplocks_on_change: failed to lock " "share mode entry for file %s.\n", fsp->fsp_name )); } DEBUG(10,("release_level_2_oplocks_on_change: num_share_modes = %d\n", lck->num_share_modes )); if (fsp->oplock_type == FAKE_LEVEL_II_OPLOCK) { /* See if someone else has already downgraded us, then we don't have to do anything */ for (i=0; inum_share_modes; i++) { struct share_mode_entry *e = &lck->share_modes[i]; if ((e->op_type == NO_OPLOCK) && (e->share_file_id == fsp->file_id) && (e->dev == fsp->dev) && (e->inode == fsp->inode) && (procid_is_me(&e->pid))) { /* We're done */ fsp->oplock_type = NO_OPLOCK; talloc_free(lck); return; } } } for(i = 0; i < lck->num_share_modes; i++) { struct share_mode_entry *share_entry = &lck->share_modes[i]; /* * As there could have been multiple writes waiting at the * lock_share_entry gate we may not be the first to * enter. Hence the state of the op_types in the share mode * entries may be partly NO_OPLOCK and partly LEVEL_II * oplock. It will do no harm to re-send break messages to * those smbd's that are still waiting their turn to remove * their LEVEL_II state, and also no harm to ignore existing * NO_OPLOCK states. JRA. */ DEBUG(10,("release_level_2_oplocks_on_change: " "share_entry[%i]->op_type == %d\n", i, share_entry->op_type )); if ((share_entry->op_type == NO_OPLOCK) || (share_entry->op_type == FAKE_LEVEL_II_OPLOCK)) { continue; } /* Paranoia .... 
*/ if (EXCLUSIVE_OPLOCK_TYPE(share_entry->op_type)) { DEBUG(0,("release_level_2_oplocks_on_change: PANIC. " "share mode entry %d is an exlusive " "oplock !\n", i )); talloc_free(lck); abort(); } message_send_pid(share_entry->pid, MSG_SMB_ASYNC_LEVEL2_BREAK, share_entry, sizeof(*share_entry), True); } remove_all_share_oplocks(lck, fsp); talloc_free(lck); } /**************************************************************************** Setup oplocks for this process. ****************************************************************************/ BOOL init_oplocks(void) { DEBUG(3,("open_oplock_ipc: opening loopback UDP socket.\n")); message_register(MSG_SMB_BREAK_REQUEST, process_oplock_break_message); message_register(MSG_SMB_ASYNC_LEVEL2_BREAK, process_oplock_break_message); message_register(MSG_SMB_BREAK_RESPONSE, process_oplock_break_response); message_register(MSG_SMB_KERNEL_BREAK, process_kernel_oplock_break); message_register(MSG_SMB_OPEN_RETRY, process_open_retry_message); if (lp_kernel_oplocks()) { #if HAVE_KERNEL_OPLOCKS_IRIX koplocks = irix_init_kernel_oplocks(); #elif HAVE_KERNEL_OPLOCKS_LINUX koplocks = linux_init_kernel_oplocks(); #endif } return True; }