List: Commits « Previous Message | Next Message »
From: tomas  Date: June 5 2007 8:49am
Subject: bk commit into 5.1 tree (tomas:1.2535)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-06-05 08:49:11+02:00, tomas@stripped +18 -0
  try odirect patch for LCP and BACKUP

  mysql-test/ndb/ndb_config_2_node.ini@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +1 -0
    try odirect patch for LCP and BACKUP

  storage/ndb/include/mgmapi/mgmapi_config_parameters.h@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +2 -0
    try odirect patch for LCP and BACKUP

  storage/ndb/include/ndb_global.h.in@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +2 -0
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/backup/Backup.cpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +42 -7
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/backup/Backup.hpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +2 -1
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +9 -1
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/backup/BackupInit.cpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +5 -2
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +14 -10
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +2 -3
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +9 -5
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +11 -3
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +114 -37
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +4 -0
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/blocks/restore.cpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +3 -0
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/vm/SimulatedBlock.cpp@stripped, 2007-06-05 08:49:08+02:00,
tomas@stripped +22 -3
    try odirect patch for LCP and BACKUP

  storage/ndb/src/kernel/vm/SimulatedBlock.hpp@stripped, 2007-06-05 08:49:09+02:00,
tomas@stripped +1 -0
    try odirect patch for LCP and BACKUP

  storage/ndb/src/mgmsrv/ConfigInfo.cpp@stripped, 2007-06-05 08:49:09+02:00,
tomas@stripped +12 -0
    try odirect patch for LCP and BACKUP

  storage/ndb/tools/restore/Restore.cpp@stripped, 2007-06-05 08:49:09+02:00,
tomas@stripped +24 -5
    try odirect patch for LCP and BACKUP

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	whalegate.ndb.mysql.com
# Root:	/home/tomas/mysql-5.1-wl1190-merge

--- 1.17/storage/ndb/src/kernel/blocks/restore.cpp	2007-03-20 16:38:17 +01:00
+++ 1.18/storage/ndb/src/kernel/blocks/restore.cpp	2007-06-05 08:49:08 +02:00
@@ -557,6 +557,9 @@
       case BackupFormat::GCP_ENTRY:
 	parse_gcp_entry(signal, file_ptr, data, len);
 	break;
+      case BackupFormat::EMPTY_ENTRY:
+        // skip
+        break;
       case 0x4e444242: // 'NDBB'
 	if (check_file_version(signal, ntohl(* (data+2))) == 0)
 	{

--- 1.33/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2007-03-07 17:50:22 +01:00
+++ 1.34/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2007-06-05 08:49:08 +02:00
@@ -113,6 +113,8 @@
 
 #define CFG_DB_MEMREPORT_FREQUENCY    166
 
+#define CFG_DB_O_DIRECT               168
+
 #define CFG_DB_SGA                    198 /* super pool mem */
 #define CFG_DB_DATA_MEM_2             199 /* used in special build in 5.1 */
 

--- 1.17/storage/ndb/include/ndb_global.h.in	2007-02-23 18:54:20 +01:00
+++ 1.18/storage/ndb/include/ndb_global.h.in	2007-06-05 08:49:08 +02:00
@@ -144,4 +144,6 @@
 #define MAX(x,y) (((x)>(y))?(x):(y))
 #endif
 
+#define NDB_O_DIRECT_WRITE_ALIGNMENT 512
+
 #endif

--- 1.109/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2007-04-05 15:00:14 +02:00
+++ 1.110/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2007-06-05 08:49:09 +02:00
@@ -1314,6 +1314,18 @@
     "0",
     STR_VALUE(MAX_INT_RNIL) },
   
+  {
+    CFG_DB_O_DIRECT,
+    "ODirect",
+    DB_TOKEN,
+    "Use O_DIRECT file write/read when possible",
+    ConfigInfo::CI_USED,
+    true,
+    ConfigInfo::CI_BOOL,
+    "false",
+    "false",
+    "true"},
+
   /***************************************************************************
    * API
    ***************************************************************************/

--- 1.65/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2007-03-06 18:36:38 +01:00
+++ 1.66/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2007-06-05 08:49:08 +02:00
@@ -2771,6 +2771,8 @@
   c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
   filePtr.p->m_flags |= BackupFile::BF_OPENING;
 
+  if (c_defaults.m_o_direct)
+    req->fileFlags |= FsOpenReq::OM_DIRECT;
   req->userPointer = filePtr.i;
   FsOpenReq::setVersion(req->fileNumber, 2);
   FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA);
@@ -3745,12 +3747,31 @@
 }
 
 bool
-Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo)
+Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record)
 {
   Uint32 * tmp;
   const Uint32 footSz = sizeof(BackupFormat::DataFile::FragmentFooter) >> 2;
+  Uint32 sz = footSz + 1;
 
-  if(dataBuffer.getWritePtr(&tmp, footSz + 1)) {
+  if (fill_record)
+  {
+    Uint32 * new_tmp;
+    if (!dataBuffer.getWritePtr(&tmp, sz))
+      return false;
+    new_tmp = tmp + sz;
+
+    if ((UintPtr)new_tmp & (sizeof(Page32)-1))
+    {
+      /* padding is needed to get full write */
+      new_tmp += 2 /* to fit empty header minimum 2 words*/;
+      new_tmp = (Uint32 *)(((UintPtr)new_tmp + sizeof(Page32)-1) &
+                            ~(UintPtr)(sizeof(Page32)-1));
+      /* new write sz */
+      sz = new_tmp - tmp;
+    }
+  }
+
+  if(dataBuffer.getWritePtr(&tmp, sz)) {
     jam();
     * tmp = 0; // Finish record stream
     tmp++;
@@ -3762,7 +3783,17 @@
     foot->FragmentNo    = htonl(fragNo);
     foot->NoOfRecords   = htonl(noOfRecords);
     foot->Checksum      = htonl(0);
-    dataBuffer.updateWritePtr(footSz + 1);
+
+    if (sz != footSz + 1)
+    {
+      tmp += footSz;
+      memset(tmp, 0, (sz - footSz - 1) * 4);
+      *tmp = htonl(BackupFormat::EMPTY_ENTRY);
+      tmp++;
+      *tmp = htonl(sz - footSz - 1);
+    }
+
+    dataBuffer.updateWritePtr(sz);
     return true;
   }//if
   return false;
@@ -3864,8 +3895,13 @@
     return;
   }//if
     
+  BackupRecordPtr ptr LINT_SET_PTR;
+  c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
+
   OperationRecord & op = filePtr.p->operation;
-  if(!op.fragComplete(filePtr.p->tableId, filePtr.p->fragmentNo)) {
+  if(!op.fragComplete(filePtr.p->tableId, filePtr.p->fragmentNo,
+                      c_defaults.m_o_direct))
+  {
     jam();
     signal->theData[0] = BackupContinueB::BUFFER_FULL_FRAG_COMPLETE;
     signal->theData[1] = filePtr.i;
@@ -3875,9 +3911,6 @@
   
   filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD;
   
-  BackupRecordPtr ptr LINT_SET_PTR;
-  c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
-
   if (ptr.p->is_lcp())
   {
     ptr.p->slaveState.setState(STOPPING);
@@ -4914,6 +4947,8 @@
     FsOpenReq::OM_CREATE | 
     FsOpenReq::OM_APPEND |
     FsOpenReq::OM_AUTOSYNC;
+  if (c_defaults.m_o_direct)
+    req->fileFlags |= FsOpenReq::OM_DIRECT;
   FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
   req->auto_sync_size = c_defaults.m_disk_synch_size;
   

--- 1.28/storage/ndb/src/kernel/blocks/backup/Backup.hpp	2007-01-06 01:21:21 +01:00
+++ 1.29/storage/ndb/src/kernel/blocks/backup/Backup.hpp	2007-06-05 08:49:08 +02:00
@@ -240,7 +240,7 @@
      * Once per fragment
      */
     bool newFragment(Uint32 tableId, Uint32 fragNo);
-    bool fragComplete(Uint32 tableId, Uint32 fragNo);
+    bool fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record);
     
     /**
      * Once per scan frag (next) req/conf
@@ -534,6 +534,7 @@
     Uint32 m_disk_write_speed;
     Uint32 m_disk_synch_size;
     Uint32 m_diskless;
+    Uint32 m_o_direct;
   };
   
   /**

--- 1.9/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp	2006-12-23 20:20:15 +01:00
+++ 1.10/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp	2007-06-05 08:49:08 +02:00
@@ -32,7 +32,8 @@
     TABLE_LIST        = 4,
     TABLE_DESCRIPTION = 5,
     GCP_ENTRY         = 6,
-    FRAGMENT_INFO     = 7
+    FRAGMENT_INFO     = 7,
+    EMPTY_ENTRY       = 8
   };
 
   struct FileHeader {
@@ -92,6 +93,13 @@
       Uint32 FragmentNo;
       Uint32 NoOfRecords;
       Uint32 Checksum;
+    };
+
+    /* optional padding for O_DIRECT */
+    struct EmptyEntry {
+      Uint32 SectionType;
+      Uint32 SectionLength;
+      /* not used data */
     };
   };
 

--- 1.26/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp	2006-12-23 20:20:15 +01:00
+++ 1.27/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp	2007-06-05 08:49:08 +02:00
@@ -148,10 +148,13 @@
   c_defaults.m_disk_write_speed = 10 * (1024 * 1024);
   c_defaults.m_disk_write_speed_sr = 100 * (1024 * 1024);
   c_defaults.m_disk_synch_size = 4 * (1024 * 1024);
-  
+  c_defaults.m_o_direct = true;
+
   Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0;
   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, 
 					&c_defaults.m_diskless));
+  ndb_mgm_get_int_parameter(p, CFG_DB_O_DIRECT,
+                            &c_defaults.m_o_direct);
   ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED_SR,
 			    &c_defaults.m_disk_write_speed_sr);
   ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED,
@@ -204,7 +207,7 @@
     / sizeof(Page32);
   // We need to allocate an additional of 2 pages. 1 page because of a bug in
   // ArrayPool and another one for DICTTAINFO.
-  c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2); 
+  c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2, true); 
   
   { // Init all tables
     SLList<Table> tables(c_tablePool);

--- 1.8/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp	2006-12-23 20:20:15 +01:00
+++ 1.9/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp	2007-06-05 08:49:08 +02:00
@@ -270,8 +270,8 @@
     
     * ptr = &Tp[Tr];
 
-    DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> %d",
-		   Tr, Tw, Ts, Tm, sz1, * sz));
+    DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> %d",
+		   Tr, Tmw, Ts, Tm, sz1, * sz));
 
     return true;
   }
@@ -279,8 +279,8 @@
   if(!m_eof){
     * _eof = false;
     
-    DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> false",
-		   Tr, Tw, Ts, Tm, sz1));
+    DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> false",
+		   Tr, Tmw, Ts, Tm, sz1));
     
     return false;
   }
@@ -289,8 +289,8 @@
   * _eof = true;
   * ptr = &Tp[Tr];
 
-  DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> %d eof",
-		 Tr, Tw, Ts, Tm, sz1, * sz));
+  DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> %d eof",
+		 Tr, Tmw, Ts, Tm, sz1, * sz));
   
   return false;
 }
@@ -316,13 +316,13 @@
   if(sz1 > sz){ // Note at least 1 word of slack
     * ptr = &Tp[Tw];
 
-    DEBUG(ndbout_c("getWritePtr(%d) Tr: %d Tw: %d Ts: %d sz1: %d -> true",
-		   sz, Tr, Tw, Ts, sz1));
+    DEBUG(ndbout_c("getWritePtr(%d) Tw: %d sz1: %d -> true",
+		   sz, Tw, sz1));
     return true;
   }
 
-  DEBUG(ndbout_c("getWritePtr(%d) Tr: %d Tw: %d Ts: %d sz1: %d -> false",
-		 sz, Tr, Tw, Ts, sz1));
+  DEBUG(ndbout_c("getWritePtr(%d) Tw: %d sz1: %d -> false",
+		 sz, Tw, sz1));
 
   return false;
 }
@@ -339,11 +339,15 @@
   m_free -= sz;
   if(Tnew < Ts){
     m_writeIndex = Tnew;
+    DEBUG(ndbout_c("updateWritePtr(%d) m_writeIndex: %d",
+                   sz, m_writeIndex));
     return;
   }
 
   memcpy(Tp, &Tp[Ts], (Tnew - Ts) << 2);
   m_writeIndex = Tnew - Ts;
+  DEBUG(ndbout_c("updateWritePtr(%d) m_writeIndex: %d",
+                 sz, m_writeIndex));
 }
 
 inline

--- 1.53/storage/ndb/tools/restore/Restore.cpp	2007-03-27 08:18:32 +02:00
+++ 1.54/storage/ndb/tools/restore/Restore.cpp	2007-06-05 08:49:09 +02:00
@@ -873,13 +873,32 @@
   
   debug << "RestoreDataIterator::getNextFragment" << endl;
   
-  if (buffer_read(&Header, sizeof(Header), 1) != 1){
+  while (1)
+  {
+    /* read first part of header */
+    if (buffer_read(&Header, 8, 1) != 1)
+    {
+      ret = 0;
+      return false;
+    } // if
+
+    /* skip if EMPTY_ENTRY */
+    Header.SectionType  = ntohl(Header.SectionType);
+    Header.SectionLength  = ntohl(Header.SectionLength);
+    if (Header.SectionType == BackupFormat::EMPTY_ENTRY)
+    {
+      void *tmp;
+      buffer_get_ptr(&tmp, Header.SectionLength*4-8, 1);
+      continue;
+    }
+    break;
+  }
+  /* read rest of header */
+  if (buffer_read(((char*)&Header)+8, sizeof(Header)-8, 1) != 1)
+  {
     ret = 0;
     return false;
-  } // if
-  
-  Header.SectionType  = ntohl(Header.SectionType);
-  Header.SectionLength  = ntohl(Header.SectionLength);
+  }
   Header.TableId  = ntohl(Header.TableId);
   Header.FragmentNo  = ntohl(Header.FragmentNo);
   Header.ChecksumType  = ntohl(Header.ChecksumType);

--- 1.63/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2007-03-27 16:06:45 +02:00
+++ 1.64/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2007-06-05 08:49:08 +02:00
@@ -115,9 +115,6 @@
 /* ------------------------------------------------------------------------- */
 /*       VARIOUS CONSTANTS USED AS FLAGS TO THE FILE MANAGER.                */
 /* ------------------------------------------------------------------------- */
-#define ZOPEN_READ 0
-#define ZOPEN_WRITE 1
-#define ZOPEN_READ_WRITE 2
 #define ZVAR_NO_LOG_PAGE_WORD 1
 #define ZLIST_OF_PAIRS 0
 #define ZLIST_OF_PAIRS_SYNCH 16
@@ -2688,6 +2685,7 @@
   UintR clfoFileSize;
 
   LogPageRecord *logPageRecord;
+  void *logPageRecordUnaligned;
   LogPageRecordPtr logPagePtr;
   UintR cfirstfreeLogPage;
   UintR clogPageFileSize;
@@ -2891,6 +2889,7 @@
   UintR ctransidHash[1024];
   
   Uint32 c_diskless;
+  Uint32 c_o_direct;
   Uint32 c_error_insert_table_id;
   
 public:

--- 1.24/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2007-03-27 16:06:45 +02:00
+++ 1.25/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2007-06-05 08:49:08 +02:00
@@ -49,6 +49,7 @@
   logFileRecord = 0;
   logFileOperationRecord = 0;
   logPageRecord = 0;
+  logPageRecordUnaligned= 0;
   pageRefRecord = 0;
   tablerec = 0;
   tcConnectionrec = 0;
@@ -105,10 +106,13 @@
 		sizeof(LogFileOperationRecord), 
 		clfoFileSize);
 
-  logPageRecord = (LogPageRecord*)allocRecord("LogPageRecord",
-					      sizeof(LogPageRecord),
-					      clogPageFileSize,
-					      false);
+  logPageRecord =
+    (LogPageRecord*)allocRecordAligned("LogPageRecord",
+                                       sizeof(LogPageRecord),
+                                       clogPageFileSize,
+                                       &logPageRecordUnaligned,
+                                       NDB_O_DIRECT_WRITE_ALIGNMENT,
+                                       false);
 
   pageRefRecord = (PageRefRecord*)allocRecord("PageRefRecord",
 					      sizeof(PageRefRecord),
@@ -380,7 +384,7 @@
 		sizeof(LogFileOperationRecord), 
 		clfoFileSize);
   
-  deallocRecord((void**)&logPageRecord,
+  deallocRecord((void**)&logPageRecordUnaligned,
 		"LogPageRecord",
 		sizeof(LogPageRecord),
 		clogPageFileSize);

--- 1.154/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-03-27 16:06:45 +02:00
+++ 1.155/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-06-05 08:49:08 +02:00
@@ -1031,6 +1031,8 @@
   cmaxAccOps = cscanrecFileSize * MAX_PARALLEL_OP_PER_SCAN;
 
   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &c_diskless));
+  c_o_direct = true;
+  ndb_mgm_get_int_parameter(p, CFG_DB_O_DIRECT, &c_o_direct);
   
   Uint32 tmp= 0;
   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_FRAG, &tmp));
@@ -13291,7 +13293,9 @@
   signal->theData[3] = olfLogFilePtr.p->fileName[1];
   signal->theData[4] = olfLogFilePtr.p->fileName[2];
   signal->theData[5] = olfLogFilePtr.p->fileName[3];
-  signal->theData[6] = ZOPEN_READ_WRITE | FsOpenReq::OM_AUTOSYNC;
+  signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_AUTOSYNC;
+  if (c_o_direct)
+    signal->theData[6] |= FsOpenReq::OM_DIRECT;
   req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
   sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
 }//Dblqh::openFileRw()
@@ -13311,7 +13315,9 @@
   signal->theData[3] = logFilePtr.p->fileName[1];
   signal->theData[4] = logFilePtr.p->fileName[2];
   signal->theData[5] = logFilePtr.p->fileName[3];
-  signal->theData[6] = 0x302 | FsOpenReq::OM_AUTOSYNC;
+  signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE | FsOpenReq::OM_AUTOSYNC;
+  if (c_o_direct)
+    signal->theData[6] |= FsOpenReq::OM_DIRECT;
   req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
   sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
 }//Dblqh::openLogfileInit()
@@ -13347,7 +13353,9 @@
     signal->theData[3] = onlLogFilePtr.p->fileName[1];
     signal->theData[4] = onlLogFilePtr.p->fileName[2];
     signal->theData[5] = onlLogFilePtr.p->fileName[3];
-    signal->theData[6] = 2 | FsOpenReq::OM_AUTOSYNC;
+    signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_AUTOSYNC;
+    if (c_o_direct)
+      signal->theData[6] |= FsOpenReq::OM_DIRECT;
     req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
     sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
   }//if

--- 1.38/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp	2006-12-23 20:20:18 +01:00
+++ 1.39/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp	2007-06-05 08:49:08 +02:00
@@ -163,7 +163,12 @@
   theStartFlag = true;
   // Create write buffer for bigger writes
   theWriteBufferSize = WRITEBUFFERSIZE;
-  theWriteBuffer = (char *) ndbd_malloc(theWriteBufferSize); 
+  theWriteBufferUnaligned = (char *) ndbd_malloc(theWriteBufferSize +
+                                                 NDB_O_DIRECT_WRITE_ALIGNMENT-1);
+  theWriteBuffer = (char *)
+    (((UintPtr)theWriteBufferUnaligned + NDB_O_DIRECT_WRITE_ALIGNMENT - 1) &
+     ~(UintPtr)(NDB_O_DIRECT_WRITE_ALIGNMENT - 1));
+
   NdbMutex_Unlock(theStartMutexPtr);
   NdbCondition_Signal(theStartConditionPtr);
   
@@ -247,6 +252,78 @@
 static char g_odirect_readbuf[2*GLOBAL_PAGE_SIZE -1];
 #endif
 
+int
+AsyncFile::check_odirect_write(Uint32 flags, int& new_flags, int mode)
+{
+  assert(new_flags & (O_CREAT | O_TRUNC));
+#ifdef O_DIRECT
+  int ret;
+  char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1));
+  while (((ret = ::write(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && 
+         (errno == EINTR));
+  if (ret == -1)
+  {
+    new_flags &= ~O_DIRECT;
+    ndbout_c("%s Failed to write using O_DIRECT, disabling", 
+             theFileName.c_str());
+  }
+  
+  close(theFd);
+  theFd = ::open(theFileName.c_str(), new_flags, mode);
+  if (theFd == -1)
+    return errno;
+#endif
+
+  return 0;
+}
+
+int
+AsyncFile::check_odirect_read(Uint32 flags, int &new_flags, int mode)
+{
+#ifdef O_DIRECT
+  int ret;
+  char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1));
+  while (((ret = ::read(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && 
+         (errno == EINTR));
+  if (ret == -1)
+  {
+    ndbout_c("%s Failed to read using O_DIRECT, disabling", 
+             theFileName.c_str());
+    goto reopen;
+  }
+  
+  if(lseek(theFd, 0, SEEK_SET) != 0)
+  {
+    return errno;
+  }
+  
+  if ((flags & FsOpenReq::OM_CHECK_SIZE) == 0)
+  {
+    struct stat buf;
+    if ((fstat(theFd, &buf) == -1))
+    {
+      return errno;
+    } 
+    else if ((buf.st_size % GLOBAL_PAGE_SIZE) != 0)
+    {
+      ndbout_c("%s filesize not a multiple of %d, disabling O_DIRECT", 
+               theFileName.c_str(), GLOBAL_PAGE_SIZE);
+      goto reopen;
+    }
+  }
+  
+  return 0;
+  
+reopen:
+  close(theFd);
+  new_flags &= ~O_DIRECT;
+  theFd = ::open(theFileName.c_str(), new_flags, mode);
+  if (theFd == -1)
+    return errno;  
+#endif
+  return 0;
+}
+
 void AsyncFile::openReq(Request* request)
 {  
   m_auto_sync_freq = 0;
@@ -312,7 +389,7 @@
   }
 #else
   Uint32 flags = request->par.open.flags;
-  Uint32 new_flags = 0;
+  int new_flags = 0;
 
   // Convert file open flags from Solaris to Liux
   if (flags & FsOpenReq::OM_CREATE)
@@ -343,10 +420,6 @@
   {
     new_flags |= O_DIRECT;
   }
-#elif defined O_SYNC
-  {
-    flags |= FsOpenReq::OM_SYNC;
-  }
 #endif
   
   if ((flags & FsOpenReq::OM_SYNC) && ! (flags & FsOpenReq::OM_INIT))
@@ -355,15 +428,19 @@
     new_flags |= O_SYNC;
 #endif
   }
-    
+
+  const char * rw = "";
   switch(flags & 0x3){
   case FsOpenReq::OM_READONLY:
+    rw = "r";
     new_flags |= O_RDONLY;
     break;
   case FsOpenReq::OM_WRITEONLY:
+    rw = "w";
     new_flags |= O_WRONLY;
     break;
   case FsOpenReq::OM_READWRITE:
+    rw = "rw";
     new_flags |= O_RDWR;
     break;
   default:
@@ -404,11 +481,6 @@
 	if (new_flags & O_DIRECT)
 	{
 	  new_flags &= ~O_DIRECT;
-	  flags |= FsOpenReq::OM_SYNC;
-#ifdef O_SYNC
-	  if (! (flags & FsOpenReq::OM_INIT))
-	    new_flags |= O_SYNC;
-#endif
 	  goto no_odirect;
 	}
 #endif
@@ -421,11 +493,6 @@
     else if (new_flags & O_DIRECT)
     {
       new_flags &= ~O_DIRECT;
-      flags |= FsOpenReq::OM_SYNC;
-#ifdef O_SYNC
-      if (! (flags & FsOpenReq::OM_INIT))
-	new_flags |= O_SYNC;
-#endif
       goto no_odirect;
     }
 #endif
@@ -512,7 +579,6 @@
 	{
 	  ndbout_c("error on first write(%d), disable O_DIRECT", err);
 	  new_flags &= ~O_DIRECT;
-	  flags |= FsOpenReq::OM_SYNC;
 	  close(theFd);
 	  theFd = ::open(theFileName.c_str(), new_flags, mode);
 	  if (theFd != -1)
@@ -532,26 +598,32 @@
   else if (flags & FsOpenReq::OM_DIRECT)
   {
 #ifdef O_DIRECT
-    do {
-      int ret;
-      char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1));
-      while (((ret = ::read(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && (errno == EINTR));
-      if (ret == -1)
-      {
-	ndbout_c("%s Failed to read using O_DIRECT, disabling", theFileName.c_str());
-	flags |= FsOpenReq::OM_SYNC;
-	flags |= FsOpenReq::OM_INIT;
-	break;
-      }
-      if(lseek(theFd, 0, SEEK_SET) != 0)
-      {
-	request->error = errno;
-	return;
-      }
-    } while (0);
+    if (flags & (FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE))
+    {
+      request->error = check_odirect_write(flags, new_flags, mode);
+    }
+    else
+    {
+      request->error = check_odirect_read(flags, new_flags, mode);
+    }
+    
+    if (request->error)
+      return;
 #endif
   }
-
+#ifdef VM_TRACE
+  if (flags & FsOpenReq::OM_DIRECT)
+  {
+#ifdef O_DIRECT
+    ndbout_c("%s %s O_DIRECT: %d",
+             theFileName.c_str(), rw,
+             !!(new_flags & O_DIRECT));
+#else
+    ndbout_c("%s %s O_DIRECT: 0",
+             theFileName.c_str(), rw);
+#endif
+  }
+#endif  
   if ((flags & FsOpenReq::OM_SYNC) && (flags & FsOpenReq::OM_INIT))
   {
 #ifdef O_SYNC
@@ -562,6 +634,10 @@
     new_flags &= ~(O_CREAT | O_TRUNC);
     new_flags |= O_SYNC;
     theFd = ::open(theFileName.c_str(), new_flags, mode);
+    if (theFd == -1)
+    {
+      request->error = errno;
+    }
 #endif
   }
 #endif
@@ -1079,7 +1155,8 @@
 void AsyncFile::endReq()
 {
   // Thread is ended with return
-  if (theWriteBuffer) ndbd_free(theWriteBuffer, theWriteBufferSize);
+  if (theWriteBufferUnaligned)
+    ndbd_free(theWriteBufferUnaligned, theWriteBufferSize);
 }
 
 

--- 1.10/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp	2007-01-24 18:57:03 +01:00
+++ 1.11/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp	2007-06-05 08:49:08 +02:00
@@ -234,9 +234,13 @@
   bool   theStartFlag;
   int theWriteBufferSize;
   char* theWriteBuffer;
+  void* theWriteBufferUnaligned;
   
   size_t m_write_wo_sync;  // Writes wo/ sync
   size_t m_auto_sync_freq; // Auto sync freq in bytes
+
+  int check_odirect_read(Uint32 flags, int&new_flags, int mode);
+  int check_odirect_write(Uint32 flags, int&new_flags, int mode);
 public:
   SimulatedBlock& m_fs;
   Ptr<GlobalPage> m_page_ptr;

--- 1.39/storage/ndb/src/kernel/vm/SimulatedBlock.cpp	2006-12-27 10:58:04 +01:00
+++ 1.40/storage/ndb/src/kernel/vm/SimulatedBlock.cpp	2007-06-05 08:49:08 +02:00
@@ -38,6 +38,9 @@
 #include <AttributeDescriptor.hpp>
 #include <NdbSqlUtil.hpp>
 
+#include <EventLogger.hpp>
+extern EventLogger g_eventLogger;
+
 #define ljamEntry() jamEntryLine(30000 + __LINE__)
 #define ljam() jamLine(30000 + __LINE__)
 
@@ -655,13 +658,19 @@
   return sb->theBATSize;
 }
 
+void* SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, Uint32 paramId)
+{
+  return allocRecordAligned(type, s, n, 0, 0, clear, paramId);
+}
+
 void* 
-SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, Uint32 paramId) 
+SimulatedBlock::allocRecordAligned(const char * type, size_t s, size_t n, void **unaligned_buffer, Uint32 align, bool clear, Uint32 paramId)
 {
 
   void * p = NULL;
-  size_t size = n*s;
-  Uint64 real_size = (Uint64)((Uint64)n)*((Uint64)s);
+  Uint32 over_alloc = unaligned_buffer ? (align - 1) : 0;
+  size_t size = n*s + over_alloc;
+  Uint64 real_size = (Uint64)((Uint64)n)*((Uint64)s) + over_alloc;
   refresh_watch_dog(); 
   if (real_size > 0){
 #ifdef VM_TRACE_MEM
@@ -703,6 +712,16 @@
       }
       refresh_watch_dog(); 
       memset(ptr, 0, size);
+    }
+    if (unaligned_buffer)
+    {
+      *unaligned_buffer = p;
+      p = (void *)(((UintPtr)p + over_alloc) & ~(UintPtr)(over_alloc));
+#ifdef VM_TRACE
+      g_eventLogger.info("'%s' (%u) %llu %llu, alignment correction %u bytes",
+                         type, align, (Uint64)p, (Uint64)p+n*s,
+                         (Uint32)((UintPtr)p - (UintPtr)*unaligned_buffer));
+#endif
     }
   }
   return p;

--- 1.30/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2007-01-24 06:20:36 +01:00
+++ 1.31/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2007-06-05 08:49:09 +02:00
@@ -377,6 +377,7 @@
    *
    */
   void* allocRecord(const char * type, size_t s, size_t n, bool clear = true, Uint32 paramId = 0);
+  void* allocRecordAligned(const char * type, size_t s, size_t n, void **unaligned_buffer, Uint32 align = NDB_O_DIRECT_WRITE_ALIGNMENT, bool clear = true, Uint32 paramId = 0);
   
   /**
    * Deallocate record

--- 1.23/mysql-test/ndb/ndb_config_2_node.ini	2007-04-02 22:32:55 +02:00
+++ 1.24/mysql-test/ndb/ndb_config_2_node.ini	2007-06-05 08:49:08 +02:00
@@ -12,6 +12,7 @@
 TimeBetweenGlobalCheckpoints= 500
 NoOfFragmentLogFiles= 3
 DiskPageBufferMemory= CHOOSE_DiskPageBufferMemory
+ODirect= 1
 # the following parametes just function as a small regression
 # test that the parameter exists
 InitialNoOfOpenFiles= 27
Thread
bk commit into 5.1 tree (tomas:1.2535) - tomas, 5 Jun