Warren Young wrote:
> Byrial Jensen wrote:
>
>> So my question is: Do MySQL++ have a limitation so it only works with
>> one actice Connection object at a time,
>
> If it does, it's considered a bug. Patches thoughtfully considered.
Thanks to Warren Young and Earl Miles for the replies. I don't have a
patch, but I do have a very much cut down program to demonstrate the
problems.
When I run the program below with the cur table from the database dump
found at
http://download.wikimedia.org/wikipedia/da/20050516_cur_table.sql.gz,
I get an abort in check_len() which seems to indicate that data returned
from ResUse::fetch_row() have been changed. I suspect that this may be
due to a mysql++ bug. It may of course also be due to misunderstandings
on my side. If so, please tell me what's wrong with the program.
When run with valgrind, I get thousands of invalid read errors, all
about reads inside blocks previously freed by the
ColData_Tmpl<mysqlpp::const_string> destructor, like this one:
==32674== Invalid read of size 1
==32674== at 0x1B905491: strlen (mac_replace_strmem.c:189)
==32674== by 0x804B16E: check_len(char const*, unsigned) (cr.cpp:47)
==32674== by 0x804B771: main (cr.cpp:145)
==32674== Address 0x1BE315AD is 13 bytes inside a block of size 41 free'd
==32674== at 0x1B906237: operator delete(void*) (vg_replace_malloc.c:156)
==32674== by 0x1B9D2DE4: std::string::_Rep::_M_destroy(std::allocator<char> const&) (in /usr/lib/libstdc++.so.6.0.3)
==32674== by 0x1B9D30CD: std::string::~string() (in /usr/lib/libstdc++.so.6.0.3)
==32674== by 0x804BDD6: mysqlpp::ColData_Tmpl<mysqlpp::const_string>::~ColData_Tmpl() (cr.cpp:121)
============ Start code ============
#include <mysql++/mysql++.h>
#include <string>
#include <sstream>
#include <iostream>
using namespace std;
using namespace mysqlpp;
// Connection parameters handed to Connection::real_connect() in main().
// NULL / 0 are passed through unchanged; presumably the client library
// then falls back to its own defaults -- TODO confirm.
#define DATABASE_HOST NULL
#define DATABASE_USER NULL
#define DATABASE_PASSWD NULL
#define DATABASE_NAME "da_20050516"
#define DATABASE_PORT 0
#define DATABASE_SOCKET NULL
// Connection used by do_query() and by the SELECT issued in main().
Connection con (true /* use exceptions */);
// Second, independent connection used by do_query_2() for the INSERTs
// that are sent while the SELECT result on `con` is still being read.
Connection con2 (true /* use exceptions */);
void do_query (const char *q)
{
Query query = con.query ();
query << q;
try {
query.execute ();
}
catch (exception& err) {
cerr << "\nError 1: " << err.what ();
exit (1);
}
}
void do_query_2 (ostringstream& o)
{
const string s = o.str ();
Query query = con2.query ();
try {
query.exec (s);
}
catch (exception& err) {
cerr << "\nError 2:" << err.what ();
exit (1);
}
}
// Sanity check: verifies that the NUL-terminated string at `start` still
// has exactly `length` characters.  On a mismatch both lengths are
// printed to stdout and the process is aborted.
void check_len (const char *start, size_t length)
{
    const size_t measured = strlen (start);
    if (measured == length) {
        return;
    }

    cout << "check_len(): length=" << length
         << ", new_length=" << measured << endl;
    abort ();
}
// Handles one wiki link whose text starts at `text` (just past "[[").
//
// Characters up to the first ']' are copied into an INSERT statement for
// the `links` table, with a backslash placed before every single quote
// and backslash.  The statement is then sent through do_query_2().
//
// Returns a pointer to the character after that ']'.  If the end of the
// string is reached before any ']' is seen, nothing is inserted and a
// pointer to the terminating NUL is returned.
const char *handle_link (unsigned id, const char *text)
{
    ostringstream sql;
    sql << "INSERT INTO links (from_id, link) VALUES(" << id << ",'";

    for (; *text != ']'; ++text) {
        const char ch = *text;
        if (ch == '\0') {
            // Unmatched [[
            return text;
        }
        if (ch == '\'' || ch == '\\') {
            sql << '\\';
        }
        sql << ch;
    }

    sql << "')";
    do_query_2 (sql);
    return text + 1;
}
int main ()
{
try {
con.real_connect (DATABASE_NAME, DATABASE_HOST,
DATABASE_USER, DATABASE_PASSWD,
DATABASE_PORT,
false /* compress */,
10 /* timeout */,
DATABASE_SOCKET,
0 /* client_flag */
);
con2.real_connect (DATABASE_NAME, DATABASE_HOST,
DATABASE_USER, DATABASE_PASSWD,
DATABASE_PORT,
false /* compress */,
10 /* timeout */,
DATABASE_SOCKET,
0 /* client_flag */
);
}
catch (exception& err) {
cerr << "Connect error: " << err.what () << endl;
return 1;
}
do_query ("DROP TABLE IF EXISTS links");
do_query
("CREATE TABLE links ("
"from_id mediumint(7) unsigned NOT NULL,"
"link varchar(255) binary NOT NULL default '',"
"INDEX ( from_id )"
") ENGINE=MyISAM");
Query query = con.query ();
query << "SELECT cur_id, cur_text FROM cur";
try {
size_t count;
ResUse res = query.use ();
while (Row row = res.fetch_row ()) {
unsigned int id = row[0];
const char *text = row[1];
long unsigned *lengths = res.fetch_lengths ();
unsigned text_length = lengths[1];
if (memchr (text, '\0', text_length)) {
cout << "Warning: Article " << id
<< " contains NUL characters. Skiping it." << endl;
continue;
}
if (text[text_length] != '\0') {
cout << "Error: text not NUL terminated." << endl;
abort ();
}
const char *saved_start = text;
check_len (saved_start, text_length);
while (*text) {
if (*text == '[' && text[1] == '[') {
text = handle_link (id, text + 2);
}
else {
++text;
}
check_len (saved_start, text_length);
} // While text;
if (++count % 100 == 0) cout << '.' << flush;
} // While row
}
catch (BadQuery&) {
cout << " done" << endl;
}
catch (exception& err) {
cerr << "\nError: " << err.what ();
exit (1);
}
return 0;
}
============ End code ============
>> And another question: Do you think that it would be more efficient to
>> store inserts + updates in for example a local file, and not send
>> these queries to the database until after the mysqlpp::ResUse object
>> is deleted.
>
>
> Probably not. If anything, I'd make the second database local, and do
> your final analysis there. It sounds like both databases are remote, in
> which case your bottleneck is the bandwidth of the link to the database.
> Unless you've got serious pipe, your disk bandwidth will be a lot higher.
I wasn't clear. I only use a local database.
I use a PC with Mandriva Linux 2005 with MySQL 4.1.11,
gcc 3.4.3 and glib 2.3.4.
mysql++ 1.7.35 is installed from locally built RPMs.
Best regards
Byrial