Shell脚本 - 分割错误

问题描述:

我试图获得shell脚本工作时遇到了一点问题。我正在尝试制作一个脚本,用于从数据库读取一行文本并将其格式化为一个文本文件,以便与另一个软件一起使用。该脚本需要循环大约5000万次(大型数据库),并且直到5,500到5,800次迭代才能完成,然后它会产生分段错误。
我试图用strace追踪错误(最后几行在下面),但我不确定我在看什么。
Shell脚本 - 分割错误

clone(child_stack=0,flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,      child_tidptr=0xb76f8728) = 17547 
close(4)        = 0 
close(5)        = 0 
pipe([4, 5])       = 0 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,  child_tidptr=0xb76f8728) = 17548 
close(3)        = 0 
close(5)        = 0 
pipe([3, 5])       = 0 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb76f8728) = 17549 
close(4)        = 0 
close(5)        = 0 
pipe([4, 5])       = 0 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb76f8728) = 17550 
close(3)        = 0 
close(5)        = 0 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb76f8728) = 17551 
close(4)        = 0 
close(-1)        = -1 EBADF (Bad file descriptor) 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17546 
--- SIGCHLD (Child exited) @ 0 (0) --- 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17547 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17548 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17549 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17551 
--- SIGCHLD (Child exited) @ 0 (0) --- 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17550 
--- SIGCHLD (Child exited) @ 0 (0) --- 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17545 
--- SIGCHLD (Child exited) @ 0 (0) --- 
write(1, "OK!\n", 4OK! 
)     = 4 
pipe([3, 4])       = 0 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb76f8728) = 17552 
close(4)        = 0 
read(3, "10632\n", 128)     = 6 
read(3, "", 128)      = 0 
close(3)        = 0 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17552 
--- SIGCHLD (Child exited) @ 0 (0) --- 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb76f8728) = 17553 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17553 
--- SIGCHLD (Child exited) @ 0 (0) --- 
write(1, "Preparing file 10632 of ", 24Preparing file 10632 of) = 24 
write(1, "51041073(7) ....", 1651041073(7) ....)  = 16 
pipe([3, 4])       = 0 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb76f8728) = 17554 
close(4)        = 0 
read(3, "8\n", 128)      = 2 
read(3, "", 128)      = 0 
close(3)        = 0 
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 17554 
--- SIGCHLD (Child exited) @ 0 (0) --- 
pipe([3, 4])       = 0 
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb76f8728) = 17555 
close(4)        = 0 
read(3, "", 128)      = 0 
close(3)        = 0 
wait4(-1, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGSEGV}], 0, NULL) = 17555 
--- SIGCHLD (Child exited) @ 0 (0) --- 
--- SIGSEGV (Segmentation fault) @ 0 (0) --- 
+++ killed by SIGSEGV +++ 
Segmentation fault 

的脚本如下:

#!/bin/sh 
#----VARS---- 
prefix="10.0.3.2/read/" 
oprdir="/home/andrew/doc/read/" 
throttle=0 

#---------------- 
x=5000 
advcount=0 
count=`curl "$prefix""count.php" 2> /dev/null` 

#----Funcs---- 
getline() 
{ 
    #Run curl to get the line of text 
    #TODO -n 
    echo -n "Preparing file" $x "of " 
    if [ $advcount -ge 25 ] 
    then 
    count=`curl "$prefix""count.php" 2> /dev/null` 
    advcount=0 
    fi 
    echo -n $count"("$advcount")" "...." 
    advcount=`expr $advcount + 1` 

    line=`curl "$prefix""testfile-prep.php?x=$x" 2> /dev/null` 
    if [ "$line" = "ERR ERROR: X OUTSIDE RECORDS." ] 
    then 
    echo "ERROR: X OUTSIDE DB... WAITING TO RETRY." 
    sleep 60 
    getline 
    fi 
    prepline 
} 

prepline() 
{ 
    echo $line | sed 's/^[0-9]*\./\n./g' | sed 's/\([a-zA-Z]*\)\./\1\n./g' | sed 's/\,/\n\,/g' | sed 's/(\(.*\)/(\n\1/g' | sed 's/\(.*\))/\1\n)/g' | sed 's/ /\n/g'> out-0-$x.dat 
    echo "OK!" 
    #cat out-$x.dat 
    advance 
} 
advance() 
{ 
    x=`expr $x + 1` 
    sleep $throttle 
    getline 
} 

cd "$oprdir" 
getline 

对不起,我的信息的中篇小说,任何帮助表示赞赏。

+0

*“脚本需要循环大约5000万次”*我必须说这对我有用编译的(或JIT虚拟机)实现可能是这个用例的一个好主意。或者你能证明它是IO界吗? – dmckee 2011-12-20 01:26:06

+1

您的'prepline'函数可以使用'sed -e'command1'-e'command2'...'将多个'sed'命令组合成一个命令。这可能与你的崩溃没有任何关系,但它会减少过程的数量。 – 2011-12-20 02:16:41

您有stack overflow

您的getline函数调用prepline,它调用advance,它调用getline

您应该使用某种类型的循环,而不是在非特殊情况下调用自己的函数。 (在错误情况下的自我调用是可以的,你不会有5,000个错误溢出堆栈。)

+0

'_迭代是人类;为了缓解,神圣?“显然不是在这种情况下,无论如何,明确的迭代可以解决问题。 – 2011-12-20 02:26:37

+0

这似乎解决了它。我真的不知道为什么我会像我那样执行功能。谢谢! – akester 2011-12-20 02:30:32