mirror of https://gitlab.com/christosangel/sapo
improve comma and space delimitation routine
parent
6e8d8f6691
commit
39a2e2ad19
36
Sapo.sh
36
Sapo.sh
|
@ -45,40 +45,8 @@ sed -i -f $HOME/git/sapo/letters.sed "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced
|
|||
# Run the abbreviations.sed and fonetix.sed rule files, in that order, over the
# sentenced text, editing it in place.
# "$HOME" is quoted so that a home directory containing whitespace does not
# split the rule-file path into several words.
for sapo_rules in abbreviations fonetix; do
  sed -i -f "$HOME/git/sapo/${sapo_rules}.sed" "${DIRECTORY}Sapo_${NAME}/${NAME}sentenced.txt"
done
|
||||
##################extra delimit to comma and space ###################################
# sapo_delimit_long_lines TEXT_FILE SED_SCRIPT_FILE
#
# Walks TEXT_FILE from its last line to its first. While the current line,
# counted with its trailing newline, is longer than 290 characters, it is
# split in place:
#   * more than 2 commas   -> a newline is inserted after the comma that is
#                             third from the end of the line;
#   * otherwise            -> the space that is 16th from the end of the line
#                             is replaced by "|" plus a newline.
# Only the first part (which stays on the same line number) is re-examined;
# the part moved to the following line is not revisited by this routine.
#
# Each one-line sed program is written to SED_SCRIPT_FILE before being run,
# so that file is left behind exactly as before.
#
# Arguments: $1 - text file to edit in place
#            $2 - scratch file that receives the generated sed program
# Outputs:   progress messages on stdout, warnings on stderr
# Returns:   0 (an unreadable TEXT_FILE is reported and skipped)
sapo_delimit_long_lines() {
  local text_file=$1 script_file=$2
  local line_no cur_line char_count only comma_count space_count nth

  if [ ! -r "$text_file" ]; then
    echo "Cannot read $text_file, skipping delimitation" >&2
    return 0
  fi

  # $(( )) normalises any padding some wc implementations put around the count.
  line_no=$(( $(wc -l < "$text_file") ))

  while [ "$line_no" -ge 1 ]; do
    cur_line=$(sed -n "${line_no}p" "$text_file")
    # printf instead of echo: a line such as "-n" must be counted, not parsed.
    char_count=$(printf '%s\n' "$cur_line" | wc -m)

    while [ "$char_count" -gt 290 ]; do
      echo "character count : $char_count"
      echo "characters > 290"

      # Delete everything that is not a comma; what remains is the comma count.
      only=${cur_line//[!,]/}
      comma_count=${#only}
      echo "comma count : $comma_count"

      if [ "$comma_count" -gt 2 ]; then
        echo "commas > 2"
        nth=$((comma_count - 2))
        printf '%s\n' "${line_no}s/,/,\n/${nth}" > "$script_file"
      else
        echo "No commas to delimit to, proceed delimiting with spaces(limit 15)"
        only=${cur_line//[! ]/}
        space_count=${#only}
        echo "Space count : $space_count"
        nth=$((space_count - 15))
        # With 15 spaces or fewer the occurrence number would be zero or
        # negative; sed rejects that, the line would never shrink and this
        # loop would never end. Leave such a line untouched instead.
        if [ "$nth" -lt 1 ]; then
          echo "Line $line_no has too few spaces to split, leaving it as is" >&2
          break
        fi
        printf '%s\n' "${line_no}s/ /|\n/${nth}" > "$script_file"
      fi

      # A failed edit cannot shorten the line, so stop rather than retry forever.
      if ! sed -i -f "$script_file" "$text_file"; then
        echo "sed failed on line $line_no, leaving it as is" >&2
        break
      fi

      cur_line=$(sed -n "${line_no}p" "$text_file")
      char_count=$(printf '%s\n' "$cur_line" | wc -m)
    done

    echo "Delimitation completed for line $line_no"
    line_no=$((line_no - 1))
  done
  return 0
}

sapo_delimit_long_lines "${DIRECTORY}Sapo_${NAME}/${NAME}sentenced.txt" "${DIRECTORY}Sapo_${NAME}/script.sed"
|
||||
# sapo_split_overlong_lines TEXT_FILE TEMP_FILE
#
# Two-stage stream split of overlong lines in TEXT_FILE:
#   1. on lines of 300 or more characters, insert a newline after every
#      third comma;
#   2. on the resulting lines of 290 or more characters, insert "|" and a
#      newline after every 50th space.
# The result is written to TEMP_FILE, which then replaces TEXT_FILE.
#
# The bracket expressions are written [^,] and [^ ]: a backslash inside a
# bracket expression is a literal character, so the former [^\,] / [^\ ]
# also excluded backslashes and miscounted groups on lines containing one.
# TEXT_FILE is replaced only when the pipeline succeeded, so a failed run
# cannot overwrite it with an empty or partial temp file.
#
# Arguments: $1 - text file to rewrite
#            $2 - temporary output file, moved over $1 on success
# Returns:   0 on success or when TEXT_FILE is unreadable (reported on stderr),
#            otherwise the status of the failing pipeline/mv
sapo_split_overlong_lines() {
  local text_file=$1 temp_file=$2

  if [ ! -r "$text_file" ]; then
    echo "Cannot read $text_file, skipping long-line splitting" >&2
    return 0
  fi

  sed -e '/.\{300,\}/s/\(\([^,]*,\)\{3\}\)/\1\n/g' "$text_file" \
    | sed -e '/.\{290,\}/s/\(\([^ ]* \)\{50\}\)/\1|\n/g' > "$temp_file" \
    && mv "$temp_file" "$text_file"
}

sapo_split_overlong_lines "${DIRECTORY}Sapo_${NAME}/${NAME}sentenced.txt" "${DIRECTORY}Sapo_${NAME}/temp1.txt"
|
||||
####################### Extra substitutions
# Two in-place passes over the sentenced text, run in this order:
#   1. blank any line consisting solely of a comma, and strip a leading
#      straight (') or curly (‘) quote, optionally preceded by spaces;
#   2. get rid of empty lines.
for sapo_prog in "s/^\,$//g;s/^ *'//g;s/^'//g;s/^ *‘//g;s/^‘//g " "/^$/d"; do
  sed -i "$sapo_prog" "${DIRECTORY}Sapo_${NAME}/${NAME}sentenced.txt"
done
|
||||
|
|
Loading…
Reference in New Issue