improve comma and space delimitation routine

main
Christos Angelopoulos 2022-05-21 13:09:36 +03:00
parent 6e8d8f6691
commit 39a2e2ad19
1 changed files with 2 additions and 34 deletions

36
Sapo.sh
View File

@ -45,40 +45,8 @@ sed -i -f $HOME/git/sapo/letters.sed "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced
# Run two external sed scripts, in place, over the sentence file
# "$DIRECTORY"Sapo_"$NAME"/"$NAME"sentenced.txt.
# The script contents are not visible here; judging only by the file names they
# presumably expand abbreviations and apply phonetic rewrites -- TODO confirm.
# NOTE(review): the $HOME/git/sapo/... script paths are unquoted, so they would
# be word-split if $HOME contained whitespace.
sed -i -f $HOME/git/sapo/abbreviations.sed "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt
sed -i -f $HOME/git/sapo/fonetix.sed "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt
##################extra delimit to comma and space ###################################
# Walk the sentence file from its last line up to line 1. While the current
# line measures more than 290 characters, break it in two: at a comma when the
# line holds more than two commas, otherwise at a space. Each break is done by
# writing a one-command sed script to Sapo_$NAME/script.sed and applying it in
# place to the sentence file.
#
# DELIM starts as the number of lines in the file and is used as the 1-based
# number of the line currently being examined.
DELIM=$(cat "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt|wc -l)
while [ $DELIM -ge 1 ]
do
# Extract line number DELIM: keep the first DELIM lines, then print from line
# DELIM onward.
# NOTE(review): `tail +N` is the traditional spelling; the POSIX form is
# `tail -n +N` -- confirm the installed tail accepts it.
CUR_DELIM_LINE=$(cat "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt|head -$DELIM|tail +$DELIM)
# echo appends a newline, so this count is the line length plus one.
CHAR_COUNT=$(echo "$CUR_DELIM_LINE"|wc -m)
# NOTE(review): this loop only ends once line DELIM is re-read with a count of
# 290 or less; if a sed call below fails or leaves the line unchanged, the same
# values are recomputed and the loop would not terminate -- confirm.
while [ $CHAR_COUNT -gt 290 ]
do
echo "character count : "$CHAR_COUNT
echo "characters > 290"
# Put every character on its own line and count the lines containing a comma,
# i.e. the number of commas in the current line.
COMMA_COUNT=$(echo "$CUR_DELIM_LINE"| sed -e 's/\(.\)/\1\n/g' | grep "," | wc -l)
echo "comma count : "$COMMA_COUNT
if [ $COMMA_COUNT -gt 2 ]
then
echo "commas > 2"
# Generated command: on line DELIM, replace comma number (COMMA_COUNT - 2)
# with a comma followed by a newline. The text after that comma becomes a new
# line below line DELIM; DELIM only counts down, so that new line is not
# re-examined by this loop.
echo $DELIM"s/\,/\,\n/"$(($COMMA_COUNT - 2))>"$DIRECTORY""Sapo_""$NAME"/script.sed
sed -i -f "$DIRECTORY""Sapo_""$NAME"/script.sed "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt
fi
# Complementary branch: tests the same COMMA_COUNT value as the branch above.
if [ $COMMA_COUNT -le 2 ]
then
echo "No commas to delimit to, proceed delimiting with spaces(limit 15)"
# Same counting technique as for commas, applied to space characters.
SPACE_COUNT=$(echo "$CUR_DELIM_LINE"| sed -e 's/\(.\)/\1\n/g' | grep " " | wc -l)
echo "Space count : "$SPACE_COUNT
# Generated command: on line DELIM, replace space number (SPACE_COUNT - 15)
# with "|" followed by a newline.
# NOTE(review): with 15 or fewer spaces the occurrence number is zero or
# negative, which sed is not expected to accept -- confirm how this case is
# meant to be handled.
echo $DELIM"s/\ /\|\n/"$(($SPACE_COUNT - 15))>"$DIRECTORY""Sapo_""$NAME"/script.sed
sed -i -f "$DIRECTORY""Sapo_""$NAME"/script.sed "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt
fi
# Re-read line DELIM (now the part before the inserted newline) and measure it
# again for the next pass of the inner loop.
CUR_DELIM_LINE=$(cat "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt|head -$DELIM|tail +$DELIM)
CHAR_COUNT=$(echo "$CUR_DELIM_LINE"|wc -m)
done
echo "Delimitation completed for line "$DELIM
# Step to the previous line of the file.
((DELIM--))
done
# Two-stage sed pipeline over the sentence file:
#   1. On lines containing at least 300 characters, insert a newline after
#      every group of three comma-terminated segments.
#   2. On lines of the resulting stream containing at least 290 characters,
#      insert "|" and a newline after every group of 50 space-terminated chunks.
# The result is written to Sapo_$NAME/temp1.txt.
# NOTE(review): inside a bracket expression a backslash is normally literal, so
# [^\,] and [^\ ] would also exclude backslash characters -- confirm this is
# harmless for the expected input.
sed -e '/.\{300,\}/s/\(\([^\,]*\,\)\{3\}\)/\1\n/g' "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt|sed -e '/.\{290,\}/s/\(\([^\ ]*\ \)\{50\}\)/\1\|\n/g'>"$DIRECTORY""Sapo_""$NAME"/temp1.txt
# Replace the sentence file with the re-delimited output.
mv "$DIRECTORY""Sapo_""$NAME"/temp1.txt "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt
####################### Extra substitutions
# In-place cleanup of line beginnings in the sentence file, in this order:
#   - blank out lines that consist of a single comma,
#   - remove leading spaces that are followed by an apostrophe (apostrophe included),
#   - remove a leading apostrophe,
#   - remove leading spaces.
# NOTE(review): the final `s/^//g` replaces an empty match with nothing and is
# a no-op as written; a non-printing character may have been lost from the
# pattern -- confirm against the original script.
sed -i "s/^\,$//g;s/^ *'//g;s/^'//g;s/^ *//g;s/^//g " "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt
sed -i "/^$/d" "$DIRECTORY""Sapo_""$NAME"/"$NAME"sentenced.txt #get rid of empty lines