#!/usr/bin/perl
# filename: subsurf.pl
# author: Marvin Simkin
# date: 2002-10-23
# purpose: parse subsurface data from PDF text
# syntax: subsurf.pl wells02.txt phx_basin_subsurface_final_39-53.vi
# NOTE:
# This combines two files based on a common "key" or well ID.
# File formats differ. The first file looks like this:
# ----------------------------------------------------------------------------
# SITE_ID,XCoord,YCoord,Coord_Sys,MP_Elev,Total_Depth
# ADAMS-MW-1,476450,888709,SP,,
# AFFC-MW-1,463069,884161,SP,,
# AL-MW-2,464893,891009,SP,1103.03,
# AL-MW-5,464540,890790,SP,1101.12,
# ASE-21C,473595.3125,890166.125,SP,1114.07,
# ASE-24C,472306.5,888767.1875,SP,111.06,
# AV-MW-4,464317,884844,SP,,
# AV-MW-8,464041,885288,SP,,
# AZSLD,476045.40625,895145.375,Sta,1154.8,195
# The second file's input data format is a bit irregular...
# This is a header. It seems to remain constant except for 2nd line (page #).
# ----------------------------------------------------------------------------
# Table 1. Summary of information for selected drill holes in the eastern Phoenix basin, Arizona
# 36
# Lithologic Summary
# Drill
# Hole
# Drilling
# Method
# Elev.
# (feet
# amsl)
# Total
# Depth
# (in feet)
# Depth to
# (Elev. of)
# Base of SRG
# (in feet)
# Depth to
# (Elev. of)
# Base of BF
# (in feet)
# Depth (in
# feet) Unit
# Source of
# Data
# ----------------------------------------------------------------------------
# This is the info for a drill hole. Note multi-valued depth info.
# ----------------------------------------------------------------------------
# A Reverse Air
# Circulation
# Percussion
# Hammer
# 1118 108 95 (1023) NE 0 @ 10
# 10 @ 95
# 95 @ 108
# Uppermost alluvium
# Salt River Gravels
# Camels Head Formation
# L
# Output data format
# ----------------------------------------------------------------------------
# One summary record per drill hole, followed by one record per unit:
#   well|WELL|TOP|TOTALDEPTH|GRAVEL|FILL|X|Y
#   unit|WELL|UNITTOP|UNITBOTTOM|UNITNAME
# X and Y are empty when the well cannot be matched in the first file.
# Progress and diagnostics go to STDERR.

# INITIALIZE:

use strict;
use warnings;

# SUBROUTINES:

# Read one line from the filehandle and strip its line ending (LF or CRLF).
# Returns undef at end of file so callers can stop instead of looping forever.
sub read_line {
    my ($Fh) = @_;

    my $Line = <$Fh>;
    return unless defined $Line;

    $Line =~ s/\r?\n\z//;
    return $Line;
}

# Preload the wells file: remember X, Y for each well.
# Takes an open filehandle; returns (\%X, \%Y), both keyed by well ID.
# A well ID of the form XXX-XX is also stored under its first part only; if
# two wells sharing that first part disagree on X, Y the short entry is set
# to the string 'invalid'.
sub load_wells {
    my ($Fh) = @_;

    my %X;
    my %Y;

    while (defined(my $Line = read_line($Fh))) {
        # skip first row
        next if $Line eq 'SITE_ID,XCoord,YCoord,Coord_Sys,MP_Elev,Total_Depth';

        # All we need is Well ID, X, Y; ignore the rest
        # ADAMS-MW-1,476450,888709,SP,,
        my ($Well, $X, $Y) = split(/,/, $Line);
        unless (defined $Well and length $Well and defined $X and defined $Y) {
            # blank lines are skipped silently, anything else is reported
            warn "Skipping malformed wells line '$Line'\n" if $Line =~ /\S/;
            next;
        }

        $X{$Well} = $X;
        $Y{$Well} = $Y;

        # also sometimes the Well ID is XXX-XX and the second part is redundant
        # so save it under the first part only
        my $ShortWell = $Well;
        $ShortWell =~ s/-.*//;
        next if $ShortWell eq $Well;

        if (exists $X{$ShortWell}) {
            # that key has already been found, the X and Y better agree
            # (test the 'invalid' marker as a string before comparing numbers)
            my $Agrees = $X{$ShortWell} ne 'invalid'
                && $Y{$ShortWell} ne 'invalid'
                && $X == $X{$ShortWell}
                && $Y == $Y{$ShortWell};
            unless ($Agrees) {
                warn "Multiple X, Y values for ShortWell '$ShortWell'";
                # invalidate the entries
                $X{$ShortWell} = 'invalid';
                $Y{$ShortWell} = 'invalid';
            }
        }
        else {
            # not found yet, add it
            $X{$ShortWell} = $X;
            $Y{$ShortWell} = $Y;
        }
    }

    return (\%X, \%Y);
}

# Find the key under which a table well ID is stored in the wells data.
# Takes the well ID and a reference to the X hash; returns the matching key,
# or nothing when no spelling matches.
sub find_wells_key {
    my ($Well, $X) = @_;

    # maybe we will get lucky and it matches right away
    my @Candidates = ($Well);

    # '-0': maybe one file says "BC-01" and the other says "BC-1"
    # '':   so maybe the other file has no dash?
    # '0':  or maybe it is EW01 vs. EW-1
    for my $Replacement ('-0', '', '0') {
        my $Key = $Well;
        $Key =~ s/-/$Replacement/;
        push @Candidates, $Key;
    }

    for my $Key (@Candidates) {
        return $Key if defined $X->{$Key};
    }
    return;
}

# Remove the last "DEPTH" or "DEPTH (ELEV)" field from the end of the text.
# The field might also be e.g. NE or NR.  Returns (remaining text, depth),
# or nothing when the text does not end in such a field.
sub pop_depth_field {
    my ($Munch) = @_;

    return unless $Munch =~ /(.*) ([NER\?\(\)0-9]+)$/;
    my ($Rest, $Field) = ($1, $2);

    if ($Field =~ /\(/) {
        # that was the (elev); the depth is the field before it
        # why keep the (elev)?
        return unless $Rest =~ /(.*) ([NER\?\(\)0-9]+)$/;
        ($Rest, $Field) = ($1, $2);
    }

    return ($Rest, $Field);
}

# Parse a line of the form
#   [optional text] TOP DEPTH GRAVEL (ELEV) FILL (ELEV) UNIT#0
# e.g. "1118 108 95 (1023) NE 0 @ 10".
# Returns (Top, TotalDepth, Gravel, Fill), or nothing when any part fails to
# match -- every match is checked so that a failed match can never pick up
# stale capture values from an earlier one.
sub parse_summary_line {
    my ($Line) = @_;

    # easiest to start from the end and work back: drop UNIT#0 first
    return unless $Line =~ /(.*) [\.0-9]+ @ [\.0-9]+$/;
    my $Munch = $1;

    # now it's [optional text] TOP DEPTH GRAVEL (ELEV) FILL (ELEV)
    my $Fill;
    ($Munch, $Fill) = pop_depth_field($Munch);
    return unless defined $Fill;

    # now it's [optional text] TOP DEPTH GRAVEL (ELEV)
    my $Gravel;
    ($Munch, $Gravel) = pop_depth_field($Munch);
    return unless defined $Gravel;

    # now it's [optional text] TOP DEPTH
    return unless $Munch =~ /([\.0-9]+) ([\.0-9]+)$/;
    my ($Top, $TotalDepth) = ($1, $2);

    return ($Top, $TotalDepth, $Gravel, $Fill);
}

# Parse the unit depth at the end of a line: "TOP @ BOTTOM", or a single
# value which is then used for both.  Returns (UnitTop, UnitBottom), or
# nothing when the line (possibly undef at end of file) has no depth.
sub parse_unit_depth {
    my ($Line) = @_;

    return unless defined $Line;
    return ($1, $2) if $Line =~ /([\.0-9]+) @ ([\.0-9\?]+)$/;
    return ($1, $1) if $Line =~ /([\.0-9\?]+)$/;
    return;
}

# Read the table file and print one "well" record per drill hole followed by
# one "unit" record per unit.
# Takes the open table filehandle, the X and Y hash references returned by
# load_wells, and an optional output filehandle (default STDOUT).
# Stops with a warning if the file ends in the middle of a header or record.
sub process_table {
    my ($Table, $X, $Y, $Out) = @_;
    $Out = \*STDOUT unless defined $Out;

    TABLE_LINE:
    while (defined(my $Line = read_line($Table))) {
        # print STDERR "Processing line from TABLE: '$Line'\n";

        if ($Line eq 'Table 1. Summary of information for selected drill holes in the eastern Phoenix basin, Arizona') {
            print STDERR "\nFound header... ";
            $Line = read_line($Table);
            unless (defined $Line) {
                warn "Unexpected end of table inside page header\n";
                last TABLE_LINE;
            }
            print STDERR "page $Line\n";

            # the header ends with "Source of" / "Data"
            until ($Line eq 'Data') {
                $Line = read_line($Table);
                unless (defined $Line) {
                    warn "Unexpected end of table inside page header\n";
                    last TABLE_LINE;
                }
            }
            next TABLE_LINE;
        }

        # a blank line cannot start a drill hole record (there is no ID)
        next TABLE_LINE unless $Line =~ /\S/;

        # A Reverse Air
        my ($Well) = split(' ', $Line);
        print STDERR "\nWell '$Well'\n";

        # loop until a line that looks like this
        # [possible text here] 1118 108 95 (1023) NE 0 @ 10
        until ($Line =~ / [\.0-9]+ @ [\.0-9]+$/) {
            $Line = read_line($Table);
            unless (defined $Line) {
                warn "Unexpected end of table in record for '$Well'\n";
                last TABLE_LINE;
            }
        }

        my ($Top, $TotalDepth, $Gravel, $Fill) = parse_summary_line($Line);
        unless (defined $Top) {
            warn "Cannot parse summary line for '$Well': '$Line'\n";
            ($Top, $TotalDepth, $Gravel, $Fill) = ('', '', '', '');
        }

        # try to find X and Y in WELLS table
        my $WellX = '';
        my $WellY = '';
        my $WellsKey = find_wells_key($Well, $X);
        if (defined $WellsKey) {
            $WellX = $X->{$WellsKey};
            $WellY = $Y->{$WellsKey};
        }
        else {
            warn "Cannot find key for '$Well'";
        }

        # output a summary well record
        print {$Out} "well|$Well|$Top|$TotalDepth|$Gravel|$Fill|$WellX|$WellY\n";

        # accumulate unit data; there are usually several, and the first one
        # sits at the end of the summary line itself
        my @UnitTop;
        my @UnitBottom;
        while (my ($UnitTop, $UnitBottom) = parse_unit_depth($Line)) {
            push @UnitTop,    $UnitTop;
            push @UnitBottom, $UnitBottom;
            $Line = read_line($Table);
        }

        # each unit has a name, listed in the same order as the depths
        for my $Index (0 .. $#UnitTop) {
            my $Name = defined $Line ? $Line : '';
            # output a detail unit record
            print {$Out} "unit|$Well|$UnitTop[$Index]|$UnitBottom[$Index]|$Name\n";
            $Line = read_line($Table);
        }

        # we should have already read the one final line of data
        # before next hole or header, which should be 1-4 chars
        until (!defined $Line
            or $Line eq 'C'
            or $Line eq 'L'
            or $Line eq 'C, L'
            or $Line eq 'L, C'
            or $Line eq '')
        {
            print STDERR "Found extra Name! $Line\n";
            $Line = read_line($Table);
        }
    }

    return;
}

# ARGUMENTS:

# Entry point.  Expecting two filenames: the wells file, then the table file.
sub main {
    my @Args = @_;

    die 'Two filenames required' unless @Args == 2;

    # three-argument open, so a filename can never be taken for an open mode
    open(my $Wells, '<', $Args[0]) or die "Cannot read $Args[0], $!";
    open(my $Table, '<', $Args[1]) or die "Cannot read $Args[1], $!";

    # MAINLINE:
    # Preload wells data then try to match it with info from table
    my ($X, $Y) = load_wells($Wells);
    close $Wells;

    process_table($Table, $X, $Y);
    close $Table;

    return;
}

# Run only when executed as a script, not when loaded with require/do.
main(@ARGV) unless caller;