f9e4bbbe06e809f22aaea6356898f86ea2200c58 — Nick Econopouly 26 days ago 2be2df0
added conversion of command-line urls
1 files changed, 39 insertions(+), 31 deletions(-)

M plr.py
M plr.py => plr.py +39 -31
@@ 7,24 7,35 @@   def parseargs():
      parser = argparse.ArgumentParser()
-     parser.add_argument('-i', nargs='?', type=argparse.FileType('r'),
-                         default=sys.stdin, action='store', dest='input')
-     parser.add_argument('-o', nargs='?', type=argparse.FileType('w'),
-                          default=sys.stdout, action='store', dest='output')
+     parser.add_argument('-i', '--input-file', type=argparse.FileType('r'),
+                         default=sys.stdin, action='store', dest='input',
+                         help='specify an input file (default: stdin)')
+     parser.add_argument('-o', '--output-file', type=argparse.FileType('w'),
+                         default=sys.stdout, action='store', dest='output',
+                         help='specify an output file (default: stdout)')
+     parser.add_argument('-l', action='store_true',
+                         help='output a list of archive links instead of '
+                              'replacing them in the text')
+     parser.add_argument('urls', nargs=argparse.REMAINDER)
      return parser.parse_args()
  
  def main():
+     # get command-line arguments
      args = parseargs()
-     linkfunc = getWebArchiveLink
+ 
+     # set where to write output
      if args.output.name != '<stdout>':
          writeoutput = args.output.write
      else:
          writeoutput = print
  
-     content = args.input.read()
-     content = replaceLinks(content)
-     #writeoutput(content)
- 
+     # read the input from extra command-line args, stdin, or a file
+     if args.urls:
+         # workaround: the regex expects markdown-style text, so join the URLs
+         content = '\n'.join(args.urls)
+     else:
+         content = args.input.read()
+     writeoutput(replaceLinks(content))
  
  def isurl(s):
      if re.match(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', s):


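A quick standalone sketch (not part of plr.py) of what the new `urls` positional does: argparse.REMAINDER gathers the leftover command-line tokens into a list, and the hunk above joins them with newlines so the markdown-oriented link regex can still match them. The parser and argument names mirror the diff; the sample URLs are made up.

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-l', action='store_true')
    parser.add_argument('urls', nargs=argparse.REMAINDER)

    # the trailing command-line tokens end up in args.urls
    args = parser.parse_args(['https://example.com', 'https://example.org'])
    assert args.urls == ['https://example.com', 'https://example.org']

    # plr.py joins these into one newline-separated string before running
    # the link regex over it
    content = '\n'.join(args.urls)
    print(content)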
@@ 49,27 60,24 @@ return url
  
  def replaceLinks(text):
-   urls = []
-   for url in  re.findall(r'(https?://[^\s]+)', text):
-     newurl = url.split('"')[0].split('<')[0]
-     while newurl[-1] == '.' or newurl[-1] == ')' or newurl[-1] == '!':
-       newurl = newurl[:-1]
-       urls.append(newurl)
- 
-   newurls = []
-   for url in urls:
-       newurls.append(getWebArchiveLink(url))
+     urls = []
+     # Regex needs a rewrite:
+     for url in re.findall(r'(https?://[^\s]+)', text):
+         # trim quotes, angle brackets, and trailing punctuation from each match
+         newurl = url.split('"')[0].split('<')[0]
+         while newurl[-1] == '.' or newurl[-1] == ')' or newurl[-1] == '!':
+             newurl = newurl[:-1]
+         urls.append(newurl)
+     newurls = [getWebArchiveLink(url) for url in urls]
+     # replace each link with its archive link, walking left to right
+     curPos = 0
+     for url, newurl in zip(urls, newurls):
+         if url in text[curPos:]:
+             newPos = text.index(url, curPos)
+             text = text[:curPos] + text[curPos:].replace(url, newurl, 1)
+             curPos = newPos + len(newurl)
+     return text
  
- #  print(conversion)
-   curPos = 0
-   for url, newurl in zip(urls,newurls):
-     if url in text[curPos:]:
-       print('url:' + url)
-       print('new url:' + newurl)
-       # print(text[curPos:])
-       newPos = text.index(url)
-       text = text[0:curPos] + text[curPos:].replace(url,newurl)
-       curPos = newPos
-   return text  
  
- main()
+ if __name__ == '__main__':
+     main()
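For reference, a standalone sketch of the left-to-right replacement walk that replaceLinks performs after this commit. fake_archive_link is a hypothetical stand-in for getWebArchiveLink (whose real return format isn't shown in this diff), and the sample text is invented.

    import re

    def fake_archive_link(url):
        # stand-in for getWebArchiveLink; the real function builds a Wayback Machine link
        return 'ARCHIVED(' + url + ')'

    def replace_links(text):
        urls = []
        for url in re.findall(r'(https?://[^\s]+)', text):
            # trim quotes, angle brackets, and trailing punctuation
            cleaned = url.split('"')[0].split('<')[0].rstrip('.)!')
            urls.append(cleaned)
        curPos = 0
        for url in urls:
            newurl = fake_archive_link(url)
            if url in text[curPos:]:
                newPos = text.index(url, curPos)
                text = text[:curPos] + text[curPos:].replace(url, newurl, 1)
                curPos = newPos + len(newurl)
        return text

    print(replace_links('see https://example.com and [docs](https://example.org/a).'))
    # -> see ARCHIVED(https://example.com) and [docs](ARCHIVED(https://example.org/a)).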