I am trying to have the user of this script define what html tag they want printed from a document and then print all lines between those tags.
e.g.
<html>
<p>;lasdjf;lsdakjf</p>
if users raw input is <p> I want to print ';lasdjf;lsdakjf'
Right now it doesn't print anything but gives me no errors. Could someone please explain what I am doing wrong.
#!/usr/bin/python
# Filename: Meta_grab.py
import re, os, glob
l = 1
tags = {
"<a>" : "</a>",
"<abbr>" : "</abbr>",
"<acronym>" : "</acronym>",
"<address>" : "</address>",
"<applet>" : "</applet>",
"<area>" : "</area>",
"<b>" : "</b>",
"<base>" : "</base>",
"<bdo>" : "</bdo>",
"<big>" : "</big>",
"<blockquote>" : "</blockquote>",
"<body>" : "</body>",
"<br>" : "</br>",
"<button>" : "</button>",
"<caption>" : "</caption>",
"<cite>" : "</cite>",
"<code>" : "</code>",
"<col>" : "</col>",
"<colgroup>" : "</colgroup>",
"<dd>" : "</dd>",
"<del>" : "</del>",
"<dfn>" : "</dfn>",
"<div>" : "</div>",
"<dl>" : "</dl>",
"<DOCTYPE>" : "</DOCTYPE>",
"<dt>" : "</dt>",
"<em>" : "</em>",
"<fieldset>" : "</fieldset>",
"<form>" : "</form>",
"<frame>" : "</frame>",
"<frameset>" : "</frameset>",
"<h1>" : "</h1>",
"<h2>" : "</h2>",
"<h3>" : "</h3>",
"<h4>" : "</h4>",
"<h5>" : "</h5>",
"<h6>" : "</h6>",
"<head>" : "</head>",
"<hr>" : "</hr>",
"<html>" : "</html>",
"<i>" : "</i>",
"<iframe>" : "</iframe>",
"<img>" : "</img>",
"<input>" : "</input>",
"<ins>" : "</ins>",
"<kbd>" : "</kbd>",
"<label>" : "</label>",
"<legend>" : "</legend>",
"<li>" : "</li>",
"<link>" : "</link>",
"<map>" : "</map>",
"<meta>" : "</meta>",
"<noframes>" : "</noframes>",
"<noscript>" : "</noscript>",
"<object>" : "</object>",
"<ol>" : "</ol>",
"<optgroup>" : "</optgroup>",
"<option>" : "</option>",
"<p>" : "</p>",
"<param>" : "</param>",
"<pre>" : "</pre>",
"<q>" : "</q>",
"<samp>" : "</samp>",
"<script>" : "</script>",
"<select>" : "</select>",
"<small>" : "</small>",
"<tr>" : "</tr>",
"<tt>" : "</tt>",
"<ul>" : "</ul>",
"<var>" : "</var>",
"<accesskey>" : "</accesskey>",
"<class>" : "</class>",
"<dir>" : "</dir>",
"<id>" : "</id>",
"<lang>" : "</lang>",
"<style>" : "</style>",
"<tabindex>" : "</tabindex>",
"<title>" : "</title>",
"<onblur>" : "</onblur>",
"<onchange>" : "</onchange>",
"<onclick>" : "</onclick>",
"<ondblclick>" : "</ondblclick>",
"<onfocus>" : "</onfocus>",
"<onkeydown>" : "</onkeydown>",
"<onkeypress>" : "</onkeypress>",
"<onkeyup>" : "</onkeyup>",
"<onload>" : "</onload>",
"<onmousedown>" : "</onmousedown>",
"<onmousemove>" : "</onmousemove>",
"<onmouseout>" : "</onmouseout>",
"<onmouseover>" : "</onmouseover>",
"<onmouseup>" : "</onmouseup>",
"<onreset>" : "</onreset>",
"<onselect>" : "</onselect>",
"<onsubmit>" : "</onsubmit>",
"<onunload>" : "</onunload>",
}
while l == 1:
get = raw_input("Type the html tag to get [Example: <p>] ")
for file in glob.glob('*.txt'):
docs = open(file, 'r')
lines = docs.readlines()
for lines in docs:
if lines.startswith('%s' % get) and endswith("%s" % tags['%s' % get]):
print line
else:
pass
docs.close()