Find all hyperlinks (<a> tags) inside <p> tags
#include <stdio.h>
#include <string.h>
#define MAX_HTML_LENGTH 1000
/*
 * Locate the next opening tag called `name` at or after `from`.
 *
 * A match is '<' + name followed by '>' or whitespace, so tags with
 * attributes (<a href='...'>) are accepted while longer tag names that merely
 * share the prefix (<abbr>, <pre>) are rejected.
 *
 * from  - NUL-terminated text to scan.
 * limit - exclusive upper bound for the match position, or NULL for none.
 * name  - tag name without angle brackets, e.g. "a".
 *
 * Returns a pointer to the '<' of the match, or NULL if there is none.
 */
static const char *find_open_tag(const char *from, const char *limit, const char *name)
{
    const size_t name_len = strlen(name);

    for (const char *hit = strchr(from, '<'); hit != NULL; hit = strchr(hit + 1, '<')) {
        if (limit != NULL && hit >= limit) {
            return NULL;
        }
        if (strncmp(hit + 1, name, name_len) != 0) {
            continue;
        }
        /* Safe to read: strncmp matched name_len bytes, so this is at worst the NUL. */
        const char after = hit[1 + name_len];
        if (after == '>' || after == ' ' || after == '\t' || after == '\n' || after == '\r') {
            return hit;
        }
    }
    return NULL;
}

/*
 * Print the text of every <a>...</a> element that lies inside a <p>...</p>
 * element of `html`, one per line, as "Found link: <text>".
 *
 * html - NUL-terminated HTML text; NULL is treated as empty input.
 *
 * This is a plain substring scan, not an HTML parser: tag names are matched
 * case-sensitively, nested paragraphs are not understood, and a '>' inside an
 * attribute value ends the tag early. A paragraph without a closing </p>
 * stops the scan; an anchor that is not closed before its paragraph ends is
 * skipped together with the rest of that paragraph.
 */
void findLinks(const char *html)
{
    if (html == NULL) {
        return;
    }

    const char *scan = html;
    for (;;) {
        const char *p_open = find_open_tag(scan, NULL, "p");
        if (p_open == NULL) {
            break;
        }
        const char *p_text = strchr(p_open, '>');
        if (p_text == NULL) {
            break; /* "<p ..." never closed: malformed tail, nothing more to scan */
        }
        p_text++;

        /* Search for the closing tag AFTER this opening tag so the pair always matches. */
        const char *p_end = strstr(p_text, "</p>");
        if (p_end == NULL) {
            break;
        }

        const char *cursor = p_text;
        for (;;) {
            const char *a_open = find_open_tag(cursor, p_end, "a");
            if (a_open == NULL) {
                break;
            }
            const char *a_text = strchr(a_open, '>');
            if (a_text == NULL || a_text >= p_end) {
                break;
            }
            a_text++;

            const char *a_end = strstr(a_text, "</a>");
            if (a_end == NULL || a_end >= p_end) {
                break; /* anchor is not closed inside this paragraph */
            }

            fputs("Found link: ", stdout);
            fwrite(a_text, 1, (size_t)(a_end - a_text), stdout);
            putchar('\n');

            cursor = a_end + strlen("</a>");
        }

        /* Step past the closing tag; restarting at p_end itself would find it again forever. */
        scan = p_end + strlen("</p>");
    }
}
/*
 * Program entry point: feeds a built-in sample document (two paragraphs, each
 * containing one anchor with an href attribute) to findLinks and exits with
 * status 0.
 */
int main(void)
{
    /* The literal is shorter than MAX_HTML_LENGTH; the rest of the array is zero-filled. */
    const char sample_page[MAX_HTML_LENGTH] = "<html><body><p>Sample text <a href='#'>Link 1</a> inside paragraph.</p><p>Another <a href='https://www.example.com'>Link 2</a> within a paragraph.</p></body></html>";

    findLinks(sample_page);

    return 0;
}
Explanation:
Library Inclusions: stdio.h for standard input/output functions and string.h for string manipulation functions.
Constant Definition: MAX_HTML_LENGTH is defined as the maximum length of the HTML content.
findLinks
Function: This function takes an HTML string as input and searches for hyperlinks (<a> tags) within <p> tags.
It starts by finding the first occurrence of a <p> tag and its corresponding </p> tag using the strstr (string search) function.
Within each <p> tag, it locates occurrences of <a> and </a> tags using strstr within the <p> content.
If it finds an <a> tag within the <p> content, it extracts and prints the content between <a> and </a>.
main Function:
- Defines a sample HTML content stored in the html array.
- Calls the findLinks function with the HTML content.
This C code aims to identify and print out the content of hyperlinks (<a> tags) within paragraphs (<p> tags) in a given HTML string.