Skip to content

Commit 99f7e16

Browse files
committed
First version of Floki with some features
Parsing documents and finding elements by class name. This version also contains: - Floki.attribute/2 : get attribute values from parsed elements; - Floki.attribute/3 : get attribute values from elements with a given class name; - Fixed README; - MIT license.
1 parent e1ac5c2 commit 99f7e16

File tree

7 files changed

+189
-8
lines changed

7 files changed

+189
-8
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@
22
/deps
33
erl_crash.dump
44
*.ez
5+
/tmp
6+
*.swp

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2014 Philip Sampaio Silva
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
THE SOFTWARE.

README.md

+20-5
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,39 @@
11
Floki
22
=====
33

4-
Floki parses and query HTML documents using query selectors (like jQuery)
4+
Floki is useful for searching inside HTML documents using query selectors (like jQuery).
5+
Under the hood, it uses the [Mochiweb](https://github.com/mochi/mochiweb) HTML parser.
6+
7+
This version works with simple CSS class selectors (without nesting or grouping),
8+
like `.class-name`.
59

610
## API
711

8-
To parse a document, try:
12+
To parse an HTML document, try:
913

1014
```elixir
1115
Floki.parse(html)
1216
```
1317

14-
To find elements with the class `js-action`, try:
18+
To find elements with the class `js-link`, try:
1519

1620
```elixir
17-
Floki.find(".js-action", html)
21+
Floki.find(html, ".js-link")
1822
```
1923

2024
To fetch some attribute from elements, try:
2125

2226
```elixir
23-
Floki.get_attribute("href", ".js-action", html)
27+
Floki.attribute(html, ".js-link", "href")
2428
```
29+
30+
You can also get attributes from elements that you already have:
31+
32+
```elixir
33+
Floki.find(html, ".js-link")
34+
|> Floki.attribute("href")
35+
```
36+
37+
## License
38+
39+
Floki is under the MIT license. Check the `LICENSE` file for more details.

lib/floki.ex

+61
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,63 @@
11
defmodule Floki do
  @moduledoc """
  Searches parsed HTML trees using simple CSS class selectors.

  Parsing is delegated to `:mochiweb_html`. Element nodes are handled as
  `{name, attributes, children}` tuples, where `attributes` is a list of
  `{attribute_name, value}` pairs and `children` is a list of nodes.

  Only selectors of the form `".class-name"` are supported; any other selector
  passed to `find/2` raises a `FunctionClauseError`.
  """

  @doc """
  Parses an HTML string by delegating to `:mochiweb_html.parse/1` and returns
  whatever that function returns.
  """
  def parse(html) do
    :mochiweb_html.parse(html)
  end

  @doc """
  Finds every element whose `class` attribute contains the class named by
  `selector` (a string such as `".js-link"`).

  `html` may be a raw HTML string, which is parsed first, or an already parsed
  tree (a node or a list of nodes). Matches are returned in document order:
  a parent appears before its descendants.
  """
  def find(html, selector) when is_binary(html) do
    html
    |> parse()
    |> find(selector)
  end

  def find(html_tree, "." <> class) do
    class
    |> find_by_class(html_tree, [])
    |> Enum.reverse()
  end

  @doc """
  Returns the values of `attribute_name` for every element of `html` matched by
  `selector`. Elements that lack the attribute contribute nothing.
  """
  def attribute(html, selector, attribute_name) do
    html
    |> find(selector)
    |> get_attribute_values(attribute_name)
  end

  @doc """
  Returns the values of `attribute_name` for the given list of elements.
  Elements that lack the attribute contribute nothing.
  """
  def attribute(elements, attribute_name) do
    get_attribute_values(elements, attribute_name)
  end

  @doc """
  Checks whether `attributes` contains a `"class"` attribute listing `class`.

  The class attribute value is split on whitespace and `class` must equal one
  of the resulting tokens, so `"js"` does not match `"js-cool"`.

  Returns the matching `{"class", value}` tuple (truthy) or `nil`.
  """
  def class_match?(attributes, class) do
    Enum.find(attributes, fn
      {"class", value} when is_binary(value) -> class in String.split(value)
      _other -> false
    end)
  end

  @doc """
  Collects the value of `attr_name` from each `{name, attributes, children}`
  element in `elements`.

  Only the first attribute with that name is used for each element; elements
  without the attribute are skipped.
  """
  def get_attribute_values(elements, attr_name) do
    Enum.flat_map(elements, fn {_name, attributes, _children} ->
      case List.keyfind(attributes, attr_name, 0) do
        {_attr_name, value} -> [value]
        nil -> []
      end
    end)
  end

  # Walks the tree depth-first, prepending matches to `acc`; the caller
  # reverses the accumulator to restore document order.
  defp find_by_class(class, [head | tail], acc) do
    find_by_class(class, tail, find_by_class(class, head, acc))
  end

  # The `is_list/1` guard keeps 3-tuples whose second element is not an
  # attribute list out of this clause (presumably mochiweb's processing
  # instruction nodes look like that -- TODO confirm).
  defp find_by_class(class, {_name, attributes, children} = element, acc)
       when is_list(attributes) do
    # The result of `if` must be bound explicitly: a rebinding made inside the
    # `if` body is not visible after it.
    acc = if class_match?(attributes, class), do: [element | acc], else: acc

    find_by_class(class, children, acc)
  end

  # Text nodes, `[]`, `{}` and any other node shape cannot match a class
  # selector and have no children to visit here.
  defp find_by_class(_class, _other, acc), do: acc
end

mix.exs

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ defmodule Floki.Mixfile do
2525
#
2626
# Type `mix help deps` for more examples and options
2727
defp deps do
28-
[]
28+
[
29+
{:mochiweb, git: "https://github.com/mochi/mochiweb.git", tag: "v2.9.2"}
30+
]
2931
end
3032
end

mix.lock

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
%{"mochiweb": {:git, "https://github.com/mochi/mochiweb.git", "66a6535692da7b8830bd141eee26025af4928b8a", [tag: "v2.9.2"]}}

test/floki_test.exs

+81-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,86 @@
11
defmodule FlokiTest do
22
use ExUnit.Case
33

4-
test "the truth" do
5-
assert 1 + 1 == 2
4+
@html """
5+
<html>
6+
<head>
7+
<title>Test</title>
8+
</head>
9+
<body>
10+
<div class='content'>
11+
<a href='http://google.com' class='js-google js-cool'>Google</a>
12+
<a href='http://elixir-lang.org' class='js-elixir js-cool'>Elixir lang</a>
13+
<a href='http://java.com' class='js-java'>Java</a>
14+
</div>
15+
</body>
16+
</html>
17+
"""
18+
19+
test "parse simple html" do
20+
parsed = Floki.parse(@html)
21+
22+
assert parsed == {"html", [],
23+
[{"head", [], [{"title", [], ["Test"]}]},
24+
{"body", [],
25+
[{"div", [{"class", "content"}],
26+
[{"a", [{"href", "http://google.com"}, {"class", "js-google js-cool"}], ["Google"]},
27+
{"a", [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}], ["Elixir lang"]},
28+
{"a", [{"href", "http://java.com"}, {"class", "js-java"}], ["Java"]}]}]}]}
29+
end
30+
31+
test "find elements with a given class" do
32+
class_selector = ".js-cool"
33+
34+
assert Floki.find(@html, class_selector) == [{"a", [{"href", "http://google.com"}, {"class", "js-google js-cool"}], ["Google"]},
35+
{"a", [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}], ["Elixir lang"]}]
36+
end
37+
38+
test "does not find elements" do
39+
class_selector = ".nothing"
40+
41+
assert Floki.find(@html, class_selector) == []
42+
end
43+
44+
test "matching a class by a given name" do
45+
class_name = "a-class"
46+
attributes = [{"class", class_name}, {"title", "a title"}]
47+
48+
assert Floki.class_match?(attributes, class_name)
49+
end
50+
51+
test "does not match by class name" do
52+
class_name = "a-class"
53+
attributes = [{"class", "another-class"}, {"title", "a title"}]
54+
55+
refute Floki.class_match?(attributes, class_name)
56+
end
57+
58+
test "does not match when attributes list is empty" do
59+
class_name = "a-class"
60+
attributes = []
61+
62+
refute Floki.class_match?(attributes, class_name)
63+
end
64+
65+
test "get attribute values from elements with a given class" do
66+
class_selector = ".js-cool"
67+
expected_hrefs = ["http://google.com", "http://elixir-lang.org"]
68+
69+
assert Floki.attribute(@html, class_selector, "href") == expected_hrefs
70+
end
71+
72+
test "get attributes from elements" do
73+
class_selector = ".js-cool"
74+
expected_hrefs = ["http://google.com", "http://elixir-lang.org"]
75+
elements = Floki.find(@html, class_selector)
76+
77+
assert Floki.attribute(elements, "href") == expected_hrefs
78+
end
79+
80+
test "get attributes that does not exist" do
81+
class_selector = ".js-cool"
82+
elements = Floki.find(@html, class_selector)
83+
84+
assert Floki.attribute(elements, "title") == []
685
end
786
end

0 commit comments

Comments
 (0)