@@ -24,6 +24,8 @@ import com.nononsenseapps.feeder.db.COL_PRIMARYSORTTIME
24
24
import com.nononsenseapps.feeder.db.COL_PUBDATE
25
25
import com.nononsenseapps.feeder.db.COL_READ_TIME
26
26
import com.nononsenseapps.feeder.db.COL_TITLE
27
+ import com.nononsenseapps.feeder.db.COL_WORD_COUNT
28
+ import com.nononsenseapps.feeder.db.COL_WORD_COUNT_FULL
27
29
import com.nononsenseapps.feeder.db.FEED_ITEMS_TABLE_NAME
28
30
import com.nononsenseapps.feeder.model.host
29
31
import com.nononsenseapps.feeder.ui.text.HtmlToPlainTextConverter
@@ -38,6 +40,8 @@ import java.time.ZonedDateTime
38
40
const val MAX_TITLE_LENGTH = 200
39
41
const val MAX_SNIPPET_LENGTH = 200
40
42
43
// Matches one or more consecutive whitespace characters; used to split plain text into tokens.
// The pattern must be exactly `\s+` - stray spaces inside the literal would be matched literally
// and the regex would no longer split on whitespace at all.
private val patternWhitespace = "\\s+".toRegex()
44
+
41
45
@Entity(
42
46
tableName = FEED_ITEMS_TABLE_NAME ,
43
47
indices = [
@@ -72,21 +76,38 @@ data class FeedItem @Ignore constructor(
72
76
@ColumnInfo(name = COL_ENCLOSURELINK ) var enclosureLink : String? = null ,
73
77
@ColumnInfo(name = COL_ENCLOSURE_TYPE ) var enclosureType : String? = null ,
74
78
@ColumnInfo(name = COL_AUTHOR ) var author : String? = null ,
75
- @ColumnInfo(name = COL_PUBDATE , typeAffinity = ColumnInfo .TEXT ) override var pubDate : ZonedDateTime ? = null ,
79
+ @ColumnInfo(
80
+ name = COL_PUBDATE ,
81
+ typeAffinity = ColumnInfo .TEXT ,
82
+ ) override var pubDate : ZonedDateTime ? = null ,
76
83
@ColumnInfo(name = COL_LINK ) override var link : String? = null ,
77
- @Deprecated(" This column has been 'removed' but sqlite doesn't support drop column." , replaceWith = ReplaceWith (" readTime" ))
84
+ @Deprecated(
85
+ " This column has been 'removed' but sqlite doesn't support drop column." ,
86
+ replaceWith = ReplaceWith (" readTime" ),
87
+ )
78
88
@ColumnInfo(name = " unread" )
79
89
var oldUnread : Boolean = true ,
80
90
@ColumnInfo(name = COL_NOTIFIED ) var notified : Boolean = false ,
81
91
@ColumnInfo(name = COL_FEEDID ) var feedId : Long? = null ,
82
- @ColumnInfo(name = COL_FIRSTSYNCEDTIME , typeAffinity = ColumnInfo .INTEGER ) var firstSyncedTime : Instant = Instant .EPOCH ,
83
- @ColumnInfo(name = COL_PRIMARYSORTTIME , typeAffinity = ColumnInfo .INTEGER ) override var primarySortTime : Instant = Instant .EPOCH ,
92
+ @ColumnInfo(
93
+ name = COL_FIRSTSYNCEDTIME ,
94
+ typeAffinity = ColumnInfo .INTEGER ,
95
+ ) var firstSyncedTime : Instant = Instant .EPOCH ,
96
+ @ColumnInfo(
97
+ name = COL_PRIMARYSORTTIME ,
98
+ typeAffinity = ColumnInfo .INTEGER ,
99
+ ) override var primarySortTime : Instant = Instant .EPOCH ,
84
100
@Deprecated(" This column has been 'removed' but sqlite doesn't support drop column." )
85
101
@ColumnInfo(name = " pinned" )
86
102
var oldPinned : Boolean = false ,
87
103
@ColumnInfo(name = COL_BOOKMARKED ) var bookmarked : Boolean = false ,
88
104
@ColumnInfo(name = COL_FULLTEXT_DOWNLOADED ) var fullTextDownloaded : Boolean = false ,
89
- @ColumnInfo(name = COL_READ_TIME , typeAffinity = ColumnInfo .INTEGER ) var readTime : Instant ? = null ,
105
+ @ColumnInfo(
106
+ name = COL_READ_TIME ,
107
+ typeAffinity = ColumnInfo .INTEGER ,
108
+ ) var readTime : Instant ? = null ,
109
+ @ColumnInfo(name = COL_WORD_COUNT ) var wordCount : Int = 0 ,
110
+ @ColumnInfo(name = COL_WORD_COUNT_FULL ) var wordCountFull : Int = 0 ,
90
111
) : FeedItemForFetching, FeedItemCursor {
91
112
92
113
constructor () : this (id = ID_UNSET )
@@ -101,10 +122,17 @@ data class FeedItem @Ignore constructor(
101
122
) {
102
123
val converter = HtmlToPlainTextConverter ()
103
124
// Be careful about nulls.
104
- val text = entry.content_html ? : entry.content_text ? : " "
125
+ val plainText = converter.convert(
126
+ entry.content_html
127
+ ? : entry.content_text
128
+ ? : " " ,
129
+ )
130
+ this .wordCount = estimateWordCount(plainText)
131
+
105
132
val summary: String = (
106
- entry.summary ? : entry.content_text
107
- ? : converter.convert(text)
133
+ entry.summary
134
+ ? : entry.content_text
135
+ ? : plainText
108
136
).take(MAX_SNIPPET_LENGTH )
109
137
110
138
// Make double sure no base64 images are used as thumbnails
@@ -117,6 +145,7 @@ data class FeedItem @Ignore constructor(
117
145
feed.feed_url != null && safeImage != null -> {
118
146
relativeLinkIntoAbsolute(sloppyLinkToStrictURL(feed.feed_url), safeImage)
119
147
}
148
+
120
149
else -> safeImage
121
150
}
122
151
@@ -178,3 +207,20 @@ interface FeedItemCursor {
178
207
val pubDate: ZonedDateTime ?
179
208
val id: Long
180
209
}
210
+
211
/**
 * Estimates the number of words in [plainText].
 *
 * The text is split on [patternWhitespace] and the non-empty tokens are counted. If the average
 * number of characters per token is 15 or more, the text is assumed to be written in a language
 * that doesn't separate words with spaces and 0 is returned instead of a misleading count.
 *
 * @param plainText text to count words in
 * @return the number of whitespace-separated tokens, or 0 if [plainText] contains no tokens or
 * does not look like it is space-separated
 */
fun estimateWordCount(plainText: String): Int {
    // Splitting yields empty tokens for empty input and for leading/trailing whitespace;
    // those are not words and must not be counted.
    val wordCount = plainText.splitToSequence(patternWhitespace).count { it.isNotEmpty() }

    // Nothing to count - also avoids dividing by zero below.
    if (wordCount == 0) {
        return 0
    }

    // Calculate average length of chars between spaces
    // A typical value for english is 5-7
    // A typical value for japanese is 50-80
    val averageTokenLength = plainText.length.toFloat() / wordCount

    return if (averageTokenLength < 15.0) {
        wordCount
    } else {
        0
    }
}
0 commit comments