diff --git a/926751.html b/926751.html new file mode 100644 index 0000000..0ba4149 --- /dev/null +++ b/926751.html @@ -0,0 +1,13624 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Club | Strava USA auf Strava + + + + + + + + + + + + + + + + + +
+ + +
+ +
+ +
+
Loading …
+
+
+
+ Strava USA +
+
+
+
+
+ +
+
+
+
+ +
+
+

+ Strava USA + Verifizierter Club +

+
+
+ Multisport +   + United States +
+
+
+
+

+ The official home for Strava athletes in the USA and those who + want to follow along in other nations of the world. +

+ +

+ We promise a place to discover remarkable activities, stories + and athletes - maybe even the occasional product update. +

+ +

+ Mostly, sharing highlights from across all sports within our + community. +

+
+ +
+
+
+
+ + +
+
+

Club-Beiträge

+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Gobble Gobble +

+
+ Over half of the US will be participating in + a Turkey Trot today! If you're new here, + maybe you haven't seen the Turkey Trot + Polyline! ... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Impossible Routes - Tour de Gravel +

+
+ Gearing up to celebrate with your family + this week, but still seeking inspiration to + get those last miles in before the end of + the year? ... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Celebrating Take a Hike Day with The Black + Neighborhood +

+
+ Founded by Cory Elliot and Bryce Savoy, The + Black Neighborhood is a non-profit + organization dedicated to providing safe + spaces for the black... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Join Strava in San Francisco - November 16 +

+
+ Alison Mariella Désir, advisor, founder, + activist, and connector has added another + title to her a name; author. Join us at the + Strava San Fr... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ How are things inside your head? +

+
+ Ask almost any high performer the keys to + their success and time and time again you + will discover a theme around mental strength + and fortitu... +
+
+
+
+
+
+
+ Jor-El Caraballo is a licensed therapist, co-founder of Viva, and sought-after expert in the interconnectedness of mental health, inclusivity, and culture. He received his B.A. in Psychology from the University of North Carolina at Wilmington and M.A. and Ed.M. degrees in Psychological Counseling from Teachers College Columbia University. Jor-El’s work has been featured in Healthline, Men’s Health, Teen Vogue, and Essence. He is the author of the newly released book, “The Shadow Work Workbook”. +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ What's holding you back from reaching your + goals? +

+
+ Francisco Postlethwaite had a goal to train + for triathlons, but because he is + constrained to a wheelchair, he didn’t know + where or how to ge... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Finding motivation through community +

+
+ Gabrielle Platt was ready to give up on + cycling altogether. She was facing mental + and financial hurdles and needed support to + reach her goal... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Find Your Why +

+
+ It can be incredibly overwhelming to hit a + hard spot in your training. Some runners + have described it as feeling like an + existential crisis ... +
+
+
+
+
+
+
+ Rachel (she/her/hers) is a licensed therapist and co-founder of Viva, a multi-city mental health & wellness practice. She believes that wellness looks different on everyone and is dedicated to Viva's mission of making therapy more accessible and individualized. A four time marathon finisher and five time black belt in martial arts despite managing a chronic illness, she strongly believes in a holistic approach to mental health and healthcare in general. +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Meet the Coach: Coach Kai +

+
+ Run Coach Kai is a USATF and RRCA Level I + and II certified running coach. Kai has + spent almost eight years coaching thousands + of everyday pe... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Jeremiah Bishop - The Big Bear Loop +

+
+ Jeremiah Bishop, isn't just a professional + mountain biker, but also one of the creators + of the Impossible Routes series. The name + says it al... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ UTMB® Highlights +

+
+ Katie Schide 🇺🇸 is the new queen of the + UTMB®. 👑 ... +
+
+
+
+
+
+
+ KUDOS ➡️ https://www.strava.com/activities/7711113555 +
+
+
+ +
+
+
+ +
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Bringing Chamonix to Life With Video +

+
+ There’s something about Chamonix. Maybe it’s + the small-town charm. Maybe it’s the + croissants. (They’re incroyable!) Or maybe + it’s that – wit... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Meet Our Run for Good Contest Winners +

+
+ What a contest! Thank you to all of our + entrants; the stories you shared with us + were just incredible. After careful + consideration of each a... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Liz Derstine; The Long Trail +

+
+ Liz Derstine, trail runner, fulfilled her + dream of running/hiking the Long Trail, + earning the self-supported female’s Fastest + Known Time (FK... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Watch the Pros’ Training Like Never Before + With Video on Strava +

+
+ The UTMB® Mont-Blanc events are arguably the + biggest trail races of the year – and since + their Alpine courses are some of the most + difficult... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Contest Update for TCS New York City + Marathon +

+
+ Our TCS New York City Marathon contest + generated incredible video submissions! So + much so, that we need just a bit more time + to review them ... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Gravel Stoke in Steamboat This Weekend +

+
+ If you enjoy a "choose your own adventure" + book from time-to-time and also dabble in a + little gravel cycling, we have the race for + you. ... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Enter to Win an Entry to the TCS New York + City Marathon +

+
+ There are just a few days left to enter our + 2022 TCS New York City Marathon Entry + Contest! Want to wake up and run in the city + that never sl... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Your Chance to Run the TCS New York City + Marathon +

+
+ The TCS New York City Marathon is back this + year and at full capacity for the first time + since 2019. On November 6, 55K people will + gather o... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+ + + + + +
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Changing the Game: Girl Gang Crazy +

+
+ Here’s a pretty cool résumé: Mariah Dyson is + a non-profit founder, athlete, running + catalyst and stuntwoman. Born and raised in + LA, she’s ma... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Changing the Game: Meet the New York City + Marathon’s First-Ever Openly Non-binary + Runner +

+
+ Lauren Lubin April has been at the forefront + of non-binary and transgender advancements + in sports and health, academia, film and + media, publ... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Changing the Game: How Latoya Snell Became + the Community Leader She Needed +

+
+ Latoya Shauntay Snell is the founder of + Running Fat Chef, a food and fitness blog + that houses her unique story as a queer, + African American,... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Changing the Game: Building a More Inclusive + Outdoor Industry +

+
+ Teresa Baker is an outdoor industry + trailblazer and an unapologetic troublemaker + on a mission to create a more welcoming and + inclusive outdo... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Changing the Game: Women Supporting Women on + Bikes +

+
+ Lauren Pickman is a co-founder of Women of + OZ (https://www.woznwa.com) – the + fastest-growing women’s mountain bike + organization in the US – and of an epic bike + race called the Rule of Three + (https://www.ruleoft... +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Unite with riders from around the globe on + World Bicycle Day! +

+
+ Many see bicycles as a way of moving their + body, exercising their muscles, a form of + meditation. In rural regions of the world, + where distan... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Changing the Game: Forging Space for Black + and Brown Athletes in Running +

+
+ Alison Mariella Désir is a powerhouse. She + sets her own rules and builds pathways that + never existed for Black and brown people in + the outdo... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Changing the Game: How a Discriminatory + Hijab Ban Fueled a New Senate Bill +

+
+ Noor Alexandria Abukaram is a freshman at + The Ohio State University. She’s passionate + about fashion design and sports, including + soccer, tra... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Showing Up Through Movement: Meet Rosanna + Peng! +

+
+ “I want to capture movement authentically. + Being a runner, I see some pretty gnarly + things on the road. People peeing their + pants to save ti... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Showing Up Through Movement: Meet Glen + Hartrick ! +

+
+ “When laying in my hospital bed 2 months + after my accident I had to find a way to + pick up the pieces and move forward. Sports + allowed me to ... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Showing Up Through Movement: Meet Ryan + Willms! +

+
+ “I find a lot of joy, learning and growth + through movement and try to bring a mindful + lens of sustainability to sport. When I + decided to do ... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Showing Up Through Movement: Meet Avantia + & Jamie! +

+
+ “My community has seen things in me that at + the time, I didn’t see in myself. I started + running very late in my adult life and + wasn’t an ath... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+ +
+
+ + +
+
+
+
+
+

+ Showing Up Through Movement: Meet Gabriel + Ortega! +

+
+ “The time I spend running or working out is + an investment in myself and my health. The + joy I receive carries over into my time with + my famil... +
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+ +
+
+
+
+
+
+
+
+ + + +
+
+
+ +
+
+ +
+ +
+
+
+ + + + + diff --git a/Textzerlegung.py b/Textzerlegung.py new file mode 100644 index 0000000..f40ab7c --- /dev/null +++ b/Textzerlegung.py @@ -0,0 +1,77 @@ +import os + +# Ordner, in dem sich die Textdateien und das Skript befinden +ordner_pfad = os.path.dirname(os.path.abspath(__file__)) + +# Listen von Titeln, Start- und Endmustern +titel = [ +"Name", # 1 +"Verifizierter Club", # 2 +"Mitglieder", # 3 +"Kategorie", # 4 +"Land", # 5 +"Club ID", # 6 +"Logo", # 7 +"Titel8", # 8 +"Titel9", # 9 +"Titel10", # 10 +] + +start_muster = [ +'', # 2 +'
\n

\n', # 3 +'icon-lg">', # 4 +'\n\r \n\r',  # 5
+'<meta content=', # 1 +"", # 2 +' Mitglieder\n

# NOTE: this physical line of the flattened patch began with the closing
# entries of the pattern list opened on the preceding lines. That fragment is
# not valid Python on its own and is kept verbatim in the next comment line:
# ', # 3 +"", # 4 +'" src="data:,">', # 5 +'" property="og:url">', # 6 +'/large.jpg"', # 7 +"END8", # 8 +"END9", # 9 +"END10", # 10 +]

# Output file (tab-separated: one row per HTML file, one column per title)
ausgabe_datei = "extrahierte_werte.txt"

# Tabs and line breaks inside a value would split a cell or a row of the
# tab-separated output, so they are replaced by plain spaces when writing.
_ZELLEN_BEREINIGUNG = str.maketrans({"\t": " ", "\r": " ", "\n": " "})


def _wert_zwischen(text, start, ende):
    """Return the text between the first *start* and the next *ende*.

    Returns ``None`` when *start* is absent or no *ende* follows it.
    """
    start_index = text.find(start)
    if start_index == -1:
        return None
    wert_beginn = start_index + len(start)
    # Look for the end marker only AFTER the start marker. Searching from the
    # beginning of the text picks up earlier occurrences of the end marker and
    # then yields an empty (wrong) value.
    end_index = text.find(ende, wert_beginn)
    if end_index == -1:
        return None
    return text[wert_beginn:end_index]


def _zelle(werte):
    """Join all matches for one title into a single tab-free, one-line cell."""
    return " | ".join(wert.translate(_ZELLEN_BEREINIGUNG) for wert in werte)


def extrahiere_werte(datei, titel_liste=None, start_liste=None, end_liste=None):
    """Extract values from *datei* line by line using start/end markers.

    For every line and every (title, start marker, end marker) triple, the
    text between the first start marker on that line and the next end marker
    after it is collected.

    Args:
        datei: Path of the text/HTML file to scan.
        titel_liste: Column titles; defaults to the module-level ``titel``.
        start_liste: Start markers; defaults to the module-level
            ``start_muster``.
        end_liste: End markers; defaults to the module-level ``end_muster``.

    Returns:
        A dict mapping every title to the list of values found for it (empty
        list when nothing matched).
    """
    if titel_liste is None:
        titel_liste = titel
    if start_liste is None:
        start_liste = start_muster
    if end_liste is None:
        end_liste = end_muster

    extrahierte_werte = {t: [] for t in titel_liste}
    # NOTE(review): UTF-8 is assumed for the saved pages - confirm. Undecodable
    # bytes are replaced instead of aborting the whole run.
    with open(datei, 'r', encoding='utf-8', errors='replace') as textdatei:
        for zeile in textdatei:
            # zip() stops at the shortest list, so lists of unequal length
            # can no longer raise IndexError.
            for name, start, ende in zip(titel_liste, start_liste, end_liste):
                wert = _wert_zwischen(zeile, start, ende)
                if wert is not None:
                    extrahierte_werte[name].append(wert)
    return extrahierte_werte


def main():
    """Scan every ``.html`` file in ``ordner_pfad`` and write the result table.

    The output has a header row followed by one row per file with exactly one
    cell per title, so the columns always line up with the header. Several
    matches for the same title share one cell, separated by " | ".
    """
    with open(ausgabe_datei, 'w', encoding='utf-8') as ausgabe:
        ausgabe.write("Dateiname\t" + "\t".join(titel) + "\n")
        # Sorted for a reproducible row order; os.listdir() order is arbitrary.
        for datei_name in sorted(os.listdir(ordner_pfad)):
            if not datei_name.endswith(".html"):
                continue
            datei_pfad = os.path.join(ordner_pfad, datei_name)
            extrahierte_werte = extrahiere_werte(datei_pfad)
            dateiname = os.path.splitext(datei_name)[0]
            zellen = [_zelle(extrahierte_werte[t]) for t in titel]
            ausgabe.write("\t".join([dateiname] + zellen) + "\n")


if __name__ == "__main__":
    main()

# NOTE: the rest of this physical line of the flattened patch is not Python
# (generated output file and the start of poetry.lock). It is kept verbatim in
# the next comment line:
# diff --git a/extrahierte_werte.txt b/extrahierte_werte.txt new file mode 100644 index 0000000..6d3e37b --- /dev/null +++ b/extrahierte_werte.txt @@ -0,0 +1,2 @@ +Dateiname Name Verifizierter Club Mitglieder Kategorie Land Club ID Logo Titel8 Titel9 Titel10 +926751 Strava USA Verifizierter Club 27.618 Mitglieder Multisport diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..0e6bfeb --- /dev/null +++ b/poetry.lock @@ -0,0 +1,155 @@ +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
+ +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "bs4" +version = "0.0.1" +description = "Dummy package for Beautiful Soup" +optional = false +python-versions = "*" +files = [ + {file = "bs4-0.0.1.tar.gz", hash = "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"}, +] + +[package.dependencies] +beautifulsoup4 = "*" + +[[package]] +name = "lxml" +version = "4.9.3" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +files = [ + {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, + {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"}, + {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"}, + {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"}, + {file = 
"lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"}, + {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"}, + {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"}, + {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"}, + {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = "sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"}, + {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"}, + {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = 
"sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"}, + {file = "lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"}, + {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"}, + {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"}, + {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"}, + {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"}, + {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"}, + {file = 
"lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"}, + {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"}, + {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"}, + 
{file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"}, + {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"}, + {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"}, + {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = "sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"}, + {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"}, + {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = 
"sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"}, + {file = 
"lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, + {file = "lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=0.29.35)"] + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.11" +content-hash = "6b041199122f4c8add99150b7e559d18d2bbeb4c75486f5e9cc0917e25c07e16" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a90763f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "jens-gps-tools" +version = "0.1.0" +description = "" +authors = ["Tobias K. 
"] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.11" +bs4 = "^0.0.1" +lxml = "^4.9.3" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/textzerlegung_ganzertext.py b/textzerlegung_ganzertext.py new file mode 100644 index 0000000..26f91eb --- /dev/null +++ b/textzerlegung_ganzertext.py @@ -0,0 +1,81 @@ +import os + +# Ordner, in dem sich die Textdateien und das Skript befinden +ordner_pfad = os.path.dirname(os.path.abspath(__file__)) + +input = "926751.html" + +# Listen von Titeln, Start- und Endmustern +titel = [ +"Name", # 1 +"Verifizierter Club", # 2 +"Mitglieder", # 3 +"Kategorie", # 4 +"Land", # 5 +"Club ID", # 6 +"Logo", # 7 +"Titel8", # 8 +"Titel9", # 9 +"Titel10", # 10 +] + +start_muster = [ +'', # 2 +'
\n

\n', # 3 +'icon-lg">', # 4 +'\n\r \n\r',  # 5
+'<meta content=', # 1 +"", # 2 +' Mitglieder\n

# NOTE: this physical line of the flattened patch began with the closing
# entries of the pattern list opened on the preceding lines. That fragment is
# not valid Python on its own and is kept verbatim in the next comment line:
# ', # 3 +"", # 4 +'" src="data:,">', # 5 +'" property="og:url">', # 6 +'/large.jpg"', # 7 +"END8", # 8 +"END9", # 9 +"END10", # 10 +]

# Output file (tab-separated: one row per HTML file, one column per title)
ausgabe_datei = "extrahierte_werte.txt"

# Tabs and line breaks inside a value would split a cell or a row of the
# tab-separated output, so they are replaced by plain spaces when writing.
_ZELLEN_BEREINIGUNG = str.maketrans({"\t": " ", "\r": " ", "\n": " "})


def _wert_zwischen(text, start, ende):
    """Return the text between the first *start* and the next *ende*.

    Returns ``None`` when *start* is absent or no *ende* follows it.
    """
    start_index = text.find(start)
    if start_index == -1:
        return None
    wert_beginn = start_index + len(start)
    # Look for the end marker only AFTER the start marker. On a whole document
    # the end marker very often also occurs somewhere before the start marker,
    # which previously produced an empty value.
    end_index = text.find(ende, wert_beginn)
    if end_index == -1:
        return None
    return text[wert_beginn:end_index]


def _zelle(werte):
    """Join all matches for one title into a single tab-free, one-line cell."""
    return " | ".join(wert.translate(_ZELLEN_BEREINIGUNG) for wert in werte)


def extrahiere_werte(datei, titel_liste=None, start_liste=None, end_liste=None):
    """Extract values from the whole content of *datei* using start/end markers.

    The file is read as one string, so markers may span line breaks. For every
    (title, start marker, end marker) triple, the text between the first start
    marker and the next end marker after it is collected (at most one value
    per title).

    Args:
        datei: Path of the text/HTML file to scan.
        titel_liste: Column titles; defaults to the module-level ``titel``.
        start_liste: Start markers; defaults to the module-level
            ``start_muster``.
        end_liste: End markers; defaults to the module-level ``end_muster``.

    Returns:
        A dict mapping every title to the list of values found for it (empty
        list when nothing matched).
    """
    if titel_liste is None:
        titel_liste = titel
    if start_liste is None:
        start_liste = start_muster
    if end_liste is None:
        end_liste = end_muster

    extrahierte_werte = {t: [] for t in titel_liste}
    # NOTE(review): UTF-8 is assumed for the saved pages - confirm. Undecodable
    # bytes are replaced instead of aborting the whole run.
    with open(datei, 'r', encoding='utf-8', errors='replace') as textdatei:
        text = textdatei.read()
    # zip() stops at the shortest list, so lists of unequal length can no
    # longer raise IndexError.
    for name, start, ende in zip(titel_liste, start_liste, end_liste):
        wert = _wert_zwischen(text, start, ende)
        if wert is not None:
            extrahierte_werte[name].append(wert)
    return extrahierte_werte


def main():
    """Scan every ``.html`` file in ``ordner_pfad`` and write the result table.

    Progress and the extracted dict are printed for each file. The output has
    a header row followed by one row per file with exactly one cell per title,
    so the columns always line up with the header.
    """
    print("gogo")
    with open(ausgabe_datei, 'w', encoding='utf-8') as ausgabe:
        ausgabe.write("Dateiname\t" + "\t".join(titel) + "\n")
        # Sorted for a reproducible row order; os.listdir() order is arbitrary.
        for datei_name in sorted(os.listdir(ordner_pfad)):
            if not datei_name.endswith(".html"):
                continue
            print("handling {datei_name}".format(datei_name=datei_name))
            datei_pfad = os.path.join(ordner_pfad, datei_name)
            extrahierte_werte = extrahiere_werte(datei_pfad)
            print(extrahierte_werte)
            dateiname = os.path.splitext(datei_name)[0]
            zellen = [_zelle(extrahierte_werte[t]) for t in titel]
            ausgabe.write("\t".join([dateiname] + zellen) + "\n")


if __name__ == "__main__":
    main()

# NOTE: the rest of this physical line of the flattened patch is the beginning
# of the next file in the patch (textzerlegung_parsed.py), still in flattened
# patch form. It is kept verbatim in the next comment line:
# diff --git a/textzerlegung_parsed.py b/textzerlegung_parsed.py new file mode 100644 index 0000000..79f8725 --- /dev/null +++ b/textzerlegung_parsed.py @@ -0,0 +1,102 @@ +import os +from bs4 import BeautifulSoup + +# Ordner, in dem sich die Textdateien und das Skript befinden +ordner_pfad = os.path.dirname(os.path.abspath(__file__)) + +input = "926751.html" + +# Listen von Titeln, Start- und Endmustern +titel = [ +"Name", # 1 +"Verifizierter Club", # 2 +"Mitglieder", # 3 +"Kategorie", # 4 +"Land", # 5 +"Club ID", # 6 +"Logo", # 7 +"Titel8", # 8 +"Titel9", # 9 +"Titel10", # 10 +] + +selectors = [ +'meta[property="og:title"]', # 1 +'span[original-title="Verifizierter Club"]', # 2 +'div.club-members.section > h3', # 3
+'span.icon-workout.icon-lg', # 4 +# usw... TODO... die weiteren Selectoren eintragen... +'\n\r \n\r',  # 5
# NOTE: this physical line of the flattened patch began with the entries of a
# list whose opening line is not visible here. That fragment is not valid
# Python on its own and is kept verbatim in the next comment line:
# +'<meta content=', # 1 +"", # 2 +' Mitglieder\n', # 3 +"", # 4 +'" src="data:,">', # 5 +'" property="og:url">', # 6 +'/large.jpg"', # 7 +"END8", # 8 +"END9", # 9 +"END10", # 10 +]

# Output file (tab-separated: one row per HTML file, one column per title)
ausgabe_datei = "extrahierte_werte.txt"

# Tabs and line breaks inside a value would split a cell or a row of the
# tab-separated output, so they are replaced by plain spaces when writing.
_ZELLEN_BEREINIGUNG = str.maketrans({"\t": " ", "\r": " ", "\n": " "})


def _zelle(werte):
    """Join all matches for one title into a single tab-free, one-line cell."""
    return " | ".join(wert.translate(_ZELLEN_BEREINIGUNG) for wert in werte)


def extrahiere_werte(datei):
    """Extract values from the HTML file *datei* with CSS selectors.

    The i-th entry of the module-level ``selectors`` list is paired with the
    i-th entry of ``properties`` and ``titel``. A property of ``"##inner"``
    takes the text of the selected element; any other property is read as an
    attribute of that element. Selectors that match nothing, and attributes
    the element does not have, are skipped, so the title keeps an empty list.

    Args:
        datei: Path of the HTML file to parse.

    Returns:
        A dict mapping every title to the list of values found for it.
    """
    extrahierte_werte = {t: [] for t in titel}
    # NOTE(review): UTF-8 is assumed for the saved pages - confirm. Undecodable
    # bytes are replaced instead of aborting the whole run.
    with open(datei, 'r', encoding='utf-8', errors='replace') as textdatei:
        soup = BeautifulSoup(textdatei.read(), 'lxml')
    # zip() stops at the shortest of the three lists.
    for name, selector, attribut in zip(titel, selectors, properties):
        tag = soup.select_one(selector)
        if tag is None:
            # select_one() returns None when nothing matches; previously this
            # crashed on ``.text`` / ``[...]``.
            continue
        if attribut == "##inner":
            extrahierte_werte[name].append(tag.text)
            continue
        wert = tag.get(attribut)  # None instead of KeyError when missing
        if wert is None:
            continue
        if isinstance(wert, list):
            # Multi-valued attributes such as ``class`` come back as a list.
            wert = " ".join(wert)
        extrahierte_werte[name].append(wert)
    return extrahierte_werte


def main():
    """Parse every ``.html`` file in ``ordner_pfad`` and write the result table.

    Progress and the extracted dict are printed for each file. The output has
    a header row followed by one row per file with exactly one cell per title,
    so the columns always line up with the header.
    """
    print("gogo")
    with open(ausgabe_datei, 'w', encoding='utf-8') as ausgabe:
        ausgabe.write("Dateiname\t" + "\t".join(titel) + "\n")
        # Sorted for a reproducible row order; os.listdir() order is arbitrary.
        for datei_name in sorted(os.listdir(ordner_pfad)):
            if not datei_name.endswith(".html"):
                continue
            print("handling {datei_name}".format(datei_name=datei_name))
            datei_pfad = os.path.join(ordner_pfad, datei_name)
            extrahierte_werte = extrahiere_werte(datei_pfad)
            print(extrahierte_werte)
            dateiname = os.path.splitext(datei_name)[0]
            zellen = [_zelle(extrahierte_werte[t]) for t in titel]
            ausgabe.write("\t".join([dateiname] + zellen) + "\n")


if __name__ == "__main__":
    main()